diff --git a/.bzrignore b/.bzrignore index bc7a7a6429d..d58cca23226 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1,6 +1,7 @@ *-t *.Plo *.Po +*.Tpo *.a *.bb *.bbg @@ -38,9 +39,9 @@ */.libs/* */.pure */debug/* +*/minsizerel/* */release/* */relwithdebinfo/* -*/minsizerel/* *~ .*.swp ./CMakeCache.txt @@ -1021,8 +1022,8 @@ libmysqld/.deps/unireg.Po libmysqld/backup_dir libmysqld/client.c libmysqld/client_settings.h -libmysqld/convert.cc libmysqld/cmake_dummy.c +libmysqld/convert.cc libmysqld/derror.cc libmysqld/discover.cc libmysqld/emb_qcache.cpp @@ -1072,6 +1073,7 @@ libmysqld/ha_innobase.cc libmysqld/ha_innodb.cc libmysqld/ha_isam.cc libmysqld/ha_isammrg.cc +libmysqld/ha_maria.cc libmysqld/ha_myisam.cc libmysqld/ha_myisammrg.cc libmysqld/ha_ndbcluster.cc @@ -2414,6 +2416,41 @@ storage/innobase/ut/.deps/ut0rnd.Po storage/innobase/ut/.deps/ut0ut.Po storage/innobase/ut/.deps/ut0vec.Po storage/innobase/ut/.deps/ut0wqueue.Po +storage/maria/*.MAD +storage/maria/*.MAI +storage/maria/ma_rt_test +storage/maria/ma_sp_test +storage/maria/ma_test1 +storage/maria/ma_test2 +storage/maria/ma_test3 +storage/maria/ma_test_all +storage/maria/maria.log +storage/maria/maria_chk +storage/maria/maria_control +storage/maria/maria_ftdump +storage/maria/maria_log +storage/maria/maria_log.* +storage/maria/maria_pack +storage/maria/unittest/ma_pagecache_consist_1k-t-big +storage/maria/unittest/ma_pagecache_consist_1kHC-t-big +storage/maria/unittest/ma_pagecache_consist_1kRD-t-big +storage/maria/unittest/ma_pagecache_consist_1kWR-t-big +storage/maria/unittest/ma_pagecache_consist_64k-t-big +storage/maria/unittest/ma_pagecache_consist_64kHC-t-big +storage/maria/unittest/ma_pagecache_consist_64kRD-t-big +storage/maria/unittest/ma_pagecache_consist_64kWR-t-big +storage/maria/unittest/ma_pagecache_single_64k-t-big +storage/maria/unittest/ma_test_loghandler_long-t-big +storage/maria/unittest/maria_control +storage/maria/unittest/mf_pagecache_consist_1k-t-big +storage/maria/unittest/mf_pagecache_consist_1kHC-t-big +storage/maria/unittest/mf_pagecache_consist_1kRD-t-big +storage/maria/unittest/mf_pagecache_consist_1kWR-t-big +storage/maria/unittest/mf_pagecache_consist_64k-t-big +storage/maria/unittest/mf_pagecache_consist_64kHC-t-big +storage/maria/unittest/mf_pagecache_consist_64kRD-t-big +storage/maria/unittest/mf_pagecache_consist_64kWR-t-big +storage/maria/unittest/mf_pagecache_single_64k-t-big storage/myisam/.deps/ft_boolean_search.Po storage/myisam/.deps/ft_nlq_search.Po storage/myisam/.deps/ft_parser.Po @@ -2964,13 +3001,25 @@ unittest/examples/.deps/simple-t.Po unittest/examples/.deps/skip-t.Po unittest/examples/.deps/skip_all-t.Po unittest/examples/.deps/todo-t.Po +unittest/maria_control unittest/mysys/*.t unittest/mysys/.deps/base64-t.Po unittest/mysys/.deps/bitmap-t.Po unittest/mysys/.deps/my_atomic-t.Po +unittest/mysys/mf_pagecache_consist_1k-t-big +unittest/mysys/mf_pagecache_consist_1kHC-t-big +unittest/mysys/mf_pagecache_consist_1kRD-t-big +unittest/mysys/mf_pagecache_consist_1kWR-t-big +unittest/mysys/mf_pagecache_consist_64k-t-big +unittest/mysys/mf_pagecache_consist_64kHC-t-big +unittest/mysys/mf_pagecache_consist_64kRD-t-big +unittest/mysys/mf_pagecache_consist_64kWR-t-big +unittest/mysys/mf_pagecache_single_64k-t-big unittest/mytap/.deps/tap.Po unittest/mytap/t/*.t unittest/mytap/t/.deps/basic-t.Po +unittest/page_cache_test_file_1 +unittest/pagecache_debug.log unittest/unit vi.h vio/*.ds? diff --git a/.tree-is-private b/.tree-is-private new file mode 100644 index 00000000000..e69de29bb2d diff --git a/BitKeeper/triggers/post-commit b/BitKeeper/triggers/post-commit index 8376b9e0631..af7c27398e5 100755 --- a/BitKeeper/triggers/post-commit +++ b/BitKeeper/triggers/post-commit @@ -5,7 +5,7 @@ FROM=$USER@mysql.com COMMITS=commits@lists.mysql.com DOCS=docs-commit@mysql.com LIMIT=10000 -VERSION="5.1" +VERSION="maria" BKROOT=`bk root` if [ -x /usr/sbin/sendmail ]; then diff --git a/Makefile.am b/Makefile.am index 88e0f949657..0f1ebb518fc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -93,7 +93,7 @@ test-binlog-statement: cd mysql-test ; \ @PERL@ ./mysql-test-run.pl $(force) --mysqld=--binlog-format=statement -test: test-unit test-ns test-pr +test: test-ns test-pr test-full: test test-nr test-ps diff --git a/configure.in b/configure.in index de0a9bbdeaa..cfacad9a4f4 100644 --- a/configure.in +++ b/configure.in @@ -2464,8 +2464,6 @@ AC_SUBST(readline_basedir) AC_SUBST(readline_link) AC_SUBST(readline_h_ln_cmd) - - # Include man pages, if desired, adapted to the configured parts. if test X"$with_man" = Xyes then diff --git a/include/Makefile.am b/include/Makefile.am index b803f614a93..9d9d5545bde 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -27,8 +27,8 @@ pkginclude_HEADERS = $(HEADERS_ABI) my_dbug.h m_string.h my_sys.h \ my_getopt.h sslopt-longopts.h my_dir.h \ sslopt-vars.h sslopt-case.h sql_common.h keycache.h \ m_ctype.h mysql/plugin.h my_attribute.h $(HEADERS_GEN) -noinst_HEADERS = config-win.h config-netware.h \ - heap.h my_bitmap.h my_uctype.h \ +noinst_HEADERS = config-win.h config-netware.h lf.h my_bit.h \ + heap.h maria.h myisamchk.h my_bitmap.h my_uctype.h \ myisam.h myisampack.h myisammrg.h ft_global.h\ mysys_err.h my_base.h help_start.h help_end.h \ my_nosys.h my_alarm.h queues.h rijndael.h sha1.h \ @@ -37,7 +37,7 @@ noinst_HEADERS = config-win.h config-netware.h \ mysql_version.h.in my_handler.h my_time.h \ my_vle.h my_user.h my_atomic.h atomic/nolock.h \ atomic/rwlock.h atomic/x86-gcc.h atomic/x86-msvc.h \ - my_libwrap.h + my_libwrap.h wqueue.h # Remove built files and the symlinked directories CLEANFILES = $(BUILT_SOURCES) readline openssl diff --git a/include/atomic/nolock.h b/include/atomic/nolock.h index f15c8b13b7f..9b5cbecbd0a 100644 --- a/include/atomic/nolock.h +++ b/include/atomic/nolock.h @@ -13,24 +13,25 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#if defined(__i386__) || defined(_M_IX86) +#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) -#ifdef MY_ATOMIC_MODE_DUMMY -# define LOCK "" -#else -# define LOCK "lock" -#endif +# ifdef MY_ATOMIC_MODE_DUMMY +# define LOCK_prefix "" +# else +# define LOCK_prefix "lock" +# endif -#ifdef __GNUC__ -#include "x86-gcc.h" -#elif defined(_MSC_VER) -#include "x86-msvc.h" -#endif +# ifdef __GNUC__ +# include "x86-gcc.h" +# elif defined(_MSC_VER) +# error Broken! +# include "x86-msvc.h" +# endif #endif #ifdef make_atomic_cas_body -typedef struct { } my_atomic_rwlock_t; +typedef struct { } my_atomic_rwlock_t __attribute__ ((unused)); #define my_atomic_rwlock_destroy(name) #define my_atomic_rwlock_init(name) #define my_atomic_rwlock_rdlock(name) diff --git a/include/atomic/rwlock.h b/include/atomic/rwlock.h index 18b77e93d80..cb41952b70c 100644 --- a/include/atomic/rwlock.h +++ b/include/atomic/rwlock.h @@ -13,7 +13,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; +typedef struct {pthread_mutex_t rw;} my_atomic_rwlock_t; #ifdef MY_ATOMIC_MODE_DUMMY /* @@ -31,17 +31,22 @@ typedef struct {pthread_rwlock_t rw;} my_atomic_rwlock_t; #define my_atomic_rwlock_wrunlock(name) #define MY_ATOMIC_MODE "dummy (non-atomic)" #else -#define my_atomic_rwlock_destroy(name) pthread_rwlock_destroy(& (name)->rw) -#define my_atomic_rwlock_init(name) pthread_rwlock_init(& (name)->rw, 0) -#define my_atomic_rwlock_rdlock(name) pthread_rwlock_rdlock(& (name)->rw) -#define my_atomic_rwlock_wrlock(name) pthread_rwlock_wrlock(& (name)->rw) -#define my_atomic_rwlock_rdunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define my_atomic_rwlock_wrunlock(name) pthread_rwlock_unlock(& (name)->rw) -#define MY_ATOMIC_MODE "rwlocks" +/* + we're using read-write lock macros but map them to mutex locks, and they're + faster. Still, having semantically rich API we can change the + underlying implementation, if necessary. +*/ +#define my_atomic_rwlock_destroy(name) pthread_mutex_destroy(& (name)->rw) +#define my_atomic_rwlock_init(name) pthread_mutex_init(& (name)->rw, 0) +#define my_atomic_rwlock_rdlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_wrlock(name) pthread_mutex_lock(& (name)->rw) +#define my_atomic_rwlock_rdunlock(name) pthread_mutex_unlock(& (name)->rw) +#define my_atomic_rwlock_wrunlock(name) pthread_mutex_unlock(& (name)->rw) +#define MY_ATOMIC_MODE "mutex" #endif #define make_atomic_add_body(S) int ## S sav; sav= *a; *a+= v; v=sav; -#define make_atomic_swap_body(S) int ## S sav; sav= *a; *a= v; v=sav; +#define make_atomic_fas_body(S) int ## S sav; sav= *a; *a= v; v=sav; #define make_atomic_cas_body(S) if ((ret= (*a == *cmp))) *a= set; else *cmp=*a; #define make_atomic_load_body(S) ret= *a; #define make_atomic_store_body(S) *a= v; diff --git a/include/atomic/x86-gcc.h b/include/atomic/x86-gcc.h index d79dadbf05e..5a34bc22f9e 100644 --- a/include/atomic/x86-gcc.h +++ b/include/atomic/x86-gcc.h @@ -19,10 +19,18 @@ architectures support double-word (128-bit) cas. */ -#ifdef MY_ATOMIC_NO_XADD -#define MY_ATOMIC_MODE "gcc-x86" LOCK "-no-xadd" +#ifdef __x86_64__ +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-amd64" LOCK_prefix +# endif #else -#define MY_ATOMIC_MODE "gcc-x86" LOCK +# ifdef MY_ATOMIC_NO_XADD +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix "-no-xadd" +# else +# define MY_ATOMIC_MODE "gcc-x86" LOCK_prefix +# endif #endif /* fix -ansi errors while maintaining readability */ @@ -32,12 +40,12 @@ #ifndef MY_ATOMIC_NO_XADD #define make_atomic_add_body(S) \ - asm volatile (LOCK "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) + asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a)) #endif -#define make_atomic_swap_body(S) \ - asm volatile ("; xchg %0, %1;" : "+r" (v) , "+m" (*a)) +#define make_atomic_fas_body(S) \ + asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a)) #define make_atomic_cas_body(S) \ - asm volatile (LOCK "; cmpxchg %3, %0; setz %2;" \ + asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;" \ : "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set)) #ifdef MY_ATOMIC_MODE_DUMMY @@ -46,13 +54,16 @@ #else /* Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers + But we add LOCK_prefix here anyway to force memory barriers */ #define make_atomic_load_body(S) \ ret=0; \ - asm volatile (LOCK "; cmpxchg %2, %0" \ + asm volatile (LOCK_prefix "; cmpxchg %2, %0" \ : "+m" (*a), "+a" (ret): "r" (ret)) #define make_atomic_store_body(S) \ - asm volatile ("; xchg %0, %1;" : "+m" (*a) : "r" (v)) + asm volatile ("; xchg %0, %1;" : "+m" (*a), "+r" (v)) #endif +/* TODO test on intel whether the below helps. on AMD it makes no difference */ +//#define LF_BACKOFF ({asm volatile ("rep; nop"); 1; }) + diff --git a/include/atomic/x86-msvc.h b/include/atomic/x86-msvc.h index c4885bb8451..2a2cfe70de9 100644 --- a/include/atomic/x86-msvc.h +++ b/include/atomic/x86-msvc.h @@ -25,24 +25,24 @@ #ifndef _atomic_h_cleanup_ #define _atomic_h_cleanup_ "atomic/x86-msvc.h" -#define MY_ATOMIC_MODE "msvc-x86" LOCK +#define MY_ATOMIC_MODE "msvc-x86" LOCK_prefix #define make_atomic_add_body(S) \ _asm { \ _asm mov reg_ ## S, v \ - _asm LOCK xadd *a, reg_ ## S \ + _asm LOCK_prefix xadd *a, reg_ ## S \ _asm movzx v, reg_ ## S \ } #define make_atomic_cas_body(S) \ _asm { \ _asm mov areg_ ## S, *cmp \ _asm mov reg2_ ## S, set \ - _asm LOCK cmpxchg *a, reg2_ ## S \ + _asm LOCK_prefix cmpxchg *a, reg2_ ## S \ _asm mov *cmp, areg_ ## S \ _asm setz al \ _asm movzx ret, al \ } -#define make_atomic_swap_body(S) \ +#define make_atomic_fas_body(S) \ _asm { \ _asm mov reg_ ## S, v \ _asm xchg *a, reg_ ## S \ @@ -55,13 +55,13 @@ #else /* Actually 32-bit reads/writes are always atomic on x86 - But we add LOCK here anyway to force memory barriers + But we add LOCK_prefix here anyway to force memory barriers */ #define make_atomic_load_body(S) \ _asm { \ _asm mov areg_ ## S, 0 \ _asm mov reg2_ ## S, areg_ ## S \ - _asm LOCK cmpxchg *a, reg2_ ## S \ + _asm LOCK_prefix cmpxchg *a, reg2_ ## S \ _asm mov ret, areg_ ## S \ } #define make_atomic_store_body(S) \ diff --git a/include/ft_global.h b/include/ft_global.h index 752371d6bc6..dba8a6e75e5 100644 --- a/include/ft_global.h +++ b/include/ft_global.h @@ -65,6 +65,17 @@ void ft_free_stopwords(void); FT_INFO *ft_init_search(uint,void *, uint, uchar *, uint,CHARSET_INFO *, uchar *); my_bool ft_boolean_check_syntax_string(const uchar *); +/* Internal symbols for fulltext between maria and MyISAM */ + +#define HA_FT_WTYPE HA_KEYTYPE_FLOAT +#define HA_FT_WLEN 4 +#define FT_SEGS 2 + +#define ft_sintXkorr(A) mi_sint4korr(A) +#define ft_intXstore(T,A) mi_int4store(T,A) + +extern const HA_KEYSEG ft_keysegs[FT_SEGS]; + #ifdef __cplusplus } #endif diff --git a/include/keycache.h b/include/keycache.h index 7f4ce86cea0..14f11475641 100644 --- a/include/keycache.h +++ b/include/keycache.h @@ -132,7 +132,8 @@ extern void end_key_cache(KEY_CACHE *keycache, my_bool cleanup); /* Functions to handle multiple key caches */ extern my_bool multi_keycache_init(void); extern void multi_keycache_free(void); -extern KEY_CACHE *multi_key_cache_search(uchar *key, uint length); +extern KEY_CACHE *multi_key_cache_search(uchar *key, uint length, + KEY_CACHE *def); extern my_bool multi_key_cache_set(const uchar *key, uint length, KEY_CACHE *key_cache); extern void multi_key_cache_change(KEY_CACHE *old_data, diff --git a/include/lf.h b/include/lf.h new file mode 100644 index 00000000000..4712f9c4862 --- /dev/null +++ b/include/lf.h @@ -0,0 +1,260 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _lf_h +#define _lf_h + +#include + +/* + Helpers to define both func() and _func(), where + func() is a _func() protected by my_atomic_rwlock_wrlock() +*/ + +#define lock_wrap(f, t, proto_args, args, lock) \ +t _ ## f proto_args; \ +static inline t f proto_args \ +{ \ + t ret; \ + my_atomic_rwlock_wrlock(lock); \ + ret= _ ## f args; \ + my_atomic_rwlock_wrunlock(lock); \ + return ret; \ +} + +#define lock_wrap_void(f, proto_args, args, lock) \ +void _ ## f proto_args; \ +static inline void f proto_args \ +{ \ + my_atomic_rwlock_wrlock(lock); \ + _ ## f args; \ + my_atomic_rwlock_wrunlock(lock); \ +} + +#define nolock_wrap(f, t, proto_args, args) \ +t _ ## f proto_args; \ +static inline t f proto_args \ +{ \ + return _ ## f args; \ +} + +#define nolock_wrap_void(f, proto_args, args) \ +void _ ## f proto_args; \ +static inline void f proto_args \ +{ \ + _ ## f args; \ +} + +/* + wait-free dynamic array, see lf_dynarray.c + + 4 levels of 256 elements each mean 4311810304 elements in an array - it + should be enough for a while +*/ +#define LF_DYNARRAY_LEVEL_LENGTH 256 +#define LF_DYNARRAY_LEVELS 4 + +typedef struct { + void * volatile level[LF_DYNARRAY_LEVELS]; + uint size_of_element; + my_atomic_rwlock_t lock; +} LF_DYNARRAY; + +typedef int (*lf_dynarray_func)(void *, void *); + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size); +void lf_dynarray_destroy(LF_DYNARRAY *array); + +nolock_wrap(lf_dynarray_value, void *, + (LF_DYNARRAY *array, uint idx), + (array, idx)) +lock_wrap(lf_dynarray_lvalue, void *, + (LF_DYNARRAY *array, uint idx), + (array, idx), + &array->lock) +nolock_wrap(lf_dynarray_iterate, int, + (LF_DYNARRAY *array, lf_dynarray_func func, void *arg), + (array, func, arg)) + +/* + pin manager for memory allocator, lf_alloc-pin.c +*/ + +#define LF_PINBOX_PINS 4 +#define LF_PURGATORY_SIZE 10 + +typedef void lf_pinbox_free_func(void *, void *, void*); + +typedef struct { + LF_DYNARRAY pinarray; + lf_pinbox_free_func *free_func; + void *free_func_arg; + uint free_ptr_offset; + uint32 volatile pinstack_top_ver; /* this is a versioned pointer */ + uint32 volatile pins_in_array; /* number of elements in array */ +} LF_PINBOX; + +typedef struct { + void * volatile pin[LF_PINBOX_PINS]; + LF_PINBOX *pinbox; + void *stack_ends_here; + void *purgatory; + uint32 purgatory_count; + uint32 volatile link; +/* we want sizeof(LF_PINS) to be 128 to avoid false sharing */ + char pad[128-sizeof(uint32)*2 + -sizeof(LF_PINBOX *) + -sizeof(void*) + -sizeof(void *)*(LF_PINBOX_PINS+1)]; +} LF_PINS; + +/* + shortcut macros to do an atomic_wrlock on a structure that uses pins + (e.g. lf_hash). +*/ +#define lf_rwlock_by_pins(PINS) \ + my_atomic_rwlock_wrlock(&(PINS)->pinbox->pinarray.lock) +#define lf_rwunlock_by_pins(PINS) \ + my_atomic_rwlock_wrunlock(&(PINS)->pinbox->pinarray.lock) + +/* + compile-time assert, to require "no less than N" pins + it's enough if it'll fail on at least one compiler, so + we'll enable it on GCC only, which supports zero-length arrays. +*/ +#if defined(__GNUC__) && defined(MY_LF_EXTRA_DEBUG) +#define LF_REQUIRE_PINS(N) \ + static const char require_pins[LF_PINBOX_PINS-N] \ + __attribute__ ((unused)); \ + static const int LF_NUM_PINS_IN_THIS_FILE= N; +#define _lf_pin(PINS, PIN, ADDR) \ + ( \ + assert(PIN < LF_NUM_PINS_IN_THIS_FILE), \ + my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) \ + ) +#else +#define LF_REQUIRE_PINS(N) +#define _lf_pin(PINS, PIN, ADDR) my_atomic_storeptr(&(PINS)->pin[PIN], (ADDR)) +#endif + +#define _lf_unpin(PINS, PIN) _lf_pin(PINS, PIN, NULL) +#define lf_pin(PINS, PIN, ADDR) \ + do { \ + lf_rwlock_by_pins(PINS); \ + _lf_pin(PINS, PIN, ADDR); \ + lf_rwunlock_by_pins(PINS); \ + } while (0) +#define lf_unpin(PINS, PIN) lf_pin(PINS, PIN, NULL) +#define _lf_assert_pin(PINS, PIN) assert((PINS)->pin[PIN] != 0) +#define _lf_assert_unpin(PINS, PIN) assert((PINS)->pin[PIN] == 0) + +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func, void * free_func_arg); +void lf_pinbox_destroy(LF_PINBOX *pinbox); + +lock_wrap(lf_pinbox_get_pins, LF_PINS *, + (LF_PINBOX *pinbox, void *stack_end), + (pinbox, stack_end), + &pinbox->pinarray.lock); +lock_wrap_void(lf_pinbox_put_pins, + (LF_PINS *pins), + (pins), + &pins->pinbox->pinarray.lock); +lock_wrap_void(lf_pinbox_free, + (LF_PINS *pins, void *addr), + (pins, addr), + &pins->pinbox->pinarray.lock); + +/* + memory allocator, lf_alloc-pin.c +*/ + +struct st_lf_alloc_node { + struct st_lf_alloc_node *next; +}; + +typedef struct st_lf_allocator { + LF_PINBOX pinbox; + struct st_lf_alloc_node * volatile top; + uint element_size; + uint32 volatile mallocs; +} LF_ALLOCATOR; + +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset); +void lf_alloc_destroy(LF_ALLOCATOR *allocator); +uint lf_alloc_pool_count(LF_ALLOCATOR *allocator); +/* + shortcut macros to access underlying pinbox functions from an LF_ALLOCATOR + see _lf_pinbox_get_pins() and _lf_pinbox_put_pins() +*/ +#define _lf_alloc_free(PINS, PTR) _lf_pinbox_free((PINS), (PTR)) +#define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR)) +#define _lf_alloc_get_pins(A, ST) _lf_pinbox_get_pins(&(A)->pinbox, (ST)) +#define lf_alloc_get_pins(A, ST) lf_pinbox_get_pins(&(A)->pinbox, (ST)) +#define _lf_alloc_put_pins(PINS) _lf_pinbox_put_pins(PINS) +#define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS) +#define lf_alloc_direct_free(ALLOC, ADDR) my_free((gptr)(ADDR), MYF(0)) + +lock_wrap(lf_alloc_new, void *, + (LF_PINS *pins), + (pins), + &pins->pinbox->pinarray.lock); + +/* + extendible hash, lf_hash.c +*/ +#include + +#define LF_HASH_UNIQUE 1 + +typedef struct { + LF_DYNARRAY array; /* hash itself */ + LF_ALLOCATOR alloc; /* allocator for elements */ + hash_get_key get_key; /* see HASH */ + CHARSET_INFO *charset; /* see HASH */ + uint key_offset, key_length; /* see HASH */ + uint element_size, flags; /* LF_HASH_UNIQUE, etc */ + int32 volatile size; /* size of array */ + int32 volatile count; /* number of elements in the hash */ +} LF_HASH; + +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset); +void lf_hash_destroy(LF_HASH *hash); +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data); +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen); +/* + shortcut macros to access underlying pinbox functions from an LF_HASH + see _lf_pinbox_get_pins() and _lf_pinbox_put_pins() +*/ +#define _lf_hash_get_pins(HASH, ST) _lf_alloc_get_pins(&(HASH)->alloc, (ST)) +#define lf_hash_get_pins(HASH, ST) lf_alloc_get_pins(&(HASH)->alloc, (ST)) +#define _lf_hash_put_pins(PINS) _lf_pinbox_put_pins(PINS) +#define lf_hash_put_pins(PINS) lf_pinbox_put_pins(PINS) +#define lf_hash_search_unpin(PINS) lf_unpin((PINS), 2) +/* + cleanup +*/ + +#undef lock_wrap_void +#undef lock_wrap +#undef nolock_wrap_void +#undef nolock_wrap + +#endif + diff --git a/include/m_string.h b/include/m_string.h index 715720df294..814d88b6ead 100644 --- a/include/m_string.h +++ b/include/m_string.h @@ -67,7 +67,7 @@ # define bcopy(s, d, n) memcpy((d), (s), (n)) # define bcmp(A,B,C) memcmp((A),(B),(C)) # define bzero(A,B) memset((A),0,(B)) -# define bmove_align(A,B,C) memcpy((A),(B),(C)) +# define bmove_align(A,B,C) memcpy((A),(B),(C)) #endif #if defined(__cplusplus) @@ -129,7 +129,10 @@ extern size_t bcmp(const uchar *s1,const uchar *s2,size_t len); extern size_t my_bcmp(const uchar *s1,const uchar *s2,size_t len); #undef bcmp #define bcmp(A,B,C) my_bcmp((A),(B),(C)) -#endif +#define bzero_if_purify(A,B) bzero(A,B) +#else +#define bzero_if_purify(A,B) +#endif /* HAVE_purify */ #ifndef bmove512 extern void bmove512(uchar *dst,const uchar *src,size_t len); diff --git a/include/maria.h b/include/maria.h new file mode 100644 index 00000000000..5f8e3dcdd41 --- /dev/null +++ b/include/maria.h @@ -0,0 +1,442 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* This file should be included when using maria_funktions */ + +#ifndef _maria_h +#define _maria_h +#ifdef __cplusplus +extern "C" { +#endif +#ifndef _my_base_h +#include +#endif +#ifndef _m_ctype_h +#include +#endif +#include "../storage/maria/ma_pagecache.h" +#include "my_handler.h" +#include "ft_global.h" +#include +#include + +/* + Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details +*/ + +#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY +#define MARIA_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */ +#else +#define MARIA_MAX_KEY MAX_INDEXES /* Max allowed keys */ +#endif + +#define MARIA_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */ +#define MARIA_NAME_IEXT ".MAI" +#define MARIA_NAME_DEXT ".MAD" +/* Max extra space to use when sorting keys */ +#define MARIA_MAX_TEMP_LENGTH 2*1024L*1024L*1024L +/* Possible values for maria_block_size (must be power of 2) */ +#define MARIA_KEY_BLOCK_LENGTH 8192 /* default key block length */ +#define MARIA_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ +#define MARIA_MAX_KEY_BLOCK_LENGTH 32768 +/* Minimal page cache when we only want to be able to scan a table */ +#define MARIA_MIN_PAGE_CACHE_SIZE 65536 + +/* + In the following macros '_keyno_' is 0 .. keys-1. + If there can be more keys than bits in the key_map, the highest bit + is for all upper keys. They cannot be switched individually. + This means that clearing of high keys is ignored, setting one high key + sets all high keys. +*/ +#define MARIA_KEYMAP_BITS (8 * SIZEOF_LONG_LONG) +#define MARIA_KEYMAP_HIGH_MASK (ULL(1) << (MARIA_KEYMAP_BITS - 1)) +#define maria_get_mask_all_keys_active(_keys_) \ + (((_keys_) < MARIA_KEYMAP_BITS) ? \ + ((ULL(1) << (_keys_)) - ULL(1)) : \ + (~ ULL(0))) +#if MARIA_MAX_KEY > MARIA_KEYMAP_BITS +#define maria_is_key_active(_keymap_,_keyno_) \ + (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + test((_keymap_) & (ULL(1) << (_keyno_))) : \ + test((_keymap_) & MARIA_KEYMAP_HIGH_MASK)) +#define maria_set_key_active(_keymap_,_keyno_) \ + (_keymap_)|= (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + (ULL(1) << (_keyno_)) : \ + MARIA_KEYMAP_HIGH_MASK) +#define maria_clear_key_active(_keymap_,_keyno_) \ + (_keymap_)&= (((_keyno_) < MARIA_KEYMAP_BITS) ? \ + (~ (ULL(1) << (_keyno_))) : \ + (~ (ULL(0))) /*ignore*/ ) +#else +#define maria_is_key_active(_keymap_,_keyno_) \ + test((_keymap_) & (ULL(1) << (_keyno_))) +#define maria_set_key_active(_keymap_,_keyno_) \ + (_keymap_)|= (ULL(1) << (_keyno_)) +#define maria_clear_key_active(_keymap_,_keyno_) \ + (_keymap_)&= (~ (ULL(1) << (_keyno_))) +#endif +#define maria_is_any_key_active(_keymap_) \ + test((_keymap_)) +#define maria_is_all_keys_active(_keymap_,_keys_) \ + ((_keymap_) == maria_get_mask_all_keys_active(_keys_)) +#define maria_set_all_keys_active(_keymap_,_keys_) \ + (_keymap_)= maria_get_mask_all_keys_active(_keys_) +#define maria_clear_all_keys_active(_keymap_) \ + (_keymap_)= 0 +#define maria_intersect_keys_active(_to_,_from_) \ + (_to_)&= (_from_) +#define maria_is_any_intersect_keys_active(_keymap1_,_keys_,_keymap2_) \ + ((_keymap1_) & (_keymap2_) & \ + maria_get_mask_all_keys_active(_keys_)) +#define maria_copy_keys_active(_to_,_maxkeys_,_from_) \ + (_to_)= (maria_get_mask_all_keys_active(_maxkeys_) & \ + (_from_)) + + /* Param to/from maria_info */ + +typedef ulonglong MARIA_RECORD_POS; + +typedef struct st_maria_isaminfo /* Struct from h_info */ +{ + ha_rows records; /* Records in database */ + ha_rows deleted; /* Deleted records in database */ + MARIA_RECORD_POS recpos; /* Pos for last used record */ + MARIA_RECORD_POS newrecpos; /* Pos if we write new record */ + MARIA_RECORD_POS dup_key_pos; /* Position to record with dup key */ + my_off_t data_file_length; /* Length of data file */ + my_off_t max_data_file_length, index_file_length; + my_off_t max_index_file_length, delete_length; + ulonglong auto_increment; + ulonglong key_map; /* Which keys are used */ + time_t create_time; /* When table was created */ + time_t check_time; + time_t update_time; + ulong record_offset; + ulong *rec_per_key; /* for sql optimizing */ + ulong reclength; /* Recordlength */ + ulong mean_reclength; /* Mean recordlength (if packed) */ + char *data_file_name, *index_file_name; + enum data_file_type data_file_type; + uint keys; /* Number of keys in use */ + uint options; /* HA_OPTION_... used */ + uint reflength; + int errkey, /* With key was dupplicated on err */ + sortkey; /* clustered by this key */ + File filenr; /* (uniq) filenr for datafile */ +} MARIA_INFO; + + +typedef struct st_maria_create_info +{ + const char *index_file_name, *data_file_name; /* If using symlinks */ + ha_rows max_rows; + ha_rows reloc_rows; + ulonglong auto_increment; + ulonglong data_file_length; + ulonglong key_file_length; + /* Size of null bitmap at start of row */ + uint null_bytes; + uint old_options; + enum data_file_type org_data_file_type; + uint8 language; + my_bool with_auto_increment, transactional; +} MARIA_CREATE_INFO; + +struct st_maria_info; /* For referense */ +struct st_maria_share; +typedef struct st_maria_info MARIA_HA; +struct st_maria_s_param; + +typedef struct st_maria_keydef /* Key definition with open & info */ +{ + struct st_maria_share *share; /* Pointer to base (set in open) */ + uint16 keysegs; /* Number of key-segment */ + uint16 flag; /* NOSAME, PACK_USED */ + + uint8 key_alg; /* BTREE, RTREE */ + uint16 block_length; /* Length of keyblock (auto) */ + uint16 underflow_block_length; /* When to execute underflow */ + uint16 keylength; /* Tot length of keyparts (auto) */ + uint16 minlength; /* min length of (packed) key (auto) */ + uint16 maxlength; /* max length of (packed) key (auto) */ + uint32 version; /* For concurrent read/write */ + uint32 ftparser_nr; /* distinct ftparser number */ + + HA_KEYSEG *seg, *end; + struct st_mysql_ftparser *parser; /* Fulltext [pre]parser */ + int (*bin_search)(struct st_maria_info *info, + struct st_maria_keydef *keyinfo, byte *page, byte *key, + uint key_len, uint comp_flag, byte **ret_pos, + byte *buff, my_bool *was_last_key); + uint(*get_key)(struct st_maria_keydef *keyinfo, uint nod_flag, + byte **page, byte *key); + int (*pack_key)(struct st_maria_keydef *keyinfo, uint nod_flag, + byte *next_key, byte *org_key, byte *prev_key, + const byte *key, struct st_maria_s_param *s_temp); + void (*store_key)(struct st_maria_keydef *keyinfo, byte *key_pos, + struct st_maria_s_param *s_temp); + int (*ck_insert)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen); + int (*ck_delete)(struct st_maria_info *inf, uint k_nr, byte *k, uint klen); +} MARIA_KEYDEF; + + +#define MARIA_UNIQUE_HASH_LENGTH 4 + +typedef struct st_maria_unique_def /* Segment definition of unique */ +{ + uint16 keysegs; /* Number of key-segment */ + uint8 key; /* Mapped to which key */ + uint8 null_are_equal; + HA_KEYSEG *seg, *end; +} MARIA_UNIQUEDEF; + +typedef struct st_maria_decode_tree /* Decode huff-table */ +{ + uint16 *table; + uint quick_table_bits; + byte *intervalls; +} MARIA_DECODE_TREE; + + +struct st_maria_bit_buff; + +/* + Note that null markers should always be first in a row ! + When creating a column, one should only specify: + type, length, null_bit and null_pos +*/ + +typedef struct st_maria_columndef /* column information */ +{ + uint64 offset; /* Offset to position in row */ + enum en_fieldtype type; + uint16 length; /* length of field */ + /* Intern variable (size of total storage area for the row) */ + uint16 fill_length; + uint16 null_pos; /* Position for null marker */ + uint16 empty_pos; /* Position for empty marker */ + uint8 null_bit; /* If column may be NULL */ + /* Intern. Set if column should be zero packed (part of empty_bits) */ + uint8 empty_bit; + +#ifndef NOT_PACKED_DATABASES + void(*unpack)(struct st_maria_columndef *rec, + struct st_maria_bit_buff *buff, + byte *start, byte *end); + enum en_fieldtype base_type; + uint space_length_bits, pack_type; + MARIA_DECODE_TREE *huff_tree; +#endif +} MARIA_COLUMNDEF; + + +extern ulong maria_block_size; +extern ulong maria_concurrent_insert; +extern my_bool maria_flush, maria_single_user; +extern my_bool maria_delay_key_write; +extern my_off_t maria_max_temp_length; +extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size; +extern PAGECACHE maria_pagecache_var, *maria_pagecache; + + + /* Prototypes for maria-functions */ + +extern int maria_init(void); +extern void maria_end(void); +extern int maria_close(struct st_maria_info *file); +extern int maria_delete(struct st_maria_info *file, const byte *buff); +extern struct st_maria_info *maria_open(const char *name, int mode, + uint wait_if_locked); +extern struct st_maria_info *maria_clone(struct st_maria_share *share, int mode); +extern int maria_panic(enum ha_panic_function function); +extern int maria_rfirst(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rkey(struct st_maria_info *file, byte *buf, int inx, + const byte *key, + uint key_len, enum ha_rkey_function search_flag); +extern int maria_rlast(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rnext(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rnext_same(struct st_maria_info *info, byte *buf); +extern int maria_rprev(struct st_maria_info *file, byte *buf, int inx); +extern int maria_rrnd(struct st_maria_info *file, byte *buf, + MARIA_RECORD_POS pos); +extern int maria_scan_init(struct st_maria_info *file); +extern int maria_scan(struct st_maria_info *file, byte *buf); +extern void maria_scan_end(struct st_maria_info *file); +extern int maria_rsame(struct st_maria_info *file, byte *record, int inx); +extern int maria_rsame_with_pos(struct st_maria_info *file, byte *record, + int inx, MARIA_RECORD_POS pos); +extern int maria_update(struct st_maria_info *file, const byte *old, + byte *new_record); +extern int maria_write(struct st_maria_info *file, byte *buff); +extern MARIA_RECORD_POS maria_position(struct st_maria_info *file); +extern int maria_status(struct st_maria_info *info, MARIA_INFO *x, uint flag); +extern int maria_lock_database(struct st_maria_info *file, int lock_type); +extern int maria_create(const char *name, enum data_file_type record_type, + uint keys, MARIA_KEYDEF *keydef, + uint columns, MARIA_COLUMNDEF *columndef, + uint uniques, MARIA_UNIQUEDEF *uniquedef, + MARIA_CREATE_INFO *create_info, uint flags); +extern int maria_delete_table(const char *name); +extern int maria_rename(const char *from, const char *to); +extern int maria_extra(struct st_maria_info *file, + enum ha_extra_function function, void *extra_arg); +extern int maria_reset(struct st_maria_info *file); +extern ha_rows maria_records_in_range(struct st_maria_info *info, int inx, + key_range *min_key, key_range *max_key); +extern int maria_is_changed(struct st_maria_info *info); +extern int maria_delete_all_rows(struct st_maria_info *info); +extern uint maria_get_pointer_length(ulonglong file_length, uint def); + + +/* this is used to pass to mysql_mariachk_table */ + +#define MARIA_CHK_REPAIR 1 /* equivalent to mariachk -r */ +#define MARIA_CHK_VERIFY 2 /* Verify, run repair if failure */ + +typedef uint maria_bit_type; + +typedef struct st_maria_bit_buff +{ /* Used for packing of record */ + maria_bit_type current_byte; + uint bits; + uchar *pos, *end, *blob_pos, *blob_end; + uint error; +} MARIA_BIT_BUFF; + + +typedef struct st_maria_sort_info +{ +#ifdef THREAD + /* sync things */ + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif + MARIA_HA *info; + HA_CHECK *param; + char *buff; + SORT_KEY_BLOCKS *key_block, *key_block_end; + SORT_FT_BUF *ft_buf; + my_off_t filelength, dupp, buff_length; + ha_rows max_records; + uint current_key, total_keys; + uint got_error, threads_running; + myf myf_rw; + enum data_file_type new_data_file_type; +} MARIA_SORT_INFO; + +typedef struct st_maria_sort_param +{ + pthread_t thr; + IO_CACHE read_cache, tempfile, tempfile_for_exceptions; + DYNAMIC_ARRAY buffpek; + MARIA_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ + + MARIA_KEYDEF *keyinfo; + MARIA_SORT_INFO *sort_info; + HA_KEYSEG *seg; + byte **sort_keys; + byte *rec_buff; + void *wordlist, *wordptr; + MEM_ROOT wordroot; + char *record; + MY_TMPDIR *tmpdir; + + /* + The next two are used to collect statistics, see maria_update_key_parts for + description. + */ + ulonglong unique[HA_MAX_KEY_SEG+1]; + ulonglong notnull[HA_MAX_KEY_SEG+1]; + + MARIA_RECORD_POS pos,max_pos,filepos,start_recpos; + uint key, key_length,real_key_length,sortbuff_size; + uint maxbuffers, keys, find_length, sort_keys_length; + my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ + my_size_t rec_buff_size; + + int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *); + int (*key_read)(struct st_maria_sort_param *, byte *); + int (*key_write)(struct st_maria_sort_param *, const byte *); + void (*lock_in_memory)(HA_CHECK *); + NEAR int (*write_keys)(struct st_maria_sort_param *, register byte **, + uint , struct st_buffpek *, IO_CACHE *); + NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); + NEAR int (*write_key)(struct st_maria_sort_param *, IO_CACHE *,char *, + uint, uint); +} MARIA_SORT_PARAM; + + +/* functions in maria_check */ +void maria_chk_init(HA_CHECK *param); +int maria_chk_status(HA_CHECK *param, MARIA_HA *info); +int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag); +int maria_chk_size(HA_CHECK *param, MARIA_HA *info); +int maria_chk_key(HA_CHECK *param, MARIA_HA *info); +int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, int extend); +int maria_repair(HA_CHECK *param, register MARIA_HA *info, + my_string name, int rep_quick); +int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, + my_string name); +int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, + const char *name, int rep_quick); +int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, + const char *name, int rep_quick); +int maria_change_to_newfile(const char *filename, const char *old_ext, + const char *new_ext, myf myflags); +void maria_lock_memory(HA_CHECK *param); +int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update); +void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, + ulonglong *unique, ulonglong *notnull, + ulonglong records); +int maria_filecopy(HA_CHECK *param, File to, File from, my_off_t start, + my_off_t length, const char *type); +int maria_movepoint(MARIA_HA *info, byte *record, my_off_t oldpos, + my_off_t newpos, uint prot_key); +int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile); +int maria_test_if_almost_full(MARIA_HA *info); +int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename); +int maria_disable_indexes(MARIA_HA *info); +int maria_enable_indexes(MARIA_HA *info); +int maria_indexes_are_disabled(MARIA_HA *info); +void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows); +my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, ulonglong key_map, + my_bool force); + +int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows); +void maria_flush_bulk_insert(MARIA_HA *info, uint inx); +void maria_end_bulk_insert(MARIA_HA *info); +int maria_assign_to_pagecache(MARIA_HA *info, ulonglong key_map, + PAGECACHE *key_cache); +void maria_change_pagecache(PAGECACHE *old_key_cache, + PAGECACHE *new_key_cache); +int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves); + +/* fulltext functions */ +FT_INFO *maria_ft_init_search(uint,void *, uint, byte *, uint, + CHARSET_INFO *, byte *); + +/* 'Almost-internal' Maria functions */ + +void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, + my_bool repair); + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/include/my_atomic.h b/include/my_atomic.h index a1347d26401..5fe7e521c19 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -13,6 +13,40 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + This header defines five atomic operations: + + my_atomic_add#(&var, what) + add 'what' to *var, and return the old value of *var + + my_atomic_fas#(&var, what) + 'Fetch And Store' + store 'what' in *var, and return the old value of *var + + my_atomic_cas#(&var, &old, new) + 'Compare And Swap' + if *var is equal to *old, then store 'new' in *var, and return TRUE + otherwise store *var in *old, and return FALSE + + my_atomic_load#(&var) + return *var + + my_atomic_store#(&var, what) + store 'what' in *var + + '#' is substituted by a size suffix - 8, 16, 32, or ptr + (e.g. my_atomic_add8, my_atomic_fas32, my_atomic_casptr). + + NOTE This operations are not always atomic, so they always must be + enclosed in my_atomic_rwlock_rdlock(lock)/my_atomic_rwlock_rdunlock(lock) + or my_atomic_rwlock_wrlock(lock)/my_atomic_rwlock_wrunlock(lock). + Hint: if a code block makes intensive use of atomic ops, it make sense + to take/release rwlock once for the whole block, not for every statement. + + On architectures where these operations are really atomic, rwlocks will + be optimized away. +*/ + #ifndef my_atomic_rwlock_init #define intptr void * @@ -26,70 +60,115 @@ #endif #ifndef make_atomic_add_body -#define make_atomic_add_body(S) \ +#define make_atomic_add_body(S) \ int ## S tmp=*a; \ while (!my_atomic_cas ## S(a, &tmp, tmp+v)); \ v=tmp; #endif +#ifdef __GNUC__ +/* + we want to be able to use my_atomic_xxx functions with + both signed and unsigned integers. But gcc will issue a warning + "passing arg N of `my_atomic_XXX' as [un]signed due to prototype" + if the signedness of the argument doesn't match the prototype, or + "pointer targets in passing argument N of my_atomic_XXX differ in signedness" + if int* is used where uint* is expected (or vice versa). + Let's shut these warnings up +*/ +#define make_transparent_unions(S) \ + typedef union { \ + int ## S i; \ + uint ## S u; \ + } U_ ## S __attribute__ ((transparent_union)); \ + typedef union { \ + int ## S volatile *i; \ + uint ## S volatile *u; \ + } Uv_ ## S __attribute__ ((transparent_union)); +#define uintptr intptr +make_transparent_unions(8) +make_transparent_unions(16) +make_transparent_unions(32) +make_transparent_unions(ptr) +#undef uintptr +#undef make_transparent_unions +#define a U_a.i +#define cmp U_cmp.i +#define v U_v.i +#define set U_set.i +#else +#define U_8 int8 +#define U_16 int16 +#define U_32 int32 +#define U_ptr intptr +#define Uv_8 int8 +#define Uv_16 int16 +#define Uv_32 int32 +#define Uv_ptr intptr +#define U_a volatile *a +#define U_cmp *cmp +#define U_v v +#define U_set set +#endif /* __GCC__ transparent_union magic */ + #ifdef HAVE_INLINE -#define make_atomic_add(S) \ -static inline int ## S my_atomic_add ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_add_body(S); \ - return v; \ +#define make_atomic_add(S) \ +STATIC_INLINE int ## S my_atomic_add ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_add_body(S); \ + return v; \ } -#define make_atomic_swap(S) \ -static inline int ## S my_atomic_swap ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_swap_body(S); \ - return v; \ +#define make_atomic_fas(S) \ +STATIC_INLINE int ## S my_atomic_fas ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_fas_body(S); \ + return v; \ } -#define make_atomic_cas(S) \ -static inline int my_atomic_cas ## S(int ## S volatile *a, \ - int ## S *cmp, int ## S set) \ -{ \ - int8 ret; \ - make_atomic_cas_body(S); \ - return ret; \ +#define make_atomic_cas(S) \ +STATIC_INLINE int my_atomic_cas ## S(Uv_ ## S U_a, \ + Uv_ ## S U_cmp, U_ ## S U_set) \ +{ \ + int8 ret; \ + make_atomic_cas_body(S); \ + return ret; \ } -#define make_atomic_load(S) \ -static inline int ## S my_atomic_load ## S(int ## S volatile *a) \ -{ \ - int ## S ret; \ - make_atomic_load_body(S); \ - return ret; \ +#define make_atomic_load(S) \ +STATIC_INLINE int ## S my_atomic_load ## S(Uv_ ## S U_a) \ +{ \ + int ## S ret; \ + make_atomic_load_body(S); \ + return ret; \ } -#define make_atomic_store(S) \ -static inline void my_atomic_store ## S( \ - int ## S volatile *a, int ## S v) \ -{ \ - make_atomic_store_body(S); \ +#define make_atomic_store(S) \ +STATIC_INLINE void my_atomic_store ## S( \ + Uv_ ## S U_a, U_ ## S U_v) \ +{ \ + make_atomic_store_body(S); \ } #else /* no inline functions */ -#define make_atomic_add(S) \ -extern int ## S my_atomic_add ## S(int ## S volatile *a, int ## S v); +#define make_atomic_add(S) \ +extern int ## S my_atomic_add ## S(Uv_ ## S, U_ ## S); -#define make_atomic_swap(S) \ -extern int ## S my_atomic_swap ## S(int ## S volatile *a, int ## S v); +#define make_atomic_fas(S) \ +extern int ## S my_atomic_fas ## S(Uv_ ## S, U_ ## S); -#define make_atomic_cas(S) \ -extern int my_atomic_cas ## S(int ## S volatile *a, int ## S *cmp, int ## S set); +#define make_atomic_cas(S) \ +extern int my_atomic_cas ## S(Uv_ ## S, Uv_ ## S, U_ ## S); -#define make_atomic_load(S) \ -extern int ## S my_atomic_load ## S(int ## S volatile *a); +#define make_atomic_load(S) \ +extern int ## S my_atomic_load ## S(Uv_ ## S); -#define make_atomic_store(S) \ -extern void my_atomic_store ## S(int ## S volatile *a, int ## S v); +#define make_atomic_store(S) \ +extern void my_atomic_store ## S(Uv_ ## S, U_ ## S); #endif @@ -112,28 +191,49 @@ make_atomic_store(16) make_atomic_store(32) make_atomic_store(ptr) -make_atomic_swap( 8) -make_atomic_swap(16) -make_atomic_swap(32) -make_atomic_swap(ptr) - -#undef make_atomic_add -#undef make_atomic_cas -#undef make_atomic_load -#undef make_atomic_store -#undef make_atomic_swap -#undef make_atomic_add_body -#undef make_atomic_cas_body -#undef make_atomic_load_body -#undef make_atomic_store_body -#undef make_atomic_swap_body -#undef intptr +make_atomic_fas( 8) +make_atomic_fas(16) +make_atomic_fas(32) +make_atomic_fas(ptr) #ifdef _atomic_h_cleanup_ #include _atomic_h_cleanup_ #undef _atomic_h_cleanup_ #endif +#undef U_8 +#undef U_16 +#undef U_32 +#undef U_ptr +#undef a +#undef cmp +#undef v +#undef set +#undef U_a +#undef U_cmp +#undef U_v +#undef U_set +#undef make_atomic_add +#undef make_atomic_cas +#undef make_atomic_load +#undef make_atomic_store +#undef make_atomic_fas +#undef make_atomic_add_body +#undef make_atomic_cas_body +#undef make_atomic_load_body +#undef make_atomic_store_body +#undef make_atomic_fas_body +#undef intptr + +/* + the macro below defines (as an expression) the code that + will be run in spin-loops. Intel manuals recummend to have PAUSE there. + It is expected to be defined in include/atomic/ *.h files +*/ +#ifndef LF_BACKOFF +#define LF_BACKOFF (1) +#endif + #define MY_ATOMIC_OK 0 #define MY_ATOMIC_NOT_1CPU 1 extern int my_atomic_initialize(); diff --git a/include/my_base.h b/include/my_base.h index 617cdb8c3f0..9f07bc70f5b 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -14,7 +14,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* This file includes constants used with all databases */ -/* Author: Michael Widenius */ #ifndef _my_base_h #define _my_base_h @@ -49,7 +48,7 @@ #define HA_OPEN_FROM_SQL_LAYER 64 #define HA_OPEN_MMAP 128 /* open memory mapped */ - /* The following is parameter to ha_rkey() how to use key */ +/* The following is parameter to ha_rkey() how to use key */ /* We define a complete-field prefix of a key value as a prefix where @@ -461,7 +460,7 @@ enum en_fieldtype { }; enum data_file_type { - STATIC_RECORD,DYNAMIC_RECORD,COMPRESSED_RECORD + STATIC_RECORD, DYNAMIC_RECORD, COMPRESSED_RECORD, BLOCK_RECORD }; /* For key ranges */ @@ -513,4 +512,7 @@ typedef ulong ha_rows; #define HA_VARCHAR_PACKLENGTH(field_length) ((field_length) < 256 ? 1 :2) +/* invalidator function reference for Query Cache */ +typedef void (* invalidator_by_filename)(const char * filename); + #endif /* _my_base_h */ diff --git a/include/my_bit.h b/include/my_bit.h new file mode 100644 index 00000000000..58e8bb39683 --- /dev/null +++ b/include/my_bit.h @@ -0,0 +1,107 @@ +/* + Some useful bit functions +*/ + +#ifdef HAVE_INLINE + +extern const char _my_bits_nbits[256]; +extern const uchar _my_bits_reverse_table[256]; + +/* + Find smallest X in 2^X >= value + This can be used to divide a number with value by doing a shift instead +*/ + +STATIC_INLINE uint my_bit_log2(ulong value) +{ + uint bit; + for (bit=0 ; value > 1 ; value>>=1, bit++) ; + return bit; +} + +STATIC_INLINE uint my_count_bits(ulonglong v) +{ +#if SIZEOF_LONG_LONG > 4 + /* The following code is a bit faster on 16 bit machines than if we would + only shift v */ + ulong v2=(ulong) (v >> 32); + return (uint) (uchar) (_my_bits_nbits[(uchar) v] + + _my_bits_nbits[(uchar) (v >> 8)] + + _my_bits_nbits[(uchar) (v >> 16)] + + _my_bits_nbits[(uchar) (v >> 24)] + + _my_bits_nbits[(uchar) (v2)] + + _my_bits_nbits[(uchar) (v2 >> 8)] + + _my_bits_nbits[(uchar) (v2 >> 16)] + + _my_bits_nbits[(uchar) (v2 >> 24)]); +#else + return (uint) (uchar) (_my_bits_nbits[(uchar) v] + + _my_bits_nbits[(uchar) (v >> 8)] + + _my_bits_nbits[(uchar) (v >> 16)] + + _my_bits_nbits[(uchar) (v >> 24)]); +#endif +} + +STATIC_INLINE uint my_count_bits_ushort(ushort v) +{ + return _my_bits_nbits[v]; +} + + +/* + Next highest power of two + + SYNOPSIS + my_round_up_to_next_power() + v Value to check + + RETURN + Next or equal power of 2 + Note: 0 will return 0 + + NOTES + Algorithm by Sean Anderson, according to: + http://graphics.stanford.edu/~seander/bithacks.html + (Orignal code public domain) + + Comments shows how this works with 01100000000000000000000000001011 +*/ + +STATIC_INLINE uint32 my_round_up_to_next_power(uint32 v) +{ + v--; /* 01100000000000000000000000001010 */ + v|= v >> 1; /* 01110000000000000000000000001111 */ + v|= v >> 2; /* 01111100000000000000000000001111 */ + v|= v >> 4; /* 01111111110000000000000000001111 */ + v|= v >> 8; /* 01111111111111111100000000001111 */ + v|= v >> 16; /* 01111111111111111111111111111111 */ + return v+1; /* 10000000000000000000000000000000 */ +} + +STATIC_INLINE uint32 my_clear_highest_bit(uint32 v) +{ + uint32 w=v >> 1; + w|= w >> 1; + w|= w >> 2; + w|= w >> 4; + w|= w >> 8; + w|= w >> 16; + return v & w; +} + +STATIC_INLINE uint32 my_reverse_bits(uint32 key) +{ + return + (_my_bits_reverse_table[ key & 255] << 24) | + (_my_bits_reverse_table[(key>> 8) & 255] << 16) | + (_my_bits_reverse_table[(key>>16) & 255] << 8) | + _my_bits_reverse_table[(key>>24) ]; +} + +#else +extern uint my_bit_log2(ulong value); +extern uint32 my_round_up_to_next_power(uint32 v); +uint32 my_clear_highest_bit(uint32 v); +uint32 my_reverse_bits(uint32 key); +extern uint my_count_bits(ulonglong v); +extern uint my_count_bits_ushort(ushort v); +#endif diff --git a/include/my_dbug.h b/include/my_dbug.h index 514cd17099b..a77e439b5db 100644 --- a/include/my_dbug.h +++ b/include/my_dbug.h @@ -101,7 +101,7 @@ extern FILE *_db_fp_(void); #define DBUG_LONGJMP(a1) longjmp(a1) #define DBUG_DUMP(keyword,a1,a2) #define DBUG_END() -#define DBUG_ASSERT(A) +#define DBUG_ASSERT(A) do { } while(0) #define DBUG_LOCK_FILE #define DBUG_FILE (stderr) #define DBUG_UNLOCK_FILE diff --git a/include/my_global.h b/include/my_global.h index d6f331019f6..af9c31cf34f 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -231,6 +231,8 @@ #endif #undef inline_test_2 #undef inline_test_1 +/* helper macro for "instantiating" inline functions */ +#define STATIC_INLINE static inline /* The following macros are used to control inlining a bit more than @@ -1001,6 +1003,8 @@ typedef long long intptr; #error sizeof(void *) is neither sizeof(int) nor sizeof(long) nor sizeof(long long) #endif +#define MY_ERRPTR ((void*)(intptr)1) + #ifdef USE_RAID /* The following is done with a if to not get problems with pre-processors @@ -1462,6 +1466,7 @@ do { doubleget_union _tmp; \ #define dlerror() "" #endif + #ifndef __NETWARE__ /* * Include standard definitions of operator new and delete. @@ -1487,5 +1492,13 @@ inline void operator delete[](void*, void*) { /* Do nothing */ } /* Length of decimal number represented by INT64. */ #define MY_INT64_NUM_DECIMAL_DIGITS 21 + +/* + Only Linux is known to need an explicit sync of the directory to make sure a + file creation/deletion/renaming in(from,to) this directory durable. +*/ +#ifdef TARGET_OS_LINUX +#define NEED_EXPLICIT_SYNC_DIR 1 +#endif #endif /* my_global_h */ diff --git a/include/my_handler.h b/include/my_handler.h index d7cd0567f9c..13dcd01a332 100644 --- a/include/my_handler.h +++ b/include/my_handler.h @@ -18,10 +18,30 @@ #ifndef _my_handler_h #define _my_handler_h -#include "my_base.h" -#include "m_ctype.h" #include "myisampack.h" +/* + There is a hard limit for the maximum number of keys as there are only + 8 bits in the index file header for the number of keys in a table. + This means that 0..255 keys can exist for a table. The idea of + HA_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on + a MyISAM table for which one has more keys than MyISAM is normally + compiled for. If you don't have this, you will get a core dump when + running myisamchk compiled for 128 keys on a table with 255 keys. +*/ + +#define HA_MAX_POSSIBLE_KEY 255 /* For myisamchk */ +/* + The following defines can be increased if necessary. + But beware the dependency of HA_MAX_POSSIBLE_KEY_BUFF and HA_MAX_KEY_LENGTH. +*/ + +#define HA_MAX_KEY_LENGTH 1000 /* Max length in bytes */ +#define HA_MAX_KEY_SEG 16 /* Max segments for key */ + +#define HA_MAX_POSSIBLE_KEY_BUFF (HA_MAX_KEY_LENGTH + 24+ 6+6) +#define HA_MAX_KEY_BUFF (HA_MAX_KEY_LENGTH+HA_MAX_KEY_SEG*6+8+8) + typedef struct st_HA_KEYSEG /* Key-portion */ { CHARSET_INFO *charset; @@ -38,33 +58,35 @@ typedef struct st_HA_KEYSEG /* Key-portion */ } HA_KEYSEG; #define get_key_length(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= (uint) (uchar) *((key)++); \ +{ if (*(uchar*) (key) != 255) \ + length= (uint) *(uchar*) ((key)++); \ else \ - { length=mi_uint2korr((key)+1); (key)+=3; } \ + { length= mi_uint2korr((key)+1); (key)+=3; } \ } #define get_key_length_rdonly(length,key) \ -{ if ((uchar) *(key) != 255) \ - length= ((uint) (uchar) *((key))); \ +{ if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key))); \ else \ - { length=mi_uint2korr((key)+1); } \ + { length= mi_uint2korr((key)+1); } \ } #define get_key_pack_length(length,length_pack,key) \ -{ if ((uchar) *(key) != 255) \ - { length= (uint) (uchar) *((key)++); length_pack=1; }\ +{ if (*(uchar*) (key) != 255) \ + { length= (uint) *(uchar*) ((key)++); length_pack= 1; }\ else \ - { length=mi_uint2korr((key)+1); (key)+=3; length_pack=3; } \ + { length=mi_uint2korr((key)+1); (key)+= 3; length_pack= 3; } \ } #define store_key_length_inc(key,length) \ { if ((length) < 255) \ - { *(key)++=(length); } \ + { *(key)++= (length); } \ else \ { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \ } +#define size_to_store_key_length(length) ((length) < 255 ? 1 : 3) + #define get_rec_bits(bit_ptr, bit_ofs, bit_len) \ (((((uint16) (bit_ptr)[1] << 8) | (uint16) (bit_ptr)[0]) >> (bit_ofs)) & \ ((1 << (bit_len)) - 1)) @@ -81,7 +103,7 @@ typedef struct st_HA_KEYSEG /* Key-portion */ #define clr_rec_bits(bit_ptr, bit_ofs, bit_len) \ set_rec_bits(0, bit_ptr, bit_ofs, bit_len) -extern int mi_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , +extern int ha_compare_text(CHARSET_INFO *, uchar *, uint, uchar *, uint , my_bool, my_bool); extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, register uchar *b, uint key_length, uint nextflag, @@ -89,4 +111,11 @@ extern int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, extern HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a); +/* + Inside an in-memory data record, memory pointers to pieces of the + record (like BLOBs) are stored in their native byte order and in + this amount of bytes. +*/ +#define portable_sizeof_char_ptr 8 + #endif /* _my_handler_h */ diff --git a/include/my_sys.h b/include/my_sys.h index d1a253e4a7f..a9c7b1dd549 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -51,6 +51,7 @@ extern int NEAR my_errno; /* Last error in mysys */ #define MY_WME 16 /* Write message on error */ #define MY_WAIT_IF_FULL 32 /* Wait and try again if disk full error */ #define MY_IGNORE_BADFD 32 /* my_sync: ignore 'bad descriptor' errors */ +#define MY_SYNC_DIR 1024 /* my_create/delete/rename: sync directory */ #define MY_RAID 64 /* Support for RAID */ #define MY_FULL_IO 512 /* For my_read - loop intil I/O is complete */ #define MY_DONT_CHECK_FILESIZE 128 /* Option to init_io_cache() */ @@ -212,6 +213,7 @@ extern int (*error_handler_hook)(uint my_err, const char *str,myf MyFlags); extern int (*fatal_error_handler_hook)(uint my_err, const char *str, myf MyFlags); extern uint my_file_limit; +extern ulong my_thread_stack_size; #ifdef HAVE_LARGE_PAGES extern my_bool my_use_large_pages; @@ -275,7 +277,12 @@ enum cache_type enum flush_type { - FLUSH_KEEP, FLUSH_RELEASE, FLUSH_IGNORE_CHANGED, FLUSH_FORCE_WRITE + FLUSH_KEEP, /* flush block and keep it in the cache */ + FLUSH_RELEASE, /* flush block and remove it from the cache */ + FLUSH_IGNORE_CHANGED, /* remove block from the cache */ + /* as my_disable_flush_pagecache_blocks is always 0, it is + strictly equivalent to FLUSH_KEEP */ + FLUSH_FORCE_WRITE }; typedef struct st_record_cache /* Used when cacheing records */ @@ -626,6 +633,8 @@ extern FILE *my_fdopen(File Filedes,const char *name, int Flags,myf MyFlags); extern int my_fclose(FILE *fd,myf MyFlags); extern int my_chsize(File fd,my_off_t newlength, int filler, myf MyFlags); extern int my_sync(File fd, myf my_flags); +extern int my_sync_dir(const char *dir_name, myf my_flags); +extern int my_sync_dir_by_file(const char *file_name, myf my_flags); extern int my_error _VARARGS((int nr,myf MyFlags, ...)); extern int my_printf_error _VARARGS((uint my_err, const char *format, myf MyFlags, ...)) @@ -660,7 +669,7 @@ extern char *my_tmpdir(MY_TMPDIR *tmpdir); extern void free_tmpdir(MY_TMPDIR *tmpdir); extern void my_remember_signal(int signal_number,sig_handler (*func)(int)); -extern size_t dirname_part(char * to, const char *name, size_t *to_res_length); +extern size_t dirname_part(char * to,const char *name, size_t *to_res_length); extern size_t dirname_length(const char *name); #define base_name(A) (A+dirname_length(A)) extern int test_if_hard_path(const char *dir_name); @@ -706,14 +715,14 @@ extern sig_handler sigtstp_handler(int signal_number); extern void handle_recived_signals(void); extern sig_handler my_set_alarm_variable(int signo); -extern void my_string_ptr_sort(uchar *base, uint items, size_t size); +extern void my_string_ptr_sort(uchar *base,uint items,size_t size); extern void radixsort_for_str_ptr(uchar* base[], uint number_of_elements, - size_t size_of_element,uchar *buffer[]); + size_s size_of_element,uchar *buffer[]); extern qsort_t qsort2(void *base_ptr, size_t total_elems, size_t size, qsort2_cmp cmp, void *cmp_argument); extern qsort2_cmp get_ptr_compare(size_t); void my_store_ptr(uchar *buff, size_t pack_length, my_off_t pos); -my_off_t my_get_ptr(uchar *ptr, size_t pack_length); +my_off_t my_get_ptr(cuhar *ptr, size_t pack_length); extern int init_io_cache(IO_CACHE *info,File file,size_t cachesize, enum cache_type type,my_off_t seek_offset, pbool use_async_io, myf cache_myflags); @@ -772,6 +781,7 @@ extern my_bool insert_dynamic(DYNAMIC_ARRAY *array,uchar * element); extern uchar *alloc_dynamic(DYNAMIC_ARRAY *array); extern uchar *pop_dynamic(DYNAMIC_ARRAY*); extern my_bool set_dynamic(DYNAMIC_ARRAY *array,uchar * element,uint array_index); +extern my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements); extern void get_dynamic(DYNAMIC_ARRAY *array,uchar * element,uint array_index); extern void delete_dynamic(DYNAMIC_ARRAY *array); extern void delete_dynamic_element(DYNAMIC_ARRAY *array, uint array_index); @@ -830,19 +840,16 @@ extern void free_defaults(char **argv); extern void my_print_default_files(const char *conf_file); extern void print_defaults(const char *conf_file, const char **groups); extern my_bool my_compress(uchar *, size_t *, size_t *); -extern my_bool my_uncompress(uchar *, size_t , size_t *); +extern my_bool my_uncompress(uchar *, size_t *, size_t *); extern uchar *my_compress_alloc(const uchar *packet, size_t *len, size_t *complen); extern int packfrm(const uchar *, size_t, uchar **, size_t *); -extern int unpackfrm(uchar **, size_t *, const uchar *); +extern int unpackfrm(const uchar **, size_t *, const uchar *); extern ha_checksum my_checksum(ha_checksum crc, const uchar *mem, size_t count); -extern uint my_bit_log2(ulong value); -extern uint32 my_round_up_to_next_power(uint32 v); -extern uint my_count_bits(ulonglong v); -extern uint my_count_bits_ushort(ushort v); extern void my_sleep(ulong m_seconds); +extern ulong crc32(ulong crc, const uchar *buf, uint len); extern uint my_set_max_open_files(uint files); void my_free_open_file_info(void); @@ -856,7 +863,7 @@ extern int my_getncpus(); #ifndef MAP_NOSYNC #define MAP_NOSYNC 0 #endif -#ifndef MAP_NORESERVE +#ifndef MAP_NORESERVE #define MAP_NORESERVE 0 /* For irix and AIX */ #endif diff --git a/include/myisam.h b/include/myisam.h index 5d93ded3d6e..2ec083c3e12 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -31,33 +31,19 @@ extern "C" { #include "keycache.h" #endif #include "my_handler.h" +#include #include /* - There is a hard limit for the maximum number of keys as there are only - 8 bits in the index file header for the number of keys in a table. - This means that 0..255 keys can exist for a table. The idea of - MI_MAX_POSSIBLE_KEY is to ensure that one can use myisamchk & tools on - a MyISAM table for which one has more keys than MyISAM is normally - compiled for. If you don't have this, you will get a core dump when - running myisamchk compiled for 128 keys on a table with 255 keys. + Limit max keys according to HA_MAX_POSSIBLE_KEY; See myisamchk.h for details */ -#define MI_MAX_POSSIBLE_KEY 255 /* For myisam_chk */ -#if MAX_INDEXES > MI_MAX_POSSIBLE_KEY -#define MI_MAX_KEY MI_MAX_POSSIBLE_KEY /* Max allowed keys */ + +#if MAX_INDEXES > HA_MAX_POSSIBLE_KEY +#define MI_MAX_KEY HA_MAX_POSSIBLE_KEY /* Max allowed keys */ #else #define MI_MAX_KEY MAX_INDEXES /* Max allowed keys */ #endif -#define MI_MAX_POSSIBLE_KEY_BUFF (1024+6+6) /* For myisam_chk */ -/* - The following defines can be increased if necessary. - But beware the dependency of MI_MAX_POSSIBLE_KEY_BUFF and MI_MAX_KEY_LENGTH. -*/ -#define MI_MAX_KEY_LENGTH 1000 /* Max length in bytes */ -#define MI_MAX_KEY_SEG 16 /* Max segments for key */ - -#define MI_MAX_KEY_BUFF (MI_MAX_KEY_LENGTH+MI_MAX_KEY_SEG*6+8+8) #define MI_MAX_MSG_BUF 1024 /* used in CHECK TABLE, REPAIR TABLE */ #define MI_NAME_IEXT ".MYI" #define MI_NAME_DEXT ".MYD" @@ -69,8 +55,6 @@ extern "C" { #define MI_MIN_KEY_BLOCK_LENGTH 1024 /* Min key block length */ #define MI_MAX_KEY_BLOCK_LENGTH 16384 -#define mi_portable_sizeof_char_ptr 8 - /* In the following macros '_keyno_' is 0 .. keys-1. If there can be more keys than bits in the key_map, the highest bit @@ -256,9 +240,6 @@ typedef struct st_columndef /* column information */ #endif } MI_COLUMNDEF; -/* invalidator function reference for Query Cache */ -typedef void (* invalidator_by_filename)(const char * filename); - extern char * myisam_log_filename; /* Name of logfile */ extern ulong myisam_block_size; extern ulong myisam_concurrent_insert; @@ -302,7 +283,7 @@ extern int mi_extra(struct st_myisam_info *file, enum ha_extra_function function, void *extra_arg); extern int mi_reset(struct st_myisam_info *file); -extern ha_rows mi_records_in_range(MI_INFO *info, int inx, +extern ha_rows mi_records_in_range(MI_INFO *info,int inx, key_range *min_key, key_range *max_key); extern int mi_log(int activate_log); extern int mi_is_changed(struct st_myisam_info *info); @@ -310,195 +291,117 @@ extern int mi_delete_all_rows(struct st_myisam_info *info); extern ulong _mi_calc_blob_length(uint length , const uchar *pos); extern uint mi_get_pointer_length(ulonglong file_length, uint def); -/* this is used to pass to mysql_myisamchk_table -- by Sasha Pachev */ +/* this is used to pass to mysql_myisamchk_table */ #define MYISAMCHK_REPAIR 1 /* equivalent to myisamchk -r */ #define MYISAMCHK_VERIFY 2 /* Verify, run repair if failure */ -/* - Definitions needed for myisamchk.c +typedef uint mi_bit_type; - Entries marked as "QQ to be removed" are NOT used to - pass check/repair options to mi_check.c. They are used - internally by myisamchk.c or/and ha_myisam.cc and should NOT - be stored together with other flags. They should be removed - from the following list to make addition of new flags possible. -*/ +typedef struct st_mi_bit_buff +{ /* Used for packing of record */ + mi_bit_type current_byte; + uint bits; + uchar *pos, *end, *blob_pos, *blob_end; + uint error; +} MI_BIT_BUFF; -#define T_AUTO_INC 1 -#define T_AUTO_REPAIR 2 /* QQ to be removed */ -#define T_BACKUP_DATA 4 -#define T_CALC_CHECKSUM 8 -#define T_CHECK 16 /* QQ to be removed */ -#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */ -#define T_CREATE_MISSING_KEYS 64 -#define T_DESCRIPT 128 -#define T_DONT_CHECK_CHECKSUM 256 -#define T_EXTEND 512 -#define T_FAST (1L << 10) /* QQ to be removed */ -#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */ -#define T_FORCE_UNIQUENESS (1L << 12) -#define T_INFO (1L << 13) -#define T_MEDIUM (1L << 14) -#define T_QUICK (1L << 15) /* QQ to be removed */ -#define T_READONLY (1L << 16) /* QQ to be removed */ -#define T_REP (1L << 17) -#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */ -#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */ -#define T_RETRY_WITHOUT_QUICK (1L << 20) -#define T_SAFE_REPAIR (1L << 21) -#define T_SILENT (1L << 22) -#define T_SORT_INDEX (1L << 23) /* QQ to be removed */ -#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */ -#define T_STATISTICS (1L << 25) -#define T_UNPACK (1L << 26) -#define T_UPDATE_STATE (1L << 27) -#define T_VERBOSE (1L << 28) -#define T_VERY_SILENT (1L << 29) -#define T_WAIT_FOREVER (1L << 30) -#define T_WRITE_LOOP ((ulong) 1L << 31) -#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL) - -/* - Flags used by myisamchk.c or/and ha_myisam.cc that are NOT passed - to mi_check.c follows: -*/ - -#define TT_USEFRM 1 -#define TT_FOR_UPGRADE 2 - -#define O_NEW_INDEX 1 /* Bits set in out_flag */ -#define O_NEW_DATA 2 -#define O_DATA_LOST 4 - -/* these struct is used by my_check to tell it what to do */ - -typedef struct st_sort_key_blocks /* Used when sorting */ +typedef struct st_sort_info { - uchar *buff,*end_pos; - uchar lastkey[MI_MAX_POSSIBLE_KEY_BUFF]; - uint last_length; - int inited; -} SORT_KEY_BLOCKS; - - -/* - MyISAM supports several statistics collection methods. Currently statistics - collection method is not stored in MyISAM file and has to be specified for - each table analyze/repair operation in MI_CHECK::stats_method. -*/ - -typedef enum -{ - /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ - MI_STATS_METHOD_NULLS_NOT_EQUAL, - /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ - MI_STATS_METHOD_NULLS_EQUAL, - /* Ignore NULLs - count only tuples without NULLs in the index components */ - MI_STATS_METHOD_IGNORE_NULLS -} enum_mi_stats_method; - -typedef struct st_mi_check_param -{ - ulonglong auto_increment_value; - ulonglong max_data_file_length; - ulonglong keys_in_use; - ulonglong max_record_length; - my_off_t search_after_block; - my_off_t new_file_pos,key_file_blocks; - my_off_t keydata,totaldata,key_blocks,start_check_pos; - ha_rows total_records,total_deleted; - ha_checksum record_checksum,glob_crc; - ulong use_buffers,read_buffer_length,write_buffer_length, - sort_buffer_length,sort_key_blocks; - uint out_flag,warning_printed,error_printed,verbose; - uint opt_sort_key,total_files,max_level; - uint testflag, key_cache_block_size; - uint8 language; - my_bool using_global_keycache, opt_lock_memory, opt_follow_links; - my_bool retry_repair, force_sort; - char temp_filename[FN_REFLEN],*isam_file_name; - MY_TMPDIR *tmpdir; - int tmpfile_createflag; +#ifdef THREAD + /* sync things */ + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif + MI_INFO *info; + HA_CHECK *param; + char *buff; + SORT_KEY_BLOCKS *key_block, *key_block_end; + SORT_FT_BUF *ft_buf; + my_off_t filelength, dupp, buff_length; + ha_rows max_records; + uint current_key, total_keys; + uint got_error, threads_running; myf myf_rw; - IO_CACHE read_cache; + enum data_file_type new_data_file_type; +} MI_SORT_INFO; + +typedef struct st_mi_sort_param +{ + pthread_t thr; + IO_CACHE read_cache, tempfile, tempfile_for_exceptions; + DYNAMIC_ARRAY buffpek; + MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ + MI_KEYDEF *keyinfo; + MI_SORT_INFO *sort_info; + HA_KEYSEG *seg; + uchar **sort_keys; + byte *rec_buff; + void *wordlist, *wordptr; + MEM_ROOT wordroot; + char *record; + MY_TMPDIR *tmpdir; + /* The next two are used to collect statistics, see update_key_parts for description. */ - ulonglong unique_count[MI_MAX_KEY_SEG+1]; - ulonglong notnull_count[MI_MAX_KEY_SEG+1]; - - ha_checksum key_crc[MI_MAX_POSSIBLE_KEY]; - ulong rec_per_key_part[MI_MAX_KEY_SEG*MI_MAX_POSSIBLE_KEY]; - void *thd; - const char *db_name, *table_name; - const char *op_name; - enum_mi_stats_method stats_method; -} MI_CHECK; + ulonglong unique[HA_MAX_KEY_SEG+1]; + ulonglong notnull[HA_MAX_KEY_SEG+1]; -typedef struct st_sort_ft_buf -{ - uchar *buf, *end; - int count; - uchar lastkey[MI_MAX_KEY_BUFF]; -} SORT_FT_BUF; + my_off_t pos,max_pos,filepos,start_recpos; + uint key, key_length,real_key_length,sortbuff_size; + uint maxbuffers, keys, find_length, sort_keys_length; + my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ + + int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); + int (*key_read)(struct st_mi_sort_param *,void *); + int (*key_write)(struct st_mi_sort_param *, const void *); + void (*lock_in_memory)(HA_CHECK *); + NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **, + uint , struct st_buffpek *, IO_CACHE *); + NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); + NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *, + uint, uint); +} MI_SORT_PARAM; -typedef struct st_sort_info -{ - my_off_t filelength,dupp,buff_length; - ha_rows max_records; - uint current_key, total_keys; - myf myf_rw; - enum data_file_type new_data_file_type; - MI_INFO *info; - MI_CHECK *param; - char *buff; - SORT_KEY_BLOCKS *key_block,*key_block_end; - SORT_FT_BUF *ft_buf; - /* sync things */ - uint got_error, threads_running; -#ifdef THREAD - pthread_mutex_t mutex; - pthread_cond_t cond; -#endif -} SORT_INFO; /* functions in mi_check */ -void myisamchk_init(MI_CHECK *param); -int chk_status(MI_CHECK *param, MI_INFO *info); -int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag); -int chk_size(MI_CHECK *param, MI_INFO *info); -int chk_key(MI_CHECK *param, MI_INFO *info); -int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend); -int mi_repair(MI_CHECK *param, register MI_INFO *info, +void myisamchk_init(HA_CHECK *param); +int chk_status(HA_CHECK *param, MI_INFO *info); +int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag); +int chk_size(HA_CHECK *param, MI_INFO *info); +int chk_key(HA_CHECK *param, MI_INFO *info); +int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend); +int mi_repair(HA_CHECK *param, register MI_INFO *info, char * name, int rep_quick); -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name); -int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, +int mi_sort_index(HA_CHECK *param, register MI_INFO *info, char * name); +int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick); -int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, +int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick); int change_to_newfile(const char * filename, const char * old_ext, const char * new_ext, uint raid_chunks, myf myflags); -int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, +int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, const char *filetype, const char *filename); -void lock_memory(MI_CHECK *param); -void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, +void lock_memory(HA_CHECK *param); +void update_auto_increment_key(HA_CHECK *param, MI_INFO *info, my_bool repair); -int update_state_info(MI_CHECK *param, MI_INFO *info,uint update); +int update_state_info(HA_CHECK *param, MI_INFO *info,uint update); void update_key_parts(MI_KEYDEF *keyinfo, ulong *rec_per_key_part, ulonglong *unique, ulonglong *notnull, ulonglong records); -int filecopy(MI_CHECK *param, File to,File from,my_off_t start, +int filecopy(HA_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type); int movepoint(MI_INFO *info,uchar *record,my_off_t oldpos, my_off_t newpos, uint prot_key); -int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile); +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile); int test_if_almost_full(MI_INFO *info); -int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename); +int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename); void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows); my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, ulonglong key_map, my_bool force); @@ -512,6 +415,13 @@ void mi_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache); int mi_preload(MI_INFO *info, ulonglong key_map, my_bool ignore_leaves); +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile); +int flush_pending_blocks(MI_SORT_PARAM *param); +int sort_ft_buf_flush(MI_SORT_PARAM *sort_param); +int thr_write_keys(MI_SORT_PARAM *sort_param); +int sort_write_record(MI_SORT_PARAM *sort_param); +int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong); + #ifdef __cplusplus } #endif diff --git a/include/myisamchk.h b/include/myisamchk.h new file mode 100644 index 00000000000..887cf835b87 --- /dev/null +++ b/include/myisamchk.h @@ -0,0 +1,164 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Definitions needed for myisamchk/mariachk.c */ + +/* + Entries marked as "QQ to be removed" are NOT used to + pass check/repair options to xxx_check.c. They are used + internally by xxxchk.c or/and ha_xxxx.cc and should NOT + be stored together with other flags. They should be removed + from the following list to make addition of new flags possible. +*/ + +#ifndef _myisamchk_h +#define _myisamchk_h + +#define T_AUTO_INC 1 +#define T_AUTO_REPAIR 2 /* QQ to be removed */ +#define T_BACKUP_DATA 4 +#define T_CALC_CHECKSUM 8 +#define T_CHECK 16 /* QQ to be removed */ +#define T_CHECK_ONLY_CHANGED 32 /* QQ to be removed */ +#define T_CREATE_MISSING_KEYS 64 +#define T_DESCRIPT 128 +#define T_DONT_CHECK_CHECKSUM 256 +#define T_EXTEND 512 +#define T_FAST (1L << 10) /* QQ to be removed */ +#define T_FORCE_CREATE (1L << 11) /* QQ to be removed */ +#define T_FORCE_UNIQUENESS (1L << 12) +#define T_INFO (1L << 13) +#define T_MEDIUM (1L << 14) +#define T_QUICK (1L << 15) /* QQ to be removed */ +#define T_READONLY (1L << 16) /* QQ to be removed */ +#define T_REP (1L << 17) +#define T_REP_BY_SORT (1L << 18) /* QQ to be removed */ +#define T_REP_PARALLEL (1L << 19) /* QQ to be removed */ +#define T_RETRY_WITHOUT_QUICK (1L << 20) +#define T_SAFE_REPAIR (1L << 21) +#define T_SILENT (1L << 22) +#define T_SORT_INDEX (1L << 23) /* QQ to be removed */ +#define T_SORT_RECORDS (1L << 24) /* QQ to be removed */ +#define T_STATISTICS (1L << 25) +#define T_UNPACK (1L << 26) +#define T_UPDATE_STATE (1L << 27) +#define T_VERBOSE (1L << 28) +#define T_VERY_SILENT (1L << 29) +#define T_WAIT_FOREVER (1L << 30) +#define T_WRITE_LOOP ((ulong) 1L << 31) + +#define T_REP_ANY (T_REP | T_REP_BY_SORT | T_REP_PARALLEL) + +/* + Flags used by xxxxchk.c or/and ha_xxxx.cc that are NOT passed + to xxxcheck.c follows: +*/ + +#define TT_USEFRM 1 +#define TT_FOR_UPGRADE 2 + +#define O_NEW_INDEX 1 /* Bits set in out_flag */ +#define O_NEW_DATA 2 +#define O_DATA_LOST 4 + +typedef struct st_sort_key_blocks /* Used when sorting */ +{ + byte *buff, *end_pos; + byte lastkey[HA_MAX_POSSIBLE_KEY_BUFF]; + uint last_length; + int inited; +} SORT_KEY_BLOCKS; + + +/* + MARIA/MYISAM supports several statistics collection + methods. Currently statistics collection method is not stored in + MARIA file and has to be specified for each table analyze/repair + operation in MI_CHECK::stats_method. +*/ + +typedef enum +{ + /* Treat NULLs as inequal when collecting statistics (default for 4.1/5.0) */ + MI_STATS_METHOD_NULLS_NOT_EQUAL, + /* Treat NULLs as equal when collecting statistics (like 4.0 did) */ + MI_STATS_METHOD_NULLS_EQUAL, + /* Ignore NULLs - count only tuples without NULLs in the index components */ + MI_STATS_METHOD_IGNORE_NULLS +} enum_handler_stats_method; + + +typedef struct st_handler_check_param +{ + char *isam_file_name; + MY_TMPDIR *tmpdir; + void *thd; + const char *db_name, *table_name, *op_name; + ulonglong auto_increment_value; + ulonglong max_data_file_length; + ulonglong keys_in_use; + ulonglong max_record_length; + /* + The next two are used to collect statistics, see update_key_parts for + description. + */ + ulonglong unique_count[HA_MAX_KEY_SEG + 1]; + ulonglong notnull_count[HA_MAX_KEY_SEG + 1]; + + my_off_t search_after_block; + my_off_t new_file_pos, key_file_blocks; + my_off_t keydata, totaldata, key_blocks, start_check_pos; + my_off_t used, empty, splits, del_length, link_used; + ha_rows total_records, total_deleted, records,del_blocks; + ha_rows full_page_count, tail_count; + ha_checksum record_checksum, glob_crc; + ha_checksum key_crc[HA_MAX_POSSIBLE_KEY]; + ha_checksum tmp_key_crc[HA_MAX_POSSIBLE_KEY]; + ha_checksum tmp_record_checksum; + ulong use_buffers, read_buffer_length, write_buffer_length; + ulong sort_buffer_length, sort_key_blocks; + ulong rec_per_key_part[HA_MAX_KEY_SEG * HA_MAX_POSSIBLE_KEY]; + uint out_flag, warning_printed, error_printed, verbose; + uint opt_sort_key, total_files, max_level; + uint testflag, key_cache_block_size, pagecache_block_size; + int tmpfile_createflag, err_count; + myf myf_rw; + uint8 language; + my_bool using_global_keycache, opt_lock_memory, opt_follow_links; + my_bool retry_repair, force_sort, calc_checksum, static_row_size; + char temp_filename[FN_REFLEN]; + IO_CACHE read_cache; + enum_handler_stats_method stats_method; +} HA_CHECK; + + +typedef struct st_sort_ftbuf +{ + byte *buf, *end; + int count; + byte lastkey[HA_MAX_KEY_BUFF]; +} SORT_FT_BUF; + + +typedef struct st_buffpek { + my_off_t file_pos; /* Where we are in the sort file */ + byte *base, *key; /* Key pointers */ + ha_rows count; /* Number of rows in table */ + ulong mem_count; /* numbers of keys in memory */ + ulong max_keys; /* Max keys in buffert */ +} BUFFPEK; + +#endif /* _myisamchk_h */ diff --git a/include/wqueue.h b/include/wqueue.h new file mode 100644 index 00000000000..bacabb8c401 --- /dev/null +++ b/include/wqueue.h @@ -0,0 +1,26 @@ + +#ifndef _wqueue_h +#define _wqueue_h + +#include +#include + +/* info about requests in a waiting queue */ +typedef struct st_pagecache_wqueue +{ + struct st_my_thread_var *last_thread; /* circular list of waiting + threads */ +} WQUEUE; + +#ifdef THREAD +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread); +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, + pthread_mutex_t *lock); +void wqueue_release_queue(WQUEUE *wqueue); + +#endif + +#endif diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt index 7d4dcc1e919..13ac2a37fb5 100644 --- a/libmysql/CMakeLists.txt +++ b/libmysql/CMakeLists.txt @@ -54,6 +54,7 @@ ADD_LIBRARY(libmysql SHARED dll.c libmysql.def ../mysys/my_open.c ../mysys/my_pread.c ../mysys/my_pthread.c ../mysys/my_read.c ../mysys/my_realloc.c ../mysys/my_rename.c ../mysys/my_seek.c ../mysys/my_static.c ../strings/my_strtoll10.c ../mysys/my_symlink.c + ../mysys/my_sync.c ../mysys/my_symlink2.c ../mysys/my_thr_init.c ../sql-common/my_time.c ../strings/my_vsnprintf.c ../mysys/my_wincond.c ../mysys/my_winthread.c ../mysys/my_write.c ../sql/net_serv.cc ../sql-common/pack.c ../sql/password.c diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index c24c6ab52db..70b67cee28e 100644 --- a/libmysql/Makefile.shared +++ b/libmysql/Makefile.shared @@ -68,7 +68,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \ mf_iocache2.lo my_seek.lo my_sleep.lo \ my_pread.lo mf_cache.lo md5.lo sha1.lo \ my_getopt.lo my_gethostbyname.lo my_port.lo \ - my_rename.lo my_chsize.lo + my_rename.lo my_chsize.lo my_sync.lo sqlobjects = net.lo sql_cmn_objects = pack.lo client.lo my_time.lo diff --git a/mysql-test/include/have_maria.inc b/mysql-test/include/have_maria.inc new file mode 100644 index 00000000000..955e2305ca5 --- /dev/null +++ b/mysql-test/include/have_maria.inc @@ -0,0 +1,4 @@ +-- require r/have_maria.require +disable_query_log; +show variables like "have_maria"; +enable_query_log; diff --git a/mysql-test/mysql-test-run.pl b/mysql-test/mysql-test-run.pl index 5f07d84ad98..06c1906cd18 100755 --- a/mysql-test/mysql-test-run.pl +++ b/mysql-test/mysql-test-run.pl @@ -1265,19 +1265,6 @@ sub command_line_setup () { $path_ndb_testrun_log= "$opt_vardir/log/ndb_testrun.log"; $path_snapshot= "$opt_tmpdir/snapshot_$opt_master_myport/"; - - if ( $opt_valgrind and $opt_debug ) - { - # When both --valgrind and --debug is selected, send - # all output to the trace file, making it possible to - # see the exact location where valgrind complains - foreach my $mysqld (@{$master}, @{$slave}) - { - my $sidx= $mysqld->{idx} ? "$mysqld->{idx}" : ""; - $mysqld->{path_myerr}= - "$opt_vardir/log/" . $mysqld->{type} . "$sidx.trace"; - } - } } # @@ -2330,6 +2317,25 @@ sub setup_vardir() { { unlink($name); } + if ( $opt_valgrind and $opt_debug ) + { + # When both --valgrind and --debug is selected, send + # all output to the trace file, making it possible to + # see the exact location where valgrind complains + foreach my $mysqld (@{$master}, @{$slave}) + { + my $sidx= $mysqld->{idx} ? "$mysqld->{idx}" : ""; + my $trace_name= "$opt_vardir/log/" . $mysqld->{type} . "$sidx.trace"; + open(LOG, ">$mysqld->{path_myerr}") or die "Can't create $mysqld->{path_myerr}\n"; + print LOG " +NOTE: When running with --valgrind --debug the output from the .err file is +stored together with the trace file to make it easier to find the exact +position for valgrind errors. +See trace file $trace_name.\n"; + close(LOG); + $mysqld->{path_myerr}= $trace_name; + } + } } diff --git a/mysql-test/r/events_logs_tests.result b/mysql-test/r/events_logs_tests.result index 0d49060f4a9..916b5fd2af2 100644 --- a/mysql-test/r/events_logs_tests.result +++ b/mysql-test/r/events_logs_tests.result @@ -86,7 +86,7 @@ slo_val val 20 0 1 0 "Check slow log. Should see 1 row because 2 is over the threshold of 1 for GLOBAL, though under SESSION which is 10" -SELECT user_host, query_time, db, sql_text FROM mysql.slow_log; +SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event"; user_host query_time db sql_text USER_HOST SLEEPVAL events_test INSERT INTO slow_event_test SELECT @@long_query_time, SLEEP(2) DROP EVENT long_event2; diff --git a/mysql-test/r/have_maria.require b/mysql-test/r/have_maria.require new file mode 100644 index 00000000000..02988af6976 --- /dev/null +++ b/mysql-test/r/have_maria.require @@ -0,0 +1,2 @@ +Variable_name Value +have_maria YES diff --git a/mysql-test/r/maria-big.result b/mysql-test/r/maria-big.result new file mode 100644 index 00000000000..ca18d992187 --- /dev/null +++ b/mysql-test/r/maria-big.result @@ -0,0 +1,61 @@ +set storage_engine=maria; +affected rows: 0 +drop table if exists t1, t2; +affected rows: 0 +create table t1(a char(3)); +affected rows: 0 +insert into t1 values("abc"); +affected rows: 1 +insert into t1 select "def" from t1; +affected rows: 1 +info: Records: 1 Duplicates: 0 Warnings: 0 +insert into t1 select "ghi" from t1; +affected rows: 2 +info: Records: 2 Duplicates: 0 Warnings: 0 +insert into t1 select "jkl" from t1; +affected rows: 4 +info: Records: 4 Duplicates: 0 Warnings: 0 +insert into t1 select "mno" from t1; +affected rows: 8 +info: Records: 8 Duplicates: 0 Warnings: 0 +insert into t1 select "pqr" from t1; +affected rows: 16 +info: Records: 16 Duplicates: 0 Warnings: 0 +insert into t1 select "stu" from t1; +affected rows: 32 +info: Records: 32 Duplicates: 0 Warnings: 0 +insert into t1 select "vwx" from t1; +affected rows: 64 +info: Records: 64 Duplicates: 0 Warnings: 0 +insert into t1 select "yza" from t1; +affected rows: 128 +info: Records: 128 Duplicates: 0 Warnings: 0 +insert into t1 select "ceg" from t1; +affected rows: 256 +info: Records: 256 Duplicates: 0 Warnings: 0 +insert into t1 select "ikm" from t1; +affected rows: 512 +info: Records: 512 Duplicates: 0 Warnings: 0 +insert into t1 select "oqs" from t1; +affected rows: 1024 +info: Records: 1024 Duplicates: 0 Warnings: 0 +select count(*) from t1; +count(*) +2048 +affected rows: 1 +insert into t1 select "uwy" from t1; +affected rows: 2048 +info: Records: 2048 Duplicates: 0 Warnings: 0 +create table t2 select * from t1; +affected rows: 4096 +info: Records: 4096 Duplicates: 0 Warnings: 0 +select count(*) from t1; +count(*) +4096 +affected rows: 1 +select count(*) from t2; +count(*) +4096 +affected rows: 1 +drop table t1, t2; +affected rows: 0 diff --git a/mysql-test/r/maria.result b/mysql-test/r/maria.result new file mode 100644 index 00000000000..3ec9af0fffa --- /dev/null +++ b/mysql-test/r/maria.result @@ -0,0 +1,1813 @@ +set global storage_engine=maria; +set session storage_engine=maria; +drop table if exists t1,t2; +SET SQL_WARNINGS=1; +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +insert t1 values (4),(2),(5); +ERROR 23000: Duplicate entry '2' for key 'PRIMARY' +select * from t1; +a +1 +2 +3 +4 +SHOW BINLOG EVENTS FROM 102; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 102 Query 1 200 use `test`; CREATE TABLE t1 (a int primary key) +master-bin.000001 200 Query 1 291 use `test`; insert t1 values (1),(2),(3) +master-bin.000001 291 Query 1 382 use `test`; insert t1 values (4),(2),(5) +drop table t1; +set binlog_format=default; +CREATE TABLE t1 ( +STRING_DATA char(255) default NULL, +KEY string_data (STRING_DATA) +); +INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); +INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); +INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'); +INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG'); +INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH'); +INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW'); +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +delete from t1 where (a & 1); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)); +insert into t1 (b) values (1),(2),(2),(2),(2); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 a A 5 NULL NULL BTREE +t1 1 b 1 b A 1 NULL NULL BTREE +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status Table is already up to date +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 0 PRIMARY 1 a A 5 NULL NULL BTREE +t1 1 b 1 b A 1 NULL NULL BTREE +drop table t1; +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)); +insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); +explain select * from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select * from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select * from t1 order by c; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select a from t1 order by a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL PRIMARY 4 NULL 4 Using index +explain select b from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL b 4 NULL 4 Using index +explain select a,b from t1 order by b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using filesort +explain select a,b from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +explain select a,b,c from t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +drop table t1; +set autocommit=0; +begin; +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (1), (2), (3); +LOCK TABLES t1 WRITE; +INSERT INTO t1 VALUES (1), (2), (3); +commit; +set autocommit=1; +UNLOCK TABLES; +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +DROP TABLE t1; +create table t1 ( t1 char(255), key(t1(250))); +insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); +insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); +insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); +insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907'); +insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011'); +insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101'); +insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564'); +insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002'); +insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834'); +insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016'); +insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899'); +insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482'); +insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139'); +insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477'); +insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755'); +insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188'); +insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454'); +insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656'); +insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741'); +insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178'); +insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049'); +insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635'); +insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573'); +insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878'); +insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077'); +delete from t1 where t1>'2'; +insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'), +('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'), +('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47'); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 +int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 +int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int, +i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34 +int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int, +i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51 +int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int, +i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68 +int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int, +i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85 +int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int, +i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102 +int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110 +int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118 +int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126 +int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134 +int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142 +int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150 +int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158 +int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166 +int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174 +int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182 +int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190 +int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198 +int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206 +int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214 +int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222 +int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230 +int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238 +int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246 +int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254 +int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262 +int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270 +int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278 +int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286 +int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294 +int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302 +int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310 +int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318 +int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326 +int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334 +int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342 +int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350 +int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358 +int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366 +int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374 +int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382 +int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390 +int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398 +int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406 +int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414 +int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422 +int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430 +int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438 +int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446 +int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454 +int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462 +int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470 +int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478 +int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486 +int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494 +int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502 +int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510 +int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518 +int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526 +int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534 +int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542 +int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550 +int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558 +int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566 +int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574 +int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582 +int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590 +int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598 +int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606 +int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614 +int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622 +int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630 +int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638 +int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646 +int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654 +int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662 +int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670 +int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678 +int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686 +int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694 +int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702 +int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710 +int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718 +int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726 +int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734 +int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742 +int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750 +int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758 +int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766 +int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774 +int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782 +int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790 +int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798 +int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806 +int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814 +int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822 +int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830 +int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838 +int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846 +int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854 +int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862 +int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870 +int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878 +int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886 +int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894 +int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902 +int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910 +int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918 +int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926 +int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934 +int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942 +int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950 +int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958 +int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966 +int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 +int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 +int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 +int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 +int, i999 int, i1000 int, b blob) row_format=dynamic; +insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei"); +update t1 set b=repeat('a',256); +update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +delete from t1 where i8=1; +select i1,i2 from t1; +i1 i2 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +CREATE TABLE `t1` ( +`post_id` mediumint(8) unsigned NOT NULL auto_increment, +`topic_id` mediumint(8) unsigned NOT NULL default '0', +`post_time` datetime NOT NULL default '0000-00-00 00:00:00', +`post_text` text NOT NULL, +`icon_url` varchar(10) NOT NULL default '', +`sign` tinyint(1) unsigned NOT NULL default '0', +`post_edit` varchar(150) NOT NULL default '', +`poster_login` varchar(35) NOT NULL default '', +`ip` varchar(15) NOT NULL default '', +PRIMARY KEY (`post_id`), +KEY `post_time` (`post_time`), +KEY `ip` (`ip`), +KEY `poster_login` (`poster_login`), +KEY `topic_id` (`topic_id`), +FULLTEXT KEY `post_text` (`post_text`) +); +INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); +REPAIR TABLE t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)); +ERROR 42000: Specified key was too long; max key length is 1112 bytes +CREATE TABLE t1 (a varchar(32000), unique key(a)); +ERROR 42000: Specified key was too long; max key length is 1112 bytes +CREATE TABLE t1 (a varchar(1), b varchar(1), key (a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b)); +ERROR 42000: Too many key parts specified; max 16 parts allowed +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)); +ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); +ERROR 42000: Specified key was too long; max key length is 1112 bytes +DROP TABLE t1; +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)); +INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)); +INSERT into t2 values (1,1,1), (2,2,2); +optimize table t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 b 1 b A 5 NULL NULL YES BTREE +t1 1 c 1 c A 5 NULL NULL YES BTREE +t1 1 a 1 a A 1 NULL NULL BTREE +t1 1 a 2 b A 5 NULL NULL YES BTREE +t1 1 c_2 1 c A 5 NULL NULL YES BTREE +t1 1 c_2 2 a A 5 NULL NULL BTREE +explain select * from t1,t2 where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ref a a 4 test.t2.a 3 +explain select * from t1,t2 force index(a) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ref a a 4 test.t2.a 3 +explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL a NULL NULL NULL 2 +1 SIMPLE t1 ref a a 4 test.t2.a 3 +explain select * from t1,t2 where t1.b=t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL b NULL NULL NULL 2 +1 SIMPLE t1 ref b b 5 test.t2.b 1 Using where +explain select * from t1,t2 force index(c) where t1.a=t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 2 +1 SIMPLE t1 ref a a 4 test.t2.a 3 +explain select * from t1 where a=0 or a=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 4 NULL 4 Using where +explain select * from t1 force index (a) where a=0 or a=2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 4 NULL 4 Using where +explain select * from t1 where c=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref c,c_2 c 5 const 1 Using where +explain select * from t1 use index() where c=1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where +drop table t1,t2; +create table t1 (a int not null auto_increment primary key, b varchar(255)); +insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); +update t1 set b=repeat(left(b,1),200) where a=1; +delete from t1 where (a & 1)= 0; +update t1 set b=repeat('e',200) where a=1; +flush tables; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +update t1 set b=repeat(left(b,1),255) where a between 1 and 5; +update t1 set b=repeat(left(b,1),10) where a between 32 and 43; +update t1 set b=repeat(left(b,1),2) where a between 64 and 66; +update t1 set b=repeat(left(b,1),65) where a between 67 and 70; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +insert into t1 (b) values (repeat('z',100)); +update t1 set b="test" where left(b,1) > 'n'; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 ( a text not null, key a (a(20))); +insert into t1 values ('aaa '),('aaa'),('aa'); +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +repair table t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +select concat(a,'.') from t1 where a='aaa'; +concat(a,'.') +aaa . +aaa. +select concat(a,'.') from t1 where binary a='aaa'; +concat(a,'.') +aaa. +update t1 set a='bbb' where a='aaa'; +select concat(a,'.') from t1; +concat(a,'.') +bbb. +bbb. +aa. +drop table t1; +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))); +insert into t1 values('807780', '477', '165'); +insert into t1 values('807780', '477', '162'); +insert into t1 values('807780', '472', '162'); +select * from t1 where a='807780' and b='477' and c='165'; +a b c +807780 477 165 +drop table t1; +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)); +INSERT t1 VALUES ("can \tcan"); +INSERT t1 VALUES ("can can"); +INSERT t1 VALUES ("can"); +SELECT * FROM t1; +a +can can +can +can can +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; +create table t1 (a blob); +insert into t1 values('a '),('a'); +select concat(a,'.') from t1 where a='a'; +concat(a,'.') +a. +select concat(a,'.') from t1 where a='a '; +concat(a,'.') +a . +alter table t1 add key(a(2)); +select concat(a,'.') from t1 where a='a'; +concat(a,'.') +a. +select concat(a,'.') from t1 where a='a '; +concat(a,'.') +a . +drop table t1; +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))); +insert into t1 (b) values ('a'),('b'),('c'); +select concat(b,'.') from t1; +concat(b,'.') +a. +b. +c. +update t1 set b='b ' where a=2; +update t1 set b='b ' where a > 1; +ERROR 23000: Duplicate entry 'b ' for key 'b' +insert into t1 (b) values ('b'); +ERROR 23000: Duplicate entry 'b' for key 'b' +select * from t1; +a b +1 a +2 b +3 c +delete from t1 where b='b'; +select a,concat(b,'.') from t1; +a concat(b,'.') +1 a. +3 c. +drop table t1; +create table t1 (a int not null); +create table t2 (a int not null, primary key (a)); +insert into t1 values (1); +insert into t2 values (1),(2); +select sql_big_result distinct t1.a from t1,t2 order by t2.a; +a +1 +select distinct t1.a from t1,t2 order by t2.a; +a +1 +select sql_big_result distinct t1.a from t1,t2; +a +1 +explain select sql_big_result distinct t1.a from t1,t2 order by t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary +1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct +explain select distinct t1.a from t1,t2 order by t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 system NULL NULL NULL NULL 1 Using temporary +1 SIMPLE t2 index NULL PRIMARY 4 NULL 2 Using index; Distinct +drop table t1,t2; +create table t1 ( +c1 varchar(32), +key (c1) +); +alter table t1 disable keys; +insert into t1 values ('a'), ('b'); +select c1 from t1 order by c1 limit 1; +c1 +a +drop table t1; +create table t1 (a int not null, primary key(a)) ROW_FORMAT=FIXED; +create table t2 (a int not null, b int not null, primary key(a,b)) ROW_FORMAT=FIXED; +insert into t1 values (1),(2),(3),(4),(5),(6); +insert into t2 values (1,1),(2,1); +set autocommit=0; +begin; +lock tables t1 read local, t2 read local; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +a a b +1 1 1 +2 2 1 +insert into t2 values(2,0); +commit; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +a a b +1 1 1 +2 2 1 +drop table t1,t2; +CREATE TABLE t1 (c1 varchar(250) NOT NULL) ROW_FORMAT=DYNAMIC; +CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)) ROW_FORMAT=DYNAMIC; +INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003'); +INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004'); +LOCK TABLES t1 READ LOCAL, t2 READ LOCAL; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 +WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +t1c1 t2c1 +INSERT INTO t2 VALUES ('test000001'), ('test000005'); +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 +WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +t1c1 t2c1 +DROP TABLE t1,t2; +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)); +Got one of the listed errors +create table t1 (a int, b varchar(200), c text not null) checksum=1; +create table t2 (a int, b varchar(200), c text not null) checksum=0; +insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); +insert t2 select * from t1; +checksum table t1, t2, t3 quick; +Table Checksum +test.t1 2948697075 +test.t2 NULL +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +checksum table t1, t2, t3; +Table Checksum +test.t1 2948697075 +test.t2 2948697075 +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +checksum table t1, t2, t3 extended; +Table Checksum +test.t1 2948697075 +test.t2 2948697075 +test.t3 NULL +Warnings: +Error 1146 Table 'test.t3' doesn't exist +drop table t1,t2; +create table t1 (a int, key (a)); +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE +alter table t1 disable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +create table t2 (a int); +set @@rand_seed1=31415926,@@rand_seed2=2718281828; +insert t1 select * from t2; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +alter table t1 enable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A NULL NULL NULL YES BTREE disabled +alter table t1 engine=heap; +alter table t1 disable keys; +Warnings: +Note 1031 Table storage engine for 't1' doesn't have this option +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a NULL 500 NULL NULL YES HASH +drop table t1,t2; +create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); +insert into t1 values (null,''), (null,''); +explain select count(*) from t1 where a is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref idx idx 4 const 1 Using where +select count(*) from t1 where a is null; +count(*) +2 +drop table t1; +create table t1 (c1 int, c2 varchar(4) not null default '', +key(c2(3))) default charset=utf8; +insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); +update t1 set c2='A B' where c1=2; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (c1 int); +insert into t1 values (1),(2),(3),(4); +checksum table t1; +Table Checksum +test.t1 149057747 +delete from t1 where c1 = 1; +create table t2 as select * from t1; +checksum table t1; +Table Checksum +test.t1 984116287 +checksum table t2; +Table Checksum +test.t2 984116287 +drop table t1, t2; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_unequal +create table t1 (a int, key(a)); +insert into t1 values (0),(1),(2),(3),(4); +insert into t1 select NULL from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +set maria_stats_method=nulls_equal; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_equal +insert into t1 values (11); +delete from t1 where a=11; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 5 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 5 NULL NULL YES BTREE +set maria_stats_method=DEFAULT; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_unequal +insert into t1 values (11); +delete from t1 where a=11; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 10 NULL NULL YES BTREE +drop table t1; +set maria_stats_method=nulls_ignored; +show variables like 'maria_stats_method'; +Variable_name Value +maria_stats_method nulls_ignored +create table t1 ( +a char(3), b char(4), c char(5), d char(6), +key(a,b,c,d) +); +insert into t1 values ('bcd','def1', NULL, 'zz'); +insert into t1 values ('bcd','def2', NULL, 'zz'); +insert into t1 values ('bce','def1', 'yuu', NULL); +insert into t1 values ('bce','def2', NULL, 'quux'); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 2 NULL NULL YES BTREE +t1 1 a 2 b A 4 NULL NULL YES BTREE +t1 1 a 3 c A 4 NULL NULL YES BTREE +t1 1 a 4 d A 4 NULL NULL YES BTREE +delete from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show index from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 0 NULL NULL YES BTREE +t1 1 a 2 b A 0 NULL NULL YES BTREE +t1 1 a 3 c A 0 NULL NULL YES BTREE +t1 1 a 4 d A 0 NULL NULL YES BTREE +set maria_stats_method=DEFAULT; +drop table t1; +create table t1( +cip INT NOT NULL, +time TIME NOT NULL, +score INT NOT NULL DEFAULT 0, +bob TINYBLOB +); +insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); +insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), +(6, 'c', '00:06'); +select * from t1 where bob is null and cip=1; +cip time score bob +1 00:01:00 0 NULL +create index bug on t1 (bob(22), cip, time); +select * from t1 where bob is null and cip=1; +cip time score bob +1 00:01:00 0 NULL +drop table t1; +create table t1 ( +id1 int not null auto_increment, +id2 int not null default '0', +t text not null, +primary key (id1), +key x (id2, t(32)) +) engine=maria; +insert into t1 (id2, t) values +(10, 'abc'), (10, 'abc'), (10, 'abc'), +(20, 'abc'), (20, 'abc'), (20, 'def'), +(10, 'abc'), (10, 'abc'); +select count(*) from t1 where id2 = 10; +count(*) +5 +select count(id1) from t1 where id2 = 10; +count(id1) +5 +drop table t1; +CREATE TABLE t1(a TINYINT, KEY(a)); +INSERT INTO t1 VALUES(1); +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +MAX(a) +1 +ALTER TABLE t1 DISABLE KEYS; +SELECT MAX(a) FROM t1; +MAX(a) +1 +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +MAX(a) +1 +DROP TABLE t1; +CREATE TABLE t1(a CHAR(9), b VARCHAR(7)); +INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx'); +UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; +SELECT * FROM t1; +a b +xxxxxxxxx bbbbbb +xxxxxxxxx bbbbbb +DROP TABLE t1; +SET @@maria_repair_threads=2; +SHOW VARIABLES LIKE 'maria_repair%'; +Variable_name Value +maria_repair_threads 2 +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status Table is already up to date +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1 QUICK; +Table Op Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MARIA 10 Paged 2 # # # # 0 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +SET @@maria_repair_threads=1; +SHOW VARIABLES LIKE 'maria_repair%'; +Variable_name Value +maria_repair_threads 1 +drop table if exists t1,t2,t3; +--- Testing varchar --- +--- Testing varchar --- +create table t1 (v varchar(10), c char(10), t text); +insert into t1 values('+ ', '+ ', '+ '); +set @a=repeat(' ',20); +insert into t1 values (concat('+',@a),concat('+',@a),concat('+',@a)); +Warnings: +Note 1265 Data truncated for column 'v' at row 1 +select concat('*',v,'*',c,'*',t,'*') from t1; +concat('*',v,'*',c,'*',t,'*') +*+ *+*+ * +*+ *+*+ * +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +create table t2 like t1; +show create table t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +create table t3 select * from t1; +show create table t3; +Table Create Table +t3 CREATE TABLE `t3` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify c varchar(10); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify v char(10); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` text +) ENGINE=MARIA DEFAULT CHARSET=latin1 +alter table t1 modify t varchar(10); +Warnings: +Note 1265 Data truncated for column 't' at row 2 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) DEFAULT NULL, + `c` varchar(10) DEFAULT NULL, + `t` varchar(10) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select concat('*',v,'*',c,'*',t,'*') from t1; +concat('*',v,'*',c,'*',t,'*') +*+*+*+ * +*+*+*+ * +drop table t1,t2,t3; +create table t1 (v varchar(10), c char(10), t text, key(v), key(c), key(t(10))); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `v` (`v`), + KEY `c` (`c`), + KEY `t` (`t`(10)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1; +count(*) +270 +insert into t1 values(concat('a',char(1)),concat('a',char(1)),concat('a',char(1))); +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where c='a'; +count(*) +10 +select count(*) from t1 where t='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where c='a '; +count(*) +10 +select count(*) from t1 where t='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where c like 'a%'; +count(*) +11 +select count(*) from t1 where t like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +explain select count(*) from t1 where c='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref c c 11 const # Using where; Using index +explain select count(*) from t1 where t='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range t t 13 NULL # Using where +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 13 NULL # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 13 const # Using where; Using index +alter table t1 add unique(v); +ERROR 23000: Duplicate entry '{ ' for key 'v_2' +alter table t1 add key(v); +select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a'; +qq +*a*a*a* +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +*a *a*a * +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v,v_2 # 13 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(c) from t1 group by v limit 10; +v count(c) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(c) from t1 group by v limit 10; +v count(c) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select c,count(*) from t1 group by c limit 10; +c count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select c,count(t) from t1 group by c limit 10; +c count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result c,count(t) from t1 group by c limit 10; +c count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select t,count(*) from t1 group by t limit 10; +t count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select t,count(t) from t1 group by t limit 10; +t count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result t,count(t) from t1 group by t limit 10; +t count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 modify v varchar(300), drop key v, drop key v_2, add key v (v); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(300) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 303 NULL # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where; Using index +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 303 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 drop key v, add key v (v(30)); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(300) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`(30)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select count(*) from t1 where v='a'; +count(*) +10 +select count(*) from t1 where v='a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a '; +count(*) +10 +select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +count(*) +10 +select count(*) from t1 where v like 'a%'; +count(*) +11 +select count(*) from t1 where v like 'a %'; +count(*) +9 +explain select count(*) from t1 where v='a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select count(*) from t1 where v like 'a%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range v v 33 NULL # Using where +explain select count(*) from t1 where v between 'a' and 'a '; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' and 'b\n'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +explain select * from t1 where v='a'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref v v 33 const # Using where +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +alter table t1 modify v varchar(600), drop key v, add key v (v); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(600) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `c` (`c`), + KEY `t` (`t`(10)), + KEY `v` (`v`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +select v,count(*) from t1 group by v limit 10; +v count(*) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +select sql_big_result v,count(t) from t1 group by v limit 10; +v count(t) +a 1 +a 10 +b 10 +c 10 +d 10 +e 10 +f 10 +g 10 +h 10 +i 10 +drop table t1; +create table t1 (a char(10), unique (a)); +insert into t1 values ('a '); +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a' for key 'a' +alter table t1 modify a varchar(10); +insert into t1 values ('a '),('a '),('a '),('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +insert into t1 values ('a '); +ERROR 23000: Duplicate entry 'a ' for key 'a' +update t1 set a='a ' where a like 'a%'; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='abc ' where a like 'a '; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='a ' where a like 'a %'; +select concat(a,'.') from t1; +concat(a,'.') +a . +update t1 set a='a ' where a like 'a '; +select concat(a,'.') from t1; +concat(a,'.') +a . +drop table t1; +create table t1 (v varchar(10), c char(10), t text, key(v(5)), key(c(5)), key(t(5))); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL, + `t` text, + KEY `v` (`v`(5)), + KEY `c` (`c`(5)), + KEY `t` (`t`(5)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v char(10) character set utf8); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` char(10) CHARACTER SET utf8 DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(10), c char(10)) row_format=fixed; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` varchar(10) DEFAULT NULL, + `c` char(10) DEFAULT NULL +) ENGINE=MARIA DEFAULT CHARSET=latin1 ROW_FORMAT=FIXED +insert into t1 values('a','a'),('a ','a '); +select concat('*',v,'*',c,'*') from t1; +concat('*',v,'*',c,'*') +*a*a* +*a *a* +drop table t1; +create table t1 (v varchar(65530), key(v(10))); +insert into t1 values(repeat('a',65530)); +select length(v) from t1 where v=repeat('a',65530); +length(v) +65530 +drop table t1; +create table t1(a int, b varchar(12), key ba(b, a)); +insert into t1 values (1, 'A'), (20, NULL); +explain select * from t1 where a=20 and b is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ref ba ba 20 const,const 1 Using where; Using index +select * from t1 where a=20 and b is null; +a b +20 NULL +drop table t1; +create table t1 (v varchar(65530), key(v)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1112 bytes +drop table if exists t1; +create table t1 (v varchar(65536)); +Warnings: +Note 1246 Converting column 'v' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` mediumtext +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(65530) character set utf8); +Warnings: +Note 1246 Converting column 'v' from VARCHAR to TEXT +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `v` mediumtext CHARACTER SET utf8 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (v varchar(65535)); +ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. You have to change some columns to TEXT or BLOBs +set @save_concurrent_insert=@@concurrent_insert; +set global concurrent_insert=1; +create table t1 (a int) ROW_FORMAT=FIXED; +insert into t1 values (1),(2),(3),(4),(5); +lock table t1 read local; +insert into t1 values(6),(7); +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +set global concurrent_insert=2; +insert into t1 values (8),(9); +unlock tables; +insert into t1 values (10),(11),(12); +select * from t1; +a +1 +2 +11 +10 +5 +6 +7 +8 +9 +12 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +create table t1 (a int, b varchar(30) default "hello") ROW_FORMAT=DYNAMIC; +insert into t1 (a) values (1),(2),(3),(4),(5); +lock table t1 read local; +insert into t1 (a) values(6),(7); +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +set global concurrent_insert=2; +insert into t1 (a) values (8),(9); +unlock tables; +insert into t1 (a) values (10),(11),(12); +select a from t1; +a +1 +2 +11 +10 +5 +6 +7 +8 +9 +12 +check table t1; +Table Op Msg_type Msg_text +test.t1 check status OK +drop table t1; +set global concurrent_insert=@save_concurrent_insert; +create table t1 (a int, key(a)); +insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 8 NULL NULL YES BTREE +alter table t1 disable keys; +alter table t1 enable keys; +show keys from t1; +Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment +t1 1 a 1 a A 8 NULL NULL YES BTREE disabled +drop table t1; +show create table t1; +show create table t1; +create table t1 (a int) select 42 a; +select * from t1; +a +9 +select * from t1; +a +99 +select * from t1; +a +42 +drop table t1; +End of 4.1 tests +create table t1 (c1 int) pack_keys=0; +create table t2 (c1 int) pack_keys=1; +create table t3 (c1 int) pack_keys=default; +create table t4 (c1 int) pack_keys=2; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '2' at line 1 +drop table t1, t2, t3; +create table t1 (a int not null, key `a` (a) key_block_size=1024); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=2048); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a)); +Warnings: +Warning 1071 Specified key was too long; max key length is 1112 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1112)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a) key_block_size=1024); +Warnings: +Warning 1071 Specified key was too long; max key length is 1112 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1112)) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024; +Warnings: +Warning 1071 Specified key was too long; max key length is 1112 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192, + KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=1024 +alter table t1 key_block_size=2048; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192, + KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048 +alter table t1 add c int, add key (c); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192, + KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192, + KEY `c` (`c`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=2048 +alter table t1 key_block_size=0; +alter table t1 add d int, add key (d); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + `c` int(11) DEFAULT NULL, + `d` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192, + KEY `b` (`b`(1112)) KEY_BLOCK_SIZE=8192, + KEY `c` (`c`) KEY_BLOCK_SIZE=8192, + KEY `d` (`d`) +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192; +Warnings: +Warning 1071 Specified key was too long; max key length is 1112 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`(1112)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192 +drop table t1; +create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192; +Warnings: +Warning 1071 Specified key was too long; max key length is 1112 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` varchar(2048) DEFAULT NULL, + KEY `a` (`a`), + KEY `b` (`b`(1112)) +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=8192 +drop table t1; +create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + `b` int(11) DEFAULT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192, + KEY `b` (`b`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 KEY_BLOCK_SIZE=16384 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=512); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000); +Warnings: +Warning 1071 Specified key was too long; max key length is 1112 bytes +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(2048) DEFAULT NULL, + KEY `a` (`a`(1112)) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key `a` (a) key_block_size=1025); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + KEY `a` (`a`) KEY_BLOCK_SIZE=8192 +) ENGINE=MARIA DEFAULT CHARSET=latin1 +drop table t1; +create table t1 (a int not null, key key_block_size=1024 (a)); +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '=1024 (a))' at line 1 +create table t1 (a int not null, key `a` key_block_size=1024 (a)); +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'key_block_size=1024 (a))' at line 1 +CREATE TABLE t1 ( +c1 INT, +c2 VARCHAR(300), +KEY (c1) KEY_BLOCK_SIZE 1024, +KEY (c2) KEY_BLOCK_SIZE 8192 +); +INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))), +(11, REPEAT('b', CEIL(RAND() * 300))), +(12, REPEAT('c', CEIL(RAND() * 300))), +(13, REPEAT('d', CEIL(RAND() * 300))), +(14, REPEAT('e', CEIL(RAND() * 300))), +(15, REPEAT('f', CEIL(RAND() * 300))), +(16, REPEAT('g', CEIL(RAND() * 300))), +(17, REPEAT('h', CEIL(RAND() * 300))), +(18, REPEAT('i', CEIL(RAND() * 300))), +(19, REPEAT('j', CEIL(RAND() * 300))), +(20, REPEAT('k', CEIL(RAND() * 300))), +(21, REPEAT('l', CEIL(RAND() * 300))), +(22, REPEAT('m', CEIL(RAND() * 300))), +(23, REPEAT('n', CEIL(RAND() * 300))), +(24, REPEAT('o', CEIL(RAND() * 300))), +(25, REPEAT('p', CEIL(RAND() * 300))), +(26, REPEAT('q', CEIL(RAND() * 300))), +(27, REPEAT('r', CEIL(RAND() * 300))), +(28, REPEAT('s', CEIL(RAND() * 300))), +(29, REPEAT('t', CEIL(RAND() * 300))), +(30, REPEAT('u', CEIL(RAND() * 300))), +(31, REPEAT('v', CEIL(RAND() * 300))), +(32, REPEAT('w', CEIL(RAND() * 300))), +(33, REPEAT('x', CEIL(RAND() * 300))), +(34, REPEAT('y', CEIL(RAND() * 300))), +(35, REPEAT('z', CEIL(RAND() * 300))); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1; +Table Op Msg_type Msg_text +test.t1 repair status OK +DELETE FROM t1 WHERE c1 >= 10; +CHECK TABLE t1; +Table Op Msg_type Msg_text +test.t1 check status OK +DROP TABLE t1; diff --git a/mysql-test/r/ps_maria.result b/mysql-test/r/ps_maria.result new file mode 100644 index 00000000000..9268c44eecd --- /dev/null +++ b/mysql-test/r/ps_maria.result @@ -0,0 +1,3137 @@ +use test; +drop table if exists t1, t9 ; +create table t1 +( +a int, b varchar(30), +primary key(a) +) engine = 'MARIA' ; +create table t9 +( +c1 tinyint, c2 smallint, c3 mediumint, c4 int, +c5 integer, c6 bigint, c7 float, c8 double, +c9 double precision, c10 real, c11 decimal(7, 4), c12 numeric(8, 4), +c13 date, c14 datetime, c15 timestamp, c16 time, +c17 year, c18 tinyint, c19 bool, c20 char, +c21 char(10), c22 varchar(30), c23 tinyblob, c24 tinytext, +c25 blob, c26 text, c27 mediumblob, c28 mediumtext, +c29 longblob, c30 longtext, c31 enum('one', 'two', 'three'), +c32 set('monday', 'tuesday', 'wednesday'), +primary key(c1) +) engine = 'MARIA' ; +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +test_sequence +------ simple select tests ------ +prepare stmt1 from ' select * from t9 order by c1 ' ; +execute stmt1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t9 t9 c1 c1 1 4 1 N 49155 0 63 +def test t9 t9 c2 c2 2 6 1 Y 32768 0 63 +def test t9 t9 c3 c3 9 9 1 Y 32768 0 63 +def test t9 t9 c4 c4 3 11 1 Y 32768 0 63 +def test t9 t9 c5 c5 3 11 1 Y 32768 0 63 +def test t9 t9 c6 c6 8 20 1 Y 32768 0 63 +def test t9 t9 c7 c7 4 12 1 Y 32768 31 63 +def test t9 t9 c8 c8 5 22 1 Y 32768 31 63 +def test t9 t9 c9 c9 5 22 1 Y 32768 31 63 +def test t9 t9 c10 c10 5 22 1 Y 32768 31 63 +def test t9 t9 c11 c11 246 9 6 Y 0 4 63 +def test t9 t9 c12 c12 246 10 6 Y 0 4 63 +def test t9 t9 c13 c13 10 10 10 Y 128 0 63 +def test t9 t9 c14 c14 12 19 19 Y 128 0 63 +def test t9 t9 c15 c15 7 19 19 N 1249 0 63 +def test t9 t9 c16 c16 11 8 8 Y 128 0 63 +def test t9 t9 c17 c17 13 4 4 Y 32864 0 63 +def test t9 t9 c18 c18 1 4 1 Y 32768 0 63 +def test t9 t9 c19 c19 1 1 1 Y 32768 0 63 +def test t9 t9 c20 c20 254 1 1 Y 0 0 8 +def test t9 t9 c21 c21 254 10 10 Y 0 0 8 +def test t9 t9 c22 c22 253 30 30 Y 0 0 8 +def test t9 t9 c23 c23 252 255 8 Y 144 0 63 +def test t9 t9 c24 c24 252 255 8 Y 16 0 8 +def test t9 t9 c25 c25 252 65535 4 Y 144 0 63 +def test t9 t9 c26 c26 252 65535 4 Y 16 0 8 +def test t9 t9 c27 c27 252 16777215 10 Y 144 0 63 +def test t9 t9 c28 c28 252 16777215 10 Y 16 0 8 +def test t9 t9 c29 c29 252 4294967295 8 Y 144 0 63 +def test t9 t9 c30 c30 252 4294967295 8 Y 16 0 8 +def test t9 t9 c31 c31 254 5 3 Y 256 0 8 +def test t9 t9 c32 c32 254 24 7 Y 2048 0 8 +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday +set @arg00='SELECT' ; +@arg00 a from t1 where a=1; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a from t1 where a=1' at line 1 +prepare stmt1 from ' ? a from t1 where a=1 '; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a from t1 where a=1' at line 1 +set @arg00=1 ; +select @arg00, b from t1 where a=1 ; +@arg00 b +1 one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +1 one +set @arg00='lion' ; +select @arg00, b from t1 where a=1 ; +@arg00 b +lion one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +lion one +set @arg00=NULL ; +select @arg00, b from t1 where a=1 ; +@arg00 b +NULL one +prepare stmt1 from ' select ?, b from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +? b +NULL one +set @arg00=1 ; +select b, a - @arg00 from t1 where a=1 ; +b a - @arg00 +one 0 +prepare stmt1 from ' select b, a - ? from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +b a - ? +one 0 +set @arg00=null ; +select @arg00 as my_col ; +my_col +NULL +prepare stmt1 from ' select ? as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +select @arg00 + 1 as my_col ; +my_col +NULL +prepare stmt1 from ' select ? + 1 as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +select 1 + @arg00 as my_col ; +my_col +NULL +prepare stmt1 from ' select 1 + ? as my_col'; +execute stmt1 using @arg00 ; +my_col +NULL +set @arg00='MySQL' ; +select substr(@arg00,1,2) from t1 where a=1 ; +substr(@arg00,1,2) +My +prepare stmt1 from ' select substr(?,1,2) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr(?,1,2) +My +set @arg00=3 ; +select substr('MySQL',@arg00,5) from t1 where a=1 ; +substr('MySQL',@arg00,5) +SQL +prepare stmt1 from ' select substr(''MySQL'',?,5) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr('MySQL',?,5) +SQL +select substr('MySQL',1,@arg00) from t1 where a=1 ; +substr('MySQL',1,@arg00) +MyS +prepare stmt1 from ' select substr(''MySQL'',1,?) from t1 where a=1 ' ; +execute stmt1 using @arg00 ; +substr('MySQL',1,?) +MyS +set @arg00='MySQL' ; +select a , concat(@arg00,b) from t1 order by a; +a concat(@arg00,b) +1 MySQLone +2 MySQLtwo +3 MySQLthree +4 MySQLfour +prepare stmt1 from ' select a , concat(?,b) from t1 order by a ' ; +execute stmt1 using @arg00; +a concat(?,b) +1 MySQLone +2 MySQLtwo +3 MySQLthree +4 MySQLfour +select a , concat(b,@arg00) from t1 order by a ; +a concat(b,@arg00) +1 oneMySQL +2 twoMySQL +3 threeMySQL +4 fourMySQL +prepare stmt1 from ' select a , concat(b,?) from t1 order by a ' ; +execute stmt1 using @arg00; +a concat(b,?) +1 oneMySQL +2 twoMySQL +3 threeMySQL +4 fourMySQL +set @arg00='MySQL' ; +select group_concat(@arg00,b order by a) from t1 +group by 'a' ; +group_concat(@arg00,b order by a) +MySQLone,MySQLtwo,MySQLthree,MySQLfour +prepare stmt1 from ' select group_concat(?,b order by a) from t1 +group by ''a'' ' ; +execute stmt1 using @arg00; +group_concat(?,b order by a) +MySQLone,MySQLtwo,MySQLthree,MySQLfour +select group_concat(b,@arg00 order by a) from t1 +group by 'a' ; +group_concat(b,@arg00 order by a) +oneMySQL,twoMySQL,threeMySQL,fourMySQL +prepare stmt1 from ' select group_concat(b,? order by a) from t1 +group by ''a'' ' ; +execute stmt1 using @arg00; +group_concat(b,? order by a) +oneMySQL,twoMySQL,threeMySQL,fourMySQL +set @arg00='first' ; +set @arg01='second' ; +set @arg02=NULL; +select @arg00, @arg01 from t1 where a=1 ; +@arg00 @arg01 +first second +prepare stmt1 from ' select ?, ? from t1 where a=1 ' ; +execute stmt1 using @arg00, @arg01 ; +? ? +first second +execute stmt1 using @arg02, @arg01 ; +? ? +NULL second +execute stmt1 using @arg00, @arg02 ; +? ? +first NULL +execute stmt1 using @arg02, @arg02 ; +? ? +NULL NULL +drop table if exists t5 ; +create table t5 (id1 int(11) not null default '0', +value2 varchar(100), value1 varchar(100)) ; +insert into t5 values (1,'hh','hh'),(2,'hh','hh'), +(1,'ii','ii'),(2,'ii','ii') ; +prepare stmt1 from ' select id1,value1 from t5 where id1=? or value1=? order by id1,value1 ' ; +set @arg00=1 ; +set @arg01='hh' ; +execute stmt1 using @arg00, @arg01 ; +id1 value1 +1 hh +1 ii +2 hh +drop table t5 ; +drop table if exists t5 ; +create table t5(session_id char(9) not null) ; +insert into t5 values ('abc') ; +prepare stmt1 from ' select * from t5 +where ?=''1111'' and session_id = ''abc'' ' ; +set @arg00='abc' ; +execute stmt1 using @arg00 ; +session_id +set @arg00='1111' ; +execute stmt1 using @arg00 ; +session_id +abc +set @arg00='abc' ; +execute stmt1 using @arg00 ; +session_id +drop table t5 ; +set @arg00='FROM' ; +select a @arg00 t1 where a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 t1 where a=1' at line 1 +prepare stmt1 from ' select a ? t1 where a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? t1 where a=1' at line 1 +set @arg00='t1' ; +select a from @arg00 where a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 where a=1' at line 1 +prepare stmt1 from ' select a from ? where a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? where a=1' at line 1 +set @arg00='WHERE' ; +select a from t1 @arg00 a=1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 a=1' at line 1 +prepare stmt1 from ' select a from t1 ? a=1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? a=1' at line 1 +set @arg00=1 ; +select a FROM t1 where a=@arg00 ; +a +1 +prepare stmt1 from ' select a FROM t1 where a=? ' ; +execute stmt1 using @arg00 ; +a +1 +set @arg00=1000 ; +execute stmt1 using @arg00 ; +a +set @arg00=NULL ; +select a FROM t1 where a=@arg00 ; +a +prepare stmt1 from ' select a FROM t1 where a=? ' ; +execute stmt1 using @arg00 ; +a +set @arg00=4 ; +select a FROM t1 where a=sqrt(@arg00) ; +a +2 +prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ; +execute stmt1 using @arg00 ; +a +2 +set @arg00=NULL ; +select a FROM t1 where a=sqrt(@arg00) ; +a +prepare stmt1 from ' select a FROM t1 where a=sqrt(?) ' ; +execute stmt1 using @arg00 ; +a +set @arg00=2 ; +set @arg01=3 ; +select a FROM t1 where a in (@arg00,@arg01) order by a; +a +2 +3 +prepare stmt1 from ' select a FROM t1 where a in (?,?) order by a '; +execute stmt1 using @arg00, @arg01; +a +2 +3 +set @arg00= 'one' ; +set @arg01= 'two' ; +set @arg02= 'five' ; +prepare stmt1 from ' select b FROM t1 where b in (?,?,?) order by b ' ; +execute stmt1 using @arg00, @arg01, @arg02 ; +b +one +two +prepare stmt1 from ' select b FROM t1 where b like ? '; +set @arg00='two' ; +execute stmt1 using @arg00 ; +b +two +set @arg00='tw%' ; +execute stmt1 using @arg00 ; +b +two +set @arg00='%wo' ; +execute stmt1 using @arg00 ; +b +two +set @arg00=null ; +insert into t9 set c1= 0, c5 = NULL ; +select c5 from t9 where c5 > NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 > ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 < NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 < ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 = NULL ; +c5 +prepare stmt1 from ' select c5 from t9 where c5 = ? '; +execute stmt1 using @arg00 ; +c5 +select c5 from t9 where c5 <=> NULL ; +c5 +NULL +prepare stmt1 from ' select c5 from t9 where c5 <=> ? '; +execute stmt1 using @arg00 ; +c5 +NULL +delete from t9 where c1= 0 ; +set @arg00='>' ; +select a FROM t1 where a @arg00 1 ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '@arg00 1' at line 1 +prepare stmt1 from ' select a FROM t1 where a ? 1 ' ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? 1' at line 1 +set @arg00=1 ; +select a,b FROM t1 where a is not NULL +AND b is not NULL group by a - @arg00 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL group by a - ? ' ; +execute stmt1 using @arg00 ; +a b +1 one +2 two +3 three +4 four +set @arg00='two' ; +select a,b FROM t1 where a is not NULL +AND b is not NULL having b <> @arg00 order by a ; +a b +1 one +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL having b <> ? order by a ' ; +execute stmt1 using @arg00 ; +a b +1 one +3 three +4 four +set @arg00=1 ; +select a,b FROM t1 where a is not NULL +AND b is not NULL order by a - @arg00 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' select a,b FROM t1 where a is not NULL +AND b is not NULL order by a - ? ' ; +execute stmt1 using @arg00 ; +a b +1 one +2 two +3 three +4 four +set @arg00=2 ; +select a,b from t1 order by 2 ; +a b +4 four +1 one +3 three +2 two +prepare stmt1 from ' select a,b from t1 +order by ? '; +execute stmt1 using @arg00; +a b +4 four +1 one +3 three +2 two +set @arg00=1 ; +execute stmt1 using @arg00; +a b +1 one +2 two +3 three +4 four +set @arg00=0 ; +execute stmt1 using @arg00; +ERROR 42S22: Unknown column '?' in 'order clause' +set @arg00=1; +prepare stmt1 from ' select a,b from t1 order by a +limit 1 '; +execute stmt1 ; +a b +1 one +prepare stmt1 from ' select a,b from t1 order by a limit ? '; +execute stmt1 using @arg00; +a b +1 one +set @arg00='b' ; +set @arg01=0 ; +set @arg02=2 ; +set @arg03=2 ; +select sum(a), @arg00 from t1 where a > @arg01 +and b is not null group by substr(b,@arg02) +having sum(a) <> @arg03 ; +sum(a) @arg00 +3 b +1 b +4 b +prepare stmt1 from ' select sum(a), ? from t1 where a > ? +and b is not null group by substr(b,?) +having sum(a) <> ? '; +execute stmt1 using @arg00, @arg01, @arg02, @arg03; +sum(a) ? +3 b +1 b +4 b +test_sequence +------ join tests ------ +select first.a as a1, second.a as a2 +from t1 first, t1 second +where first.a = second.a order by a1 ; +a1 a2 +1 1 +2 2 +3 3 +4 4 +prepare stmt1 from ' select first.a as a1, second.a as a2 + from t1 first, t1 second + where first.a = second.a order by a1 '; +execute stmt1 ; +a1 a2 +1 1 +2 2 +3 3 +4 4 +set @arg00='ABC'; +set @arg01='two'; +set @arg02='one'; +select first.a, @arg00, second.a FROM t1 first, t1 second +where @arg01 = first.b or first.a = second.a or second.b = @arg02 +order by second.a, first.a; +a @arg00 a +1 ABC 1 +2 ABC 1 +3 ABC 1 +4 ABC 1 +2 ABC 2 +2 ABC 3 +3 ABC 3 +2 ABC 4 +4 ABC 4 +prepare stmt1 from ' select first.a, ?, second.a FROM t1 first, t1 second + where ? = first.b or first.a = second.a or second.b = ? + order by second.a, first.a'; +execute stmt1 using @arg00, @arg01, @arg02; +a ? a +1 ABC 1 +2 ABC 1 +3 ABC 1 +4 ABC 1 +2 ABC 2 +2 ABC 3 +3 ABC 3 +2 ABC 4 +4 ABC 4 +drop table if exists t2 ; +create table t2 as select * from t1 ; +set @query1= 'SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a ' ; +set @query2= 'SELECT * FROM t2 natural join t1 order by t2.a ' ; +set @query3= 'SELECT * FROM t2 join t1 using(a) order by t2.a ' ; +set @query4= 'SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a ' ; +set @query5= 'SELECT * FROM t2 natural left join t1 order by t2.a ' ; +set @query6= 'SELECT * FROM t2 left join t1 using(a) order by t2.a ' ; +set @query7= 'SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a ' ; +set @query8= 'SELECT * FROM t2 natural right join t1 order by t2.a ' ; +set @query9= 'SELECT * FROM t2 right join t1 using(a) order by t2.a ' ; +the join statement is: +SELECT * FROM t2 right join t1 using(a) order by t2.a +prepare stmt1 from @query9 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural right join t1 order by t2.a +prepare stmt1 from @query8 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 right join t1 on(t1.a=t2.a) order by t2.a +prepare stmt1 from @query7 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +the join statement is: +SELECT * FROM t2 left join t1 using(a) order by t2.a +prepare stmt1 from @query6 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural left join t1 order by t2.a +prepare stmt1 from @query5 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 left join t1 on(t1.a=t2.a) order by t2.a +prepare stmt1 from @query4 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +the join statement is: +SELECT * FROM t2 join t1 using(a) order by t2.a +prepare stmt1 from @query3 ; +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +execute stmt1 ; +a b b +1 one one +2 two two +3 three three +4 four four +the join statement is: +SELECT * FROM t2 natural join t1 order by t2.a +prepare stmt1 from @query2 ; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +the join statement is: +SELECT * FROM t2 join t1 on (t1.a=t2.a) order by t2.a +prepare stmt1 from @query1 ; +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +execute stmt1 ; +a b a b +1 one 1 one +2 two 2 two +3 three 3 three +4 four 4 four +drop table t2 ; +test_sequence +------ subquery tests ------ +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ''two'') '; +execute stmt1 ; +a b +2 two +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = 'two' ) and b=@arg00 ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ''two'') and b=? '; +execute stmt1 using @arg00; +a b +2 two +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = @arg00 ) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = ? ) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=3 ; +set @arg01='three' ; +select a,b FROM t1 where (a,b) in (select 3, 'three'); +a b +3 three +select a FROM t1 where (a,b) in (select @arg00,@arg01); +a +3 +prepare stmt1 from ' select a FROM t1 where (a,b) in (select ?, ?) '; +execute stmt1 using @arg00, @arg01; +a +3 +set @arg00=1 ; +set @arg01='two' ; +set @arg02=2 ; +set @arg03='two' ; +select a, @arg00, b FROM t1 outer_table where +b=@arg01 and a = (select @arg02 from t1 where b = @arg03 ) ; +a @arg00 b +2 1 two +prepare stmt1 from ' select a, ?, b FROM t1 outer_table where + b=? and a = (select ? from t1 where b = ? ) ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +a ? b +2 1 two +prepare stmt1 from 'select c4 FROM t9 where + c13 = (select MAX(b) from t1 where a = ?) and c22 = ? ' ; +execute stmt1 using @arg01, @arg02; +c4 +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = outer_table.b ) order by a '; +execute stmt1 ; +a b +1 one +2 two +3 three +4 four +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +prepare stmt1 from ' SELECT a as ccc from t1 where a+1= + (SELECT 1+ccc from t1 where ccc+1=a+1 and a=1) '; +execute stmt1 ; +ccc +1 +deallocate prepare stmt1 ; +set @arg00='two' ; +select a, b FROM t1 outer_table where +a = (select a from t1 where b = outer_table.b ) and b=@arg00 ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where b = outer_table.b) and b=? '; +execute stmt1 using @arg00; +a b +2 two +set @arg00=2 ; +select a, b FROM t1 outer_table where +a = (select a from t1 where a = @arg00 and b = outer_table.b) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where a = ? and b = outer_table.b) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=2 ; +select a, b FROM t1 outer_table where +a = (select a from t1 where outer_table.a = @arg00 and a=2) and b='two' ; +a b +2 two +prepare stmt1 from ' select a, b FROM t1 outer_table where + a = (select a from t1 where outer_table.a = ? and a=2) and b=''two'' ' ; +execute stmt1 using @arg00; +a b +2 two +set @arg00=1 ; +set @arg01='two' ; +set @arg02=2 ; +set @arg03='two' ; +select a, @arg00, b FROM t1 outer_table where +b=@arg01 and a = (select @arg02 from t1 where outer_table.b = @arg03 +and outer_table.a=a ) ; +a @arg00 b +2 1 two +prepare stmt1 from ' select a, ?, b FROM t1 outer_table where + b=? and a = (select ? from t1 where outer_table.b = ? + and outer_table.a=a ) ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +a ? b +2 1 two +set @arg00=1 ; +set @arg01=0 ; +select a, @arg00 +from ( select a - @arg00 as a from t1 where a=@arg00 ) as t2 +where a=@arg01; +a @arg00 +0 1 +prepare stmt1 from ' select a, ? + from ( select a - ? as a from t1 where a=? ) as t2 + where a=? '; +execute stmt1 using @arg00, @arg00, @arg00, @arg01 ; +a ? +0 1 +drop table if exists t2 ; +create table t2 as select * from t1; +prepare stmt1 from ' select a in (select a from t2) from t1 ' ; +execute stmt1 ; +a in (select a from t2) +1 +1 +1 +1 +drop table if exists t5, t6, t7 ; +create table t5 (a int , b int) ; +create table t6 like t5 ; +create table t7 like t5 ; +insert into t5 values (0, 100), (1, 2), (1, 3), (2, 2), (2, 7), +(2, -1), (3, 10) ; +insert into t6 values (0, 0), (1, 1), (2, 1), (3, 1), (4, 1) ; +insert into t7 values (3, 3), (2, 2), (1, 1) ; +prepare stmt1 from ' select a, (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) from t7 ' ; +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +execute stmt1 ; +a (select count(distinct t5.b) as sum from t5, t6 + where t5.a=t6.a and t6.b > 0 and t5.a <= t7.b + group by t5.a order by sum limit 1) +3 1 +2 2 +1 2 +drop table t5, t6, t7 ; +drop table if exists t2 ; +create table t2 as select * from t9; +set @stmt= ' SELECT + (SELECT SUM(c1 + c12 + 0.0) FROM t2 + where (t9.c2 - 0e-3) = t2.c2 + GROUP BY t9.c15 LIMIT 1) as scalar_s, + exists (select 1.0e+0 from t2 + where t2.c3 * 9.0000000000 = t9.c4) as exists_s, + c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s, + (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s +FROM t9, +(select c25 x, c32 y from t2) tt WHERE x = c25 ' ; +prepare stmt1 from @stmt ; +execute stmt1 ; +execute stmt1 ; +set @stmt= concat('explain ',@stmt); +prepare stmt1 from @stmt ; +execute stmt1 ; +execute stmt1 ; +set @stmt= ' SELECT + (SELECT SUM(c1+c12+?) FROM t2 where (t9.c2-?)=t2.c2 + GROUP BY t9.c15 LIMIT 1) as scalar_s, + exists (select ? from t2 + where t2.c3*?=t9.c4) as exists_s, + c5*? in (select c6+? from t2) as in_s, + (c7-?, c8-?) in (select c9+?, c10+? from t2) as in_row_s +FROM t9, +(select c25 x, c32 y from t2) tt WHERE x =c25 ' ; +set @arg00= 0.0 ; +set @arg01= 0e-3 ; +set @arg02= 1.0e+0 ; +set @arg03= 9.0000000000 ; +set @arg04= 4 ; +set @arg05= 0.3e+1 ; +set @arg06= 4 ; +set @arg07= 4 ; +set @arg08= 4.0 ; +set @arg09= 40e-1 ; +prepare stmt1 from @stmt ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +set @stmt= concat('explain ',@stmt); +prepare stmt1 from @stmt ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, +@arg07, @arg08, @arg09 ; +drop table t2 ; +select 1 < (select a from t1) ; +ERROR 21000: Subquery returns more than 1 row +prepare stmt1 from ' select 1 < (select a from t1) ' ; +execute stmt1 ; +ERROR 21000: Subquery returns more than 1 row +select 1 as my_col ; +my_col +1 +test_sequence +------ union tests ------ +prepare stmt1 from ' select a FROM t1 where a=1 + union distinct + select a FROM t1 where a=1 '; +execute stmt1 ; +a +1 +execute stmt1 ; +a +1 +prepare stmt1 from ' select a FROM t1 where a=1 + union all + select a FROM t1 where a=1 '; +execute stmt1 ; +a +1 +1 +prepare stmt1 from ' SELECT 1, 2 union SELECT 1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT 1 union SELECT 1, 2 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT * from t1 union SELECT 1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +prepare stmt1 from ' SELECT 1 union SELECT * from t1 ' ; +ERROR 21000: The used SELECT statements have a different number of columns +set @arg00=1 ; +select @arg00 FROM t1 where a=1 +union distinct +select 1 FROM t1 where a=1; +@arg00 +1 +prepare stmt1 from ' select ? FROM t1 where a=1 + union distinct + select 1 FROM t1 where a=1 ' ; +execute stmt1 using @arg00; +? +1 +set @arg00=1 ; +select 1 FROM t1 where a=1 +union distinct +select @arg00 FROM t1 where a=1; +1 +1 +prepare stmt1 from ' select 1 FROM t1 where a=1 + union distinct + select ? FROM t1 where a=1 ' ; +execute stmt1 using @arg00; +1 +1 +set @arg00='a' ; +select @arg00 FROM t1 where a=1 +union distinct +select @arg00 FROM t1 where a=1; +@arg00 +a +prepare stmt1 from ' select ? FROM t1 where a=1 + union distinct + select ? FROM t1 where a=1 '; +execute stmt1 using @arg00, @arg00; +? +a +prepare stmt1 from ' select ? + union distinct + select ? '; +execute stmt1 using @arg00, @arg00; +? +a +set @arg00='a' ; +set @arg01=1 ; +set @arg02='a' ; +set @arg03=2 ; +select @arg00 FROM t1 where a=@arg01 +union distinct +select @arg02 FROM t1 where a=@arg03; +@arg00 +a +prepare stmt1 from ' select ? FROM t1 where a=? + union distinct + select ? FROM t1 where a=? ' ; +execute stmt1 using @arg00, @arg01, @arg02, @arg03; +? +a +set @arg00=1 ; +prepare stmt1 from ' select sum(a) + 200, ? from t1 +union distinct +select sum(a) + 200, 1 from t1 +group by b ' ; +execute stmt1 using @arg00; +sum(a) + 200 ? +210 1 +204 1 +201 1 +203 1 +202 1 +set @Oporto='Oporto' ; +set @Lisboa='Lisboa' ; +set @0=0 ; +set @1=1 ; +set @2=2 ; +set @3=3 ; +set @4=4 ; +select @Oporto,@Lisboa,@0,@1,@2,@3,@4 ; +@Oporto @Lisboa @0 @1 @2 @3 @4 +Oporto Lisboa 0 1 2 3 4 +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +group by b +union distinct +select sum(a) + 200, @Lisboa from t1 +group by b ; +the_sum the_town +204 Oporto +201 Oporto +203 Oporto +202 Oporto +204 Lisboa +201 Lisboa +203 Lisboa +202 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + group by b + union distinct + select sum(a) + 200, ? from t1 + group by b ' ; +execute stmt1 using @Oporto, @Lisboa; +the_sum the_town +204 Oporto +201 Oporto +203 Oporto +202 Oporto +204 Lisboa +201 Lisboa +203 Lisboa +202 Lisboa +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +where a > @1 +group by b +union distinct +select sum(a) + 200, @Lisboa from t1 +where a > @2 +group by b ; +the_sum the_town +204 Oporto +203 Oporto +202 Oporto +204 Lisboa +203 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + where a > ? + group by b + union distinct + select sum(a) + 200, ? from t1 + where a > ? + group by b ' ; +execute stmt1 using @Oporto, @1, @Lisboa, @2; +the_sum the_town +204 Oporto +203 Oporto +202 Oporto +204 Lisboa +203 Lisboa +select sum(a) + 200 as the_sum, @Oporto as the_town from t1 +where a > @1 +group by b +having avg(a) > @2 +union distinct +select sum(a) + 200, @Lisboa from t1 +where a > @2 +group by b +having avg(a) > @3; +the_sum the_town +204 Oporto +203 Oporto +204 Lisboa +prepare stmt1 from ' select sum(a) + 200 as the_sum, ? as the_town from t1 + where a > ? + group by b + having avg(a) > ? + union distinct + select sum(a) + 200, ? from t1 + where a > ? + group by b + having avg(a) > ? '; +execute stmt1 using @Oporto, @1, @2, @Lisboa, @2, @3; +the_sum the_town +204 Oporto +203 Oporto +204 Lisboa +test_sequence +------ explain select tests ------ +prepare stmt1 from ' explain select * from t9 ' ; +execute stmt1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def id 8 3 1 N 32929 0 63 +def select_type 253 19 6 N 1 31 8 +def table 253 64 2 Y 0 31 8 +def type 253 10 3 Y 0 31 8 +def possible_keys 253 4096 0 Y 0 31 8 +def key 253 64 0 Y 0 31 8 +def key_len 253 4096 0 Y 128 31 63 +def ref 253 1024 0 Y 0 31 8 +def rows 8 10 1 Y 32928 0 63 +def Extra 253 255 0 N 1 31 8 +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t9 ALL NULL NULL NULL NULL 2 +drop table if exists t2 ; +create table t2 (s varchar(25), fulltext(s)) +ENGINE = 'MARIA' ; +insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ; +commit ; +prepare stmt1 from ' select s from t2 where match (s) against (?) ' ; +set @arg00='Dogs' ; +execute stmt1 using @arg00 ; +s +Hollow Dogs +prepare stmt1 from ' SELECT s FROM t2 +where match (s) against (concat(?,''digger'')) '; +set @arg00='Grave' ; +execute stmt1 using @arg00 ; +s +Gravedigger +drop table t2 ; +test_sequence +------ delete tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'delete from t1 where a=2' ; +execute stmt1; +select a,b from t1 where a=2; +a b +execute stmt1; +insert into t1 values(0,NULL); +set @arg00=NULL; +prepare stmt1 from 'delete from t1 where b=?' ; +execute stmt1 using @arg00; +select a,b from t1 where b is NULL ; +a b +0 NULL +set @arg00='one'; +execute stmt1 using @arg00; +select a,b from t1 where b=@arg00; +a b +prepare stmt1 from 'truncate table t1' ; +test_sequence +------ update tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'update t1 set b=''a=two'' where a=2' ; +execute stmt1; +select a,b from t1 where a=2; +a b +2 a=two +execute stmt1; +select a,b from t1 where a=2; +a b +2 a=two +set @arg00=NULL; +prepare stmt1 from 'update t1 set b=? where a=2' ; +execute stmt1 using @arg00; +select a,b from t1 where a=2; +a b +2 NULL +set @arg00='two'; +execute stmt1 using @arg00; +select a,b from t1 where a=2; +a b +2 two +set @arg00=2; +prepare stmt1 from 'update t1 set b=NULL where a=?' ; +execute stmt1 using @arg00; +select a,b from t1 where a=@arg00; +a b +2 NULL +update t1 set b='two' where a=@arg00; +set @arg00=2000; +execute stmt1 using @arg00; +select a,b from t1 where a=@arg00; +a b +set @arg00=2; +set @arg01=22; +prepare stmt1 from 'update t1 set a=? where a=?' ; +execute stmt1 using @arg00, @arg00; +select a,b from t1 where a=@arg00; +a b +2 two +execute stmt1 using @arg01, @arg00; +select a,b from t1 where a=@arg01; +a b +22 two +execute stmt1 using @arg00, @arg01; +select a,b from t1 where a=@arg00; +a b +2 two +set @arg00=NULL; +set @arg01=2; +execute stmt1 using @arg00, @arg01; +Warnings: +Warning 1048 Column 'a' cannot be null +select a,b from t1 order by a; +a b +0 two +1 one +3 three +4 four +set @arg00=0; +execute stmt1 using @arg01, @arg00; +select a,b from t1 order by a; +a b +1 one +2 two +3 three +4 four +set @arg00=23; +set @arg01='two'; +set @arg02=2; +set @arg03='two'; +set @arg04=2; +drop table if exists t2; +create table t2 as select a,b from t1 ; +prepare stmt1 from 'update t1 set a=? where b=? + and a in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 where a = @arg00 ; +a b +23 two +prepare stmt1 from 'update t1 set a=? where b=? + and a not in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 order by a ; +a b +1 one +2 two +3 three +4 four +drop table t2 ; +create table t2 +( +a int, b varchar(30), +primary key(a) +) engine = 'MARIA' ; +insert into t2(a,b) select a, b from t1 ; +prepare stmt1 from 'update t1 set a=? where b=? + and a in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03, @arg04 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 where a = @arg00 ; +a b +23 two +prepare stmt1 from 'update t1 set a=? where b=? + and a not in (select ? from t2 + where b = ? or a = ?)'; +execute stmt1 using @arg04, @arg01, @arg02, @arg03, @arg00 ; +affected rows: 1 +info: Rows matched: 1 Changed: 1 Warnings: 0 +select a,b from t1 order by a ; +a b +1 one +2 two +3 three +4 four +drop table t2 ; +set @arg00=1; +prepare stmt1 from 'update t1 set b=''bla'' +where a=2 +limit 1'; +execute stmt1 ; +select a,b from t1 where b = 'bla' ; +a b +2 bla +prepare stmt1 from 'update t1 set b=''bla'' where a=2 limit ?'; +execute stmt1 using @arg00; +test_sequence +------ insert tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +prepare stmt1 from 'insert into t1 values(5, ''five'' )'; +execute stmt1; +select a,b from t1 where a = 5; +a b +5 five +set @arg00='six' ; +prepare stmt1 from 'insert into t1 values(6, ? )'; +execute stmt1 using @arg00; +select a,b from t1 where b = @arg00; +a b +6 six +execute stmt1 using @arg00; +ERROR 23000: Duplicate entry '6' for key 'PRIMARY' +set @arg00=NULL ; +prepare stmt1 from 'insert into t1 values(0, ? )'; +execute stmt1 using @arg00; +select a,b from t1 where b is NULL; +a b +0 NULL +set @arg00=8 ; +set @arg01='eight' ; +prepare stmt1 from 'insert into t1 values(?, ? )'; +execute stmt1 using @arg00, @arg01 ; +select a,b from t1 where b = @arg01; +a b +8 eight +set @NULL= null ; +set @arg00= 'abc' ; +execute stmt1 using @NULL, @NULL ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @NULL ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @arg00 ; +ERROR 23000: Column 'a' cannot be null +execute stmt1 using @NULL, @arg00 ; +ERROR 23000: Column 'a' cannot be null +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @arg00 ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @arg00 ; +select * from t1 where a > 10000 order by a ; +a b +10001 abc +10002 abc +delete from t1 where a > 10000 ; +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @NULL ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @NULL ; +select * from t1 where a > 10000 order by a ; +a b +10001 NULL +10002 NULL +delete from t1 where a > 10000 ; +set @arg01= 10000 + 10 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 9 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 8 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 7 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 6 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 5 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 4 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 3 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 2 ; +execute stmt1 using @arg01, @arg01 ; +set @arg01= 10000 + 1 ; +execute stmt1 using @arg01, @arg01 ; +select * from t1 where a > 10000 order by a ; +a b +10001 10001 +10002 10002 +10003 10003 +10004 10004 +10005 10005 +10006 10006 +10007 10007 +10008 10008 +10009 10009 +10010 10010 +delete from t1 where a > 10000 ; +set @arg00=81 ; +set @arg01='8-1' ; +set @arg02=82 ; +set @arg03='8-2' ; +prepare stmt1 from 'insert into t1 values(?,?),(?,?)'; +execute stmt1 using @arg00, @arg01, @arg02, @arg03 ; +select a,b from t1 where a in (@arg00,@arg02) ; +a b +81 8-1 +82 8-2 +set @arg00=9 ; +set @arg01='nine' ; +prepare stmt1 from 'insert into t1 set a=?, b=? '; +execute stmt1 using @arg00, @arg01 ; +select a,b from t1 where a = @arg00 ; +a b +9 nine +set @arg00=6 ; +set @arg01=1 ; +prepare stmt1 from 'insert into t1 set a=?, b=''sechs'' + on duplicate key update a=a + ?, b=concat(b,''modified'') '; +execute stmt1 using @arg00, @arg01; +select * from t1 order by a; +a b +0 NULL +1 one +2 two +3 three +4 four +5 five +7 sixmodified +8 eight +9 nine +81 8-1 +82 8-2 +set @arg00=81 ; +set @arg01=1 ; +execute stmt1 using @arg00, @arg01; +ERROR 23000: Duplicate entry '82' for key 'PRIMARY' +drop table if exists t2 ; +create table t2 (id int auto_increment primary key) +ENGINE= 'MARIA' ; +prepare stmt1 from ' select last_insert_id() ' ; +insert into t2 values (NULL) ; +execute stmt1 ; +last_insert_id() +1 +insert into t2 values (NULL) ; +execute stmt1 ; +last_insert_id() +2 +drop table t2 ; +set @1000=1000 ; +set @x1000_2="x1000_2" ; +set @x1000_3="x1000_3" ; +set @x1000="x1000" ; +set @1100=1100 ; +set @x1100="x1100" ; +set @100=100 ; +set @updated="updated" ; +insert into t1 values(1000,'x1000_1') ; +insert into t1 values(@1000,@x1000_2),(@1000,@x1000_3) +on duplicate key update a = a + @100, b = concat(b,@updated) ; +select a,b from t1 where a >= 1000 order by a ; +a b +1000 x1000_3 +1100 x1000_1updated +delete from t1 where a >= 1000 ; +insert into t1 values(1000,'x1000_1') ; +prepare stmt1 from ' insert into t1 values(?,?),(?,?) + on duplicate key update a = a + ?, b = concat(b,?) '; +execute stmt1 using @1000, @x1000_2, @1000, @x1000_3, @100, @updated ; +select a,b from t1 where a >= 1000 order by a ; +a b +1000 x1000_3 +1100 x1000_1updated +delete from t1 where a >= 1000 ; +insert into t1 values(1000,'x1000_1') ; +execute stmt1 using @1000, @x1000_2, @1100, @x1000_3, @100, @updated ; +select a,b from t1 where a >= 1000 order by a ; +a b +1200 x1000_1updatedupdated +delete from t1 where a >= 1000 ; +prepare stmt1 from ' replace into t1 (a,b) select 100, ''hundred'' '; +execute stmt1; +execute stmt1; +execute stmt1; +test_sequence +------ multi table tests ------ +delete from t1 ; +delete from t9 ; +insert into t1(a,b) values (1, 'one'), (2, 'two'), (3, 'three') ; +insert into t9 (c1,c21) +values (1, 'one'), (2, 'two'), (3, 'three') ; +prepare stmt_delete from " delete t1, t9 + from t1, t9 where t1.a=t9.c1 and t1.b='updated' "; +prepare stmt_update from " update t1, t9 + set t1.b='updated', t9.c21='updated' + where t1.a=t9.c1 and t1.a=? "; +prepare stmt_select1 from " select a, b from t1 order by a" ; +prepare stmt_select2 from " select c1, c21 from t9 order by c1" ; +set @arg00= 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +2 two +3 three +execute stmt_select2 ; +c1 c21 +2 two +3 three +set @arg00= @arg00 + 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +3 three +execute stmt_select2 ; +c1 c21 +3 three +set @arg00= @arg00 + 1 ; +execute stmt_update using @arg00 ; +execute stmt_delete ; +execute stmt_select1 ; +a b +execute stmt_select2 ; +c1 c21 +set @arg00= @arg00 + 1 ; +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +insert into t1 values(0,NULL) ; +set @duplicate='duplicate ' ; +set @1000=1000 ; +set @5=5 ; +select a,b from t1 where a < 5 order by a ; +a b +0 NULL +1 one +2 two +3 three +4 four +insert into t1 select a + @1000, concat(@duplicate,b) from t1 +where a < @5 ; +affected rows: 5 +info: Records: 5 Duplicates: 0 Warnings: 0 +select a,b from t1 where a >= 1000 order by a ; +a b +1000 NULL +1001 duplicate one +1002 duplicate two +1003 duplicate three +1004 duplicate four +delete from t1 where a >= 1000 ; +prepare stmt1 from ' insert into t1 select a + ?, concat(?,b) from t1 +where a < ? ' ; +execute stmt1 using @1000, @duplicate, @5; +affected rows: 5 +info: Records: 5 Duplicates: 0 Warnings: 0 +select a,b from t1 where a >= 1000 order by a ; +a b +1000 NULL +1001 duplicate one +1002 duplicate two +1003 duplicate three +1004 duplicate four +delete from t1 where a >= 1000 ; +set @1=1 ; +set @2=2 ; +set @100=100 ; +set @float=1.00; +set @five='five' ; +drop table if exists t2; +create table t2 like t1 ; +insert into t2 (b,a) +select @duplicate, sum(first.a) from t1 first, t1 second +where first.a <> @5 and second.b = first.b +and second.b <> @five +group by second.b +having sum(second.a) > @2 +union +select b, a + @100 from t1 +where (a,b) in ( select sqrt(a+@1)+CAST(@float AS signed),b +from t1); +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +select a,b from t2 order by a ; +a b +3 duplicate +4 duplicate +103 three +delete from t2 ; +prepare stmt1 from ' insert into t2 (b,a) +select ?, sum(first.a) + from t1 first, t1 second + where first.a <> ? and second.b = first.b and second.b <> ? + group by second.b + having sum(second.a) > ? +union +select b, a + ? from t1 + where (a,b) in ( select sqrt(a+?)+CAST(? AS signed),b + from t1 ) ' ; +execute stmt1 using @duplicate, @5, @five, @2, @100, @1, @float ; +affected rows: 3 +info: Records: 3 Duplicates: 0 Warnings: 0 +select a,b from t2 order by a ; +a b +3 duplicate +4 duplicate +103 three +drop table t2; +drop table if exists t5 ; +set @arg01= 8; +set @arg02= 8.0; +set @arg03= 80.00000000000e-1; +set @arg04= 'abc' ; +set @arg05= CAST('abc' as binary) ; +set @arg06= '1991-08-05' ; +set @arg07= CAST('1991-08-05' as date); +set @arg08= '1991-08-05 01:01:01' ; +set @arg09= CAST('1991-08-05 01:01:01' as datetime) ; +set @arg10= unix_timestamp('1991-01-01 01:01:01'); +set @arg11= YEAR('1991-01-01 01:01:01'); +set @arg12= 8 ; +set @arg12= NULL ; +set @arg13= 8.0 ; +set @arg13= NULL ; +set @arg14= 'abc'; +set @arg14= NULL ; +set @arg15= CAST('abc' as binary) ; +set @arg15= NULL ; +create table t5 as select +8 as const01, @arg01 as param01, +8.0 as const02, @arg02 as param02, +80.00000000000e-1 as const03, @arg03 as param03, +'abc' as const04, @arg04 as param04, +CAST('abc' as binary) as const05, @arg05 as param05, +'1991-08-05' as const06, @arg06 as param06, +CAST('1991-08-05' as date) as const07, @arg07 as param07, +'1991-08-05 01:01:01' as const08, @arg08 as param08, +CAST('1991-08-05 01:01:01' as datetime) as const09, @arg09 as param09, +unix_timestamp('1991-01-01 01:01:01') as const10, @arg10 as param10, +YEAR('1991-01-01 01:01:01') as const11, @arg11 as param11, +NULL as const12, @arg12 as param12, +@arg13 as param13, +@arg14 as param14, +@arg15 as param15; +show create table t5 ; +Table Create Table +t5 CREATE TABLE `t5` ( + `const01` int(1) NOT NULL DEFAULT '0', + `param01` bigint(20) DEFAULT NULL, + `const02` decimal(2,1) NOT NULL DEFAULT '0.0', + `param02` decimal(65,30) DEFAULT NULL, + `const03` double NOT NULL DEFAULT '0', + `param03` double DEFAULT NULL, + `const04` varchar(3) NOT NULL DEFAULT '', + `param04` longtext, + `const05` varbinary(3) NOT NULL DEFAULT '', + `param05` longblob, + `const06` varchar(10) NOT NULL DEFAULT '', + `param06` longtext, + `const07` date DEFAULT NULL, + `param07` longblob, + `const08` varchar(19) NOT NULL DEFAULT '', + `param08` longtext, + `const09` datetime DEFAULT NULL, + `param09` longblob, + `const10` int(10) NOT NULL DEFAULT '0', + `param10` bigint(20) DEFAULT NULL, + `const11` int(4) DEFAULT NULL, + `param11` bigint(20) DEFAULT NULL, + `const12` binary(0) DEFAULT NULL, + `param12` bigint(20) DEFAULT NULL, + `param13` decimal(65,30) DEFAULT NULL, + `param14` longtext, + `param15` longblob +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +select * from t5 ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t5 t5 const01 const01 3 1 1 N 32769 0 63 +def test t5 t5 param01 param01 8 20 1 Y 32768 0 63 +def test t5 t5 const02 const02 246 4 3 N 1 1 63 +def test t5 t5 param02 param02 246 67 32 Y 0 30 63 +def test t5 t5 const03 const03 5 17 1 N 32769 31 63 +def test t5 t5 param03 param03 5 23 1 Y 32768 31 63 +def test t5 t5 const04 const04 253 3 3 N 1 0 8 +def test t5 t5 param04 param04 252 4294967295 3 Y 16 0 8 +def test t5 t5 const05 const05 253 3 3 N 129 0 63 +def test t5 t5 param05 param05 252 4294967295 3 Y 144 0 63 +def test t5 t5 const06 const06 253 10 10 N 1 0 8 +def test t5 t5 param06 param06 252 4294967295 10 Y 16 0 8 +def test t5 t5 const07 const07 10 10 10 Y 128 0 63 +def test t5 t5 param07 param07 252 4294967295 10 Y 144 0 63 +def test t5 t5 const08 const08 253 19 19 N 1 0 8 +def test t5 t5 param08 param08 252 4294967295 19 Y 16 0 8 +def test t5 t5 const09 const09 12 19 19 Y 128 0 63 +def test t5 t5 param09 param09 252 4294967295 19 Y 144 0 63 +def test t5 t5 const10 const10 3 10 9 N 32769 0 63 +def test t5 t5 param10 param10 8 20 9 Y 32768 0 63 +def test t5 t5 const11 const11 3 4 4 Y 32768 0 63 +def test t5 t5 param11 param11 8 20 4 Y 32768 0 63 +def test t5 t5 const12 const12 254 0 0 Y 128 0 63 +def test t5 t5 param12 param12 8 20 0 Y 32768 0 63 +def test t5 t5 param13 param13 246 67 0 Y 0 30 63 +def test t5 t5 param14 param14 252 4294967295 0 Y 16 0 8 +def test t5 t5 param15 param15 252 4294967295 0 Y 144 0 63 +const01 8 +param01 8 +const02 8.0 +param02 8.000000000000000000000000000000 +const03 8 +param03 8 +const04 abc +param04 abc +const05 abc +param05 abc +const06 1991-08-05 +param06 1991-08-05 +const07 1991-08-05 +param07 1991-08-05 +const08 1991-08-05 01:01:01 +param08 1991-08-05 01:01:01 +const09 1991-08-05 01:01:01 +param09 1991-08-05 01:01:01 +const10 662680861 +param10 662680861 +const11 1991 +param11 1991 +const12 NULL +param12 NULL +param13 NULL +param14 NULL +param15 NULL +drop table t5 ; +test_sequence +------ data type conversion tests ------ +delete from t1 ; +insert into t1 values (1,'one'); +insert into t1 values (2,'two'); +insert into t1 values (3,'three'); +insert into t1 values (4,'four'); +commit ; +delete from t9 ; +insert into t9 +set c1= 1, c2= 1, c3= 1, c4= 1, c5= 1, c6= 1, c7= 1, c8= 1, c9= 1, +c10= 1, c11= 1, c12 = 1, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=true, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='one', c32= 'monday'; +insert into t9 +set c1= 9, c2= 9, c3= 9, c4= 9, c5= 9, c6= 9, c7= 9, c8= 9, c9= 9, +c10= 9, c11= 9, c12 = 9, +c13= '2004-02-29', c14= '2004-02-29 11:11:11', c15= '2004-02-29 11:11:11', +c16= '11:11:11', c17= '2004', +c18= 1, c19=false, c20= 'a', c21= '123456789a', +c22= '123456789a123456789b123456789c', c23= 'tinyblob', c24= 'tinytext', +c25= 'blob', c26= 'text', c27= 'mediumblob', c28= 'mediumtext', +c29= 'longblob', c30= 'longtext', c31='two', c32= 'tuesday'; +commit ; +insert into t9 set c1= 0, c15= '1991-01-01 01:01:01' ; +select * from t9 order by c1 ; +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c17 c18 c19 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 c31 c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +9 9 9 9 9 9 9 9 9 9 9.0000 9.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 0 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext two tuesday +test_sequence +------ select @parameter:= column ------ +prepare full_info from "select @arg01, @arg02, @arg03, @arg04, + @arg05, @arg06, @arg07, @arg08, + @arg09, @arg10, @arg11, @arg12, + @arg13, @arg14, @arg15, @arg16, + @arg17, @arg18, @arg19, @arg20, + @arg21, @arg22, @arg23, @arg24, + @arg25, @arg26, @arg27, @arg28, + @arg29, @arg30, @arg31, @arg32" ; +select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, +@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, +@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, +@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, +@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, +@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, +@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, +@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= 1 ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +select @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, +@arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, +@arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, +@arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, +@arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, +@arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, +@arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, +@arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= 0 ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select + @arg01:= c1, @arg02:= c2, @arg03:= c3, @arg04:= c4, + @arg05:= c5, @arg06:= c6, @arg07:= c7, @arg08:= c8, + @arg09:= c9, @arg10:= c10, @arg11:= c11, @arg12:= c12, + @arg13:= c13, @arg14:= c14, @arg15:= c15, @arg16:= c16, + @arg17:= c17, @arg18:= c18, @arg19:= c19, @arg20:= c20, + @arg21:= c21, @arg22:= c22, @arg23:= c23, @arg24:= c24, + @arg25:= c25, @arg26:= c26, @arg27:= c27, @arg28:= c28, + @arg29:= c29, @arg30:= c30, @arg31:= c31, @arg32:= c32 +from t9 where c1= ?" ; +set @my_key= 1 ; +execute stmt1 using @my_key ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +set @my_key= 0 ; +execute stmt1 using @my_key ; +@arg01:= c1 @arg02:= c2 @arg03:= c3 @arg04:= c4 @arg05:= c5 @arg06:= c6 @arg07:= c7 @arg08:= c8 @arg09:= c9 @arg10:= c10 @arg11:= c11 @arg12:= c12 @arg13:= c13 @arg14:= c14 @arg15:= c15 @arg16:= c16 @arg17:= c17 @arg18:= c18 @arg19:= c19 @arg20:= c20 @arg21:= c21 @arg22:= c22 @arg23:= c23 @arg24:= c24 @arg25:= c25 @arg26:= c26 @arg27:= c27 @arg28:= c28 @arg29:= c29 @arg30:= c30 @arg31:= c31 @arg32:= c32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select ? := c1 from t9 where c1= 1" ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ':= c1 from t9 where c1= 1' at line 1 +test_sequence +------ select column, .. into @parm,.. ------ +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, +c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, +c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, +@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, +@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, +@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= 1 ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, +c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, +c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, +@arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, +@arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, +@arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= 0 ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, + c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, + c25, c26, c27, c28, c29, c30, c31, c32 +into @arg01, @arg02, @arg03, @arg04, @arg05, @arg06, @arg07, @arg08, + @arg09, @arg10, @arg11, @arg12, @arg13, @arg14, @arg15, @arg16, + @arg17, @arg18, @arg19, @arg20, @arg21, @arg22, @arg23, @arg24, + @arg25, @arg26, @arg27, @arg28, @arg29, @arg30, @arg31, @arg32 +from t9 where c1= ?" ; +set @my_key= 1 ; +execute stmt1 using @my_key ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 1 Y 128 0 63 +def @arg03 253 20 1 Y 128 0 63 +def @arg04 253 20 1 Y 128 0 63 +def @arg05 253 20 1 Y 128 0 63 +def @arg06 253 20 1 Y 128 0 63 +def @arg07 253 23 1 Y 128 31 63 +def @arg08 253 23 1 Y 128 31 63 +def @arg09 253 23 1 Y 128 31 63 +def @arg10 253 23 1 Y 128 31 63 +def @arg11 253 67 6 Y 128 30 63 +def @arg12 253 67 6 Y 128 30 63 +def @arg13 253 16777216 10 Y 128 31 63 +def @arg14 253 16777216 19 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 8 Y 128 31 63 +def @arg17 253 20 4 Y 128 0 63 +def @arg18 253 20 1 Y 128 0 63 +def @arg19 253 20 1 Y 128 0 63 +def @arg20 253 16777216 1 Y 0 31 8 +def @arg21 253 16777216 10 Y 0 31 8 +def @arg22 253 16777216 30 Y 0 31 8 +def @arg23 253 16777216 8 Y 128 31 63 +def @arg24 253 16777216 8 Y 0 31 8 +def @arg25 253 16777216 4 Y 128 31 63 +def @arg26 253 16777216 4 Y 0 31 8 +def @arg27 253 16777216 10 Y 128 31 63 +def @arg28 253 16777216 10 Y 0 31 8 +def @arg29 253 16777216 8 Y 128 31 63 +def @arg30 253 16777216 8 Y 0 31 8 +def @arg31 253 16777216 3 Y 0 31 8 +def @arg32 253 16777216 6 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +1 1 1 1 1 1 1 1 1 1 1.0000 1.0000 2004-02-29 2004-02-29 11:11:11 2004-02-29 11:11:11 11:11:11 2004 1 1 a 123456789a 123456789a123456789b123456789c tinyblob tinytext blob text mediumblob mediumtext longblob longtext one monday +set @my_key= 0 ; +execute stmt1 using @my_key ; +execute full_info ; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def @arg01 253 20 1 Y 128 0 63 +def @arg02 253 20 0 Y 128 0 63 +def @arg03 253 20 0 Y 128 0 63 +def @arg04 253 20 0 Y 128 0 63 +def @arg05 253 20 0 Y 128 0 63 +def @arg06 253 20 0 Y 128 0 63 +def @arg07 253 23 0 Y 128 31 63 +def @arg08 253 23 0 Y 128 31 63 +def @arg09 253 23 0 Y 128 31 63 +def @arg10 253 23 0 Y 128 31 63 +def @arg11 253 67 0 Y 128 30 63 +def @arg12 253 67 0 Y 128 30 63 +def @arg13 253 16777216 0 Y 128 31 63 +def @arg14 253 16777216 0 Y 128 31 63 +def @arg15 253 16777216 19 Y 128 31 63 +def @arg16 253 16777216 0 Y 128 31 63 +def @arg17 253 20 0 Y 128 0 63 +def @arg18 253 20 0 Y 128 0 63 +def @arg19 253 20 0 Y 128 0 63 +def @arg20 253 16777216 0 Y 0 31 8 +def @arg21 253 16777216 0 Y 0 31 8 +def @arg22 253 16777216 0 Y 0 31 8 +def @arg23 253 16777216 0 Y 128 31 63 +def @arg24 253 16777216 0 Y 0 31 8 +def @arg25 253 16777216 0 Y 128 31 63 +def @arg26 253 16777216 0 Y 0 31 8 +def @arg27 253 16777216 0 Y 128 31 63 +def @arg28 253 16777216 0 Y 0 31 8 +def @arg29 253 16777216 0 Y 128 31 63 +def @arg30 253 16777216 0 Y 0 31 8 +def @arg31 253 16777216 0 Y 0 31 8 +def @arg32 253 16777216 0 Y 0 31 8 +@arg01 @arg02 @arg03 @arg04 @arg05 @arg06 @arg07 @arg08 @arg09 @arg10 @arg11 @arg12 @arg13 @arg14 @arg15 @arg16 @arg17 @arg18 @arg19 @arg20 @arg21 @arg22 @arg23 @arg24 @arg25 @arg26 @arg27 @arg28 @arg29 @arg30 @arg31 @arg32 +0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1991-01-01 01:01:01 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +prepare stmt1 from "select c1 into ? from t9 where c1= 1" ; +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '? from t9 where c1= 1' at line 1 +test_sequence +-- insert into numeric columns -- +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 ) ; +set @arg00= 21 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22 )" ; +execute stmt1 ; +set @arg00= 23; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, +30.0, 30.0, 30.0 ) ; +set @arg00= 31.0 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, + 32.0, 32.0, 32.0 )" ; +execute stmt1 ; +set @arg00= 33.0; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( '40', '40', '40', '40', '40', '40', '40', '40', +'40', '40', '40' ) ; +set @arg00= '41' ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( '42', '42', '42', '42', '42', '42', '42', '42', + '42', '42', '42' )" ; +execute stmt1 ; +set @arg00= '43'; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary), +CAST('50' as binary), CAST('50' as binary), CAST('50' as binary) ) ; +set @arg00= CAST('51' as binary) ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary), + CAST('52' as binary), CAST('52' as binary), CAST('52' as binary) )" ; +execute stmt1 ; +set @arg00= CAST('53' as binary) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 2 ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 60, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +NULL, NULL, NULL ) ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 61, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt1 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 62, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL )" ; +execute stmt1 ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 63, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 8.0 ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 71, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 73, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +set @arg00= 'abc' ; +set @arg00= NULL ; +insert into t9 +( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values +( 81, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ) ; +prepare stmt2 from "insert into t9 + ( c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 83, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +execute stmt2 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 +from t9 where c1 >= 20 +order by c1 ; +c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c12 +20 20 20 20 20 20 20 20 20 20 20.0000 +21 21 21 21 21 21 21 21 21 21 21.0000 +22 22 22 22 22 22 22 22 22 22 22.0000 +23 23 23 23 23 23 23 23 23 23 23.0000 +30 30 30 30 30 30 30 30 30 30 30.0000 +31 31 31 31 31 31 31 31 31 31 31.0000 +32 32 32 32 32 32 32 32 32 32 32.0000 +33 33 33 33 33 33 33 33 33 33 33.0000 +40 40 40 40 40 40 40 40 40 40 40.0000 +41 41 41 41 41 41 41 41 41 41 41.0000 +42 42 42 42 42 42 42 42 42 42 42.0000 +43 43 43 43 43 43 43 43 43 43 43.0000 +50 50 50 50 50 50 50 50 50 50 50.0000 +51 51 51 51 51 51 51 51 51 51 51.0000 +52 52 52 52 52 52 52 52 52 52 52.0000 +53 53 53 53 53 53 53 53 53 53 53.0000 +60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +test_sequence +-- select .. where numeric column = .. -- +set @arg00= 20; +select 'true' as found from t9 +where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20 +and c8= 20 and c9= 20 and c10= 20 and c12= 20; +found +true +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c2= 20 and c3= 20 and c4= 20 and c5= 20 and c6= 20 and c7= 20 + and c8= 20 and c9= 20 and c10= 20 and c12= 20 "; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20.0; +select 'true' as found from t9 +where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0 +and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0; +found +true +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20.0 and c2= 20.0 and c3= 20.0 and c4= 20.0 and c5= 20.0 and c6= 20.0 + and c7= 20.0 and c8= 20.0 and c9= 20.0 and c10= 20.0 and c12= 20.0 "; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +select 'true' as found from t9 +where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20' + and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20'; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= '20' and c2= '20' and c3= '20' and c4= '20' and c5= '20' and c6= '20' + and c7= '20' and c8= '20' and c9= '20' and c10= '20' and c12= '20' "; +execute stmt1 ; +found +true +set @arg00= '20'; +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +select 'true' as found from t9 +where c1= CAST('20' as binary) and c2= CAST('20' as binary) and +c3= CAST('20' as binary) and c4= CAST('20' as binary) and +c5= CAST('20' as binary) and c6= CAST('20' as binary) and +c7= CAST('20' as binary) and c8= CAST('20' as binary) and +c9= CAST('20' as binary) and c10= CAST('20' as binary) and +c12= CAST('20' as binary); +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= CAST('20' as binary) and c2= CAST('20' as binary) and + c3= CAST('20' as binary) and c4= CAST('20' as binary) and + c5= CAST('20' as binary) and c6= CAST('20' as binary) and + c7= CAST('20' as binary) and c8= CAST('20' as binary) and + c9= CAST('20' as binary) and c10= CAST('20' as binary) and + c12= CAST('20' as binary) "; +execute stmt1 ; +found +true +set @arg00= CAST('20' as binary) ; +select 'true' as found from t9 +where c1= @arg00 and c2= @arg00 and c3= @arg00 and c4= @arg00 and c5= @arg00 +and c6= @arg00 and c7= @arg00 and c8= @arg00 and c9= @arg00 and c10= @arg00 +and c12= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= ? and c2= ? and c3= ? and c4= ? and c5= ? + and c6= ? and c7= ? and c8= ? and c9= ? and c10= ? + and c12= ? "; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00 ; +found +true +delete from t9 ; +test_sequence +-- some numeric overflow experiments -- +prepare my_insert from "insert into t9 + ( c21, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 ) +values + ( 'O', ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )" ; +prepare my_select from "select c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c12 +from t9 where c21 = 'O' "; +prepare my_delete from "delete from t9 where c21 = 'O' "; +set @arg00= 9223372036854775807 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 9.22337e+18 +c8 9.22337203685478e+18 +c9 9.22337203685478e+18 +c10 9.22337203685478e+18 +c12 9999.9999 +execute my_delete ; +set @arg00= '9223372036854775807' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 9.22337e+18 +c8 9.22337203685478e+18 +c9 9.22337203685478e+18 +c10 9.22337203685478e+18 +c12 9999.9999 +execute my_delete ; +set @arg00= -9223372036854775808 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -9.22337e+18 +c8 -9.22337203685478e+18 +c9 -9.22337203685478e+18 +c10 -9.22337203685478e+18 +c12 -9999.9999 +execute my_delete ; +set @arg00= '-9223372036854775808' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -9.22337e+18 +c8 -9.22337203685478e+18 +c9 -9.22337203685478e+18 +c10 -9.22337203685478e+18 +c12 -9999.9999 +execute my_delete ; +set @arg00= 1.11111111111111111111e+50 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 3.40282e+38 +c8 1.11111111111111e+50 +c9 1.11111111111111e+50 +c10 1.11111111111111e+50 +c12 9999.9999 +execute my_delete ; +set @arg00= '1.11111111111111111111e+50' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 127 +c2 32767 +c3 8388607 +c4 2147483647 +c5 2147483647 +c6 9223372036854775807 +c7 3.40282e+38 +c8 1.11111111111111e+50 +c9 1.11111111111111e+50 +c10 1.11111111111111e+50 +c12 9999.9999 +execute my_delete ; +set @arg00= -1.11111111111111111111e+50 ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -3.40282e+38 +c8 -1.11111111111111e+50 +c9 -1.11111111111111e+50 +c10 -1.11111111111111e+50 +c12 -9999.9999 +execute my_delete ; +set @arg00= '-1.11111111111111111111e+50' ; +execute my_insert using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +Warnings: +Warning 1264 Out of range value for column 'c1' at row 1 +Warning 1264 Out of range value for column 'c2' at row 1 +Warning 1264 Out of range value for column 'c3' at row 1 +Warning 1264 Out of range value for column 'c4' at row 1 +Warning 1264 Out of range value for column 'c5' at row 1 +Warning 1264 Out of range value for column 'c6' at row 1 +Warning 1264 Out of range value for column 'c7' at row 1 +Warning 1264 Out of range value for column 'c12' at row 1 +execute my_select ; +c1 -128 +c2 -32768 +c3 -8388608 +c4 -2147483648 +c5 -2147483648 +c6 -9223372036854775808 +c7 -3.40282e+38 +c8 -1.11111111111111e+50 +c9 -1.11111111111111e+50 +c10 -1.11111111111111e+50 +c12 -9999.9999 +execute my_delete ; +test_sequence +-- insert into string columns -- +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c20' at row 1 +select c1, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30 +from t9 where c1 >= 20 +order by c1 ; +c1 c20 c21 c22 c23 c24 c25 c26 c27 c28 c29 c30 +20 2 20 20 20 20 20 20 20 20 20 20 +21 2 21 21 21 21 21 21 21 21 21 21 +22 2 22 22 22 22 22 22 22 22 22 22 +23 2 23 23 23 23 23 23 23 23 23 23 +30 3 30 30 30 30 30 30 30 30 30 30 +31 3 31 31 31 31 31 31 31 31 31 31 +32 3 32 32 32 32 32 32 32 32 32 32 +33 3 33 33 33 33 33 33 33 33 33 33 +40 4 40 40 40 40 40 40 40 40 40 40 +41 4 41 41 41 41 41 41 41 41 41 41 +42 4 42 42 42 42 42 42 42 42 42 42 +43 4 43 43 43 43 43 43 43 43 43 43 +50 5 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 50.0 +51 5 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 51.0 +52 5 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 52.0 +53 5 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 +54 5 54 54 54.00 54.00 54.00 54.00 54.00 54.00 54.00 54.00 +55 5 55 55 55 55 55 55 55 55 55 55 +56 6 56 56 56.00 56.00 56.00 56.00 56.00 56.00 56.00 56.00 +57 6 57 57 57.00 57.00 57.00 57.00 57.00 57.00 57.00 57.00 +60 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +61 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +62 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +63 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +71 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +73 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +81 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +83 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +test_sequence +-- select .. where string column = .. -- +set @arg00= '20'; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and +c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and +c27= '20' and c28= '20' and c29= '20' and c30= '20' ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr('20',1+length(c20)))= '20' and c21= '20' and + c22= '20' and c23= '20' and c24= '20' and c25= '20' and c26= '20' and + c27= '20' and c28= '20' and c29= '20' and c30= '20'" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= CAST('20' as binary); +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20))) += CAST('20' as binary) and c21= CAST('20' as binary) +and c22= CAST('20' as binary) and c23= CAST('20' as binary) and +c24= CAST('20' as binary) and c25= CAST('20' as binary) and +c26= CAST('20' as binary) and c27= CAST('20' as binary) and +c28= CAST('20' as binary) and c29= CAST('20' as binary) and +c30= CAST('20' as binary) ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20))) = @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and +c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(CAST('20' as binary),1+length(c20))) + = CAST('20' as binary) and c21= CAST('20' as binary) + and c22= CAST('20' as binary) and c23= CAST('20' as binary) and + c24= CAST('20' as binary) and c25= CAST('20' as binary) and + c26= CAST('20' as binary) and c27= CAST('20' as binary) and + c28= CAST('20' as binary) and c29= CAST('20' as binary) and + c30= CAST('20' as binary)" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20))) = ? and c21= ? and + c22= ? and c23= ? and c25= ? and c26= ? and c27= ? and c28= ? and + c29= ? and c30= ?"; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and +c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and +c27= 20 and c28= 20 and c29= 20 and c30= 20 ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20,1+length(c20)))= 20 and c21= 20 and + c22= 20 and c23= 20 and c24= 20 and c25= 20 and c26= 20 and + c27= 20 and c28= 20 and c29= 20 and c30= 20" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 20.0; +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and +c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and +c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0 ; +found +true +select 'true' as found from t9 +where c1= 20 and concat(c20,substr(@arg00,1+length(c20)))= @arg00 and +c21= @arg00 and c22= @arg00 and c23= @arg00 and c25= @arg00 and +c26= @arg00 and c27= @arg00 and c28= @arg00 and c29= @arg00 and c30= @arg00; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(20.0,1+length(c20)))= 20.0 and c21= 20.0 and + c22= 20.0 and c23= 20.0 and c24= 20.0 and c25= 20.0 and c26= 20.0 and + c27= 20.0 and c28= 20.0 and c29= 20.0 and c30= 20.0" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and concat(c20,substr(?,1+length(c20)))= ? and + c21= ? and c22= ? and c23= ? and c25= ? and + c26= ? and c27= ? and c28= ? and c29= ? and c30= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00, @arg00, +@arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +delete from t9 ; +test_sequence +-- insert into date/time columns -- +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1264 Out of range value for column 'c13' at row 1 +Warning 1264 Out of range value for column 'c14' at row 1 +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +Warnings: +Warning 1265 Data truncated for column 'c15' at row 1 +Warning 1264 Out of range value for column 'c16' at row 1 +Warning 1264 Out of range value for column 'c17' at row 1 +select c1, c13, c14, c15, c16, c17 from t9 order by c1 ; +c1 c13 c14 c15 c16 c17 +20 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +21 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +22 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +23 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +30 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +31 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +32 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +33 1991-01-01 1991-01-01 01:01:01 1991-01-01 01:01:01 01:01:01 1991 +40 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +41 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +42 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +43 0000-00-00 0000-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +50 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +51 2010-00-00 2010-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +52 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +53 2001-00-00 2001-00-00 00:00:00 0000-00-00 00:00:00 838:59:59 0000 +60 NULL NULL 1991-01-01 01:01:01 NULL NULL +61 NULL NULL 1991-01-01 01:01:01 NULL NULL +62 NULL NULL 1991-01-01 01:01:01 NULL NULL +63 NULL NULL 1991-01-01 01:01:01 NULL NULL +71 NULL NULL 1991-01-01 01:01:01 NULL NULL +73 NULL NULL 1991-01-01 01:01:01 NULL NULL +81 NULL NULL 1991-01-01 01:01:01 NULL NULL +83 NULL NULL 1991-01-01 01:01:01 NULL NULL +test_sequence +-- select .. where date/time column = .. -- +set @arg00= '1991-01-01 01:01:01' ; +select 'true' as found from t9 +where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and +c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and +c17= '1991-01-01 01:01:01' ; +found +true +select 'true' as found from t9 +where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00 +and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= '1991-01-01 01:01:01' and c14= '1991-01-01 01:01:01' and + c15= '1991-01-01 01:01:01' and c16= '1991-01-01 01:01:01' and + c17= '1991-01-01 01:01:01'" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= CAST('1991-01-01 01:01:01' as datetime) ; +select 'true' as found from t9 +where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and +c14= CAST('1991-01-01 01:01:01' as datetime) and +c15= CAST('1991-01-01 01:01:01' as datetime) and +c16= CAST('1991-01-01 01:01:01' as datetime) and +c17= CAST('1991-01-01 01:01:01' as datetime) ; +found +true +select 'true' as found from t9 +where c1= 20 and c13= @arg00 and c14= @arg00 and c15= @arg00 and c16= @arg00 +and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= CAST('1991-01-01 01:01:01' as datetime) and + c14= CAST('1991-01-01 01:01:01' as datetime) and + c15= CAST('1991-01-01 01:01:01' as datetime) and + c16= CAST('1991-01-01 01:01:01' as datetime) and + c17= CAST('1991-01-01 01:01:01' as datetime)" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c13= ? and c14= ? and c15= ? and c16= ? and c17= ?" ; +execute stmt1 using @arg00, @arg00, @arg00, @arg00, @arg00 ; +found +true +set @arg00= 1991 ; +select 'true' as found from t9 +where c1= 20 and c17= 1991 ; +found +true +select 'true' as found from t9 +where c1= 20 and c17= @arg00 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c17= 1991" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and c17= ?" ; +execute stmt1 using @arg00 ; +found +true +set @arg00= 1.991e+3 ; +select 'true' as found from t9 +where c1= 20 and abs(c17 - 1.991e+3) < 0.01 ; +found +true +select 'true' as found from t9 +where c1= 20 and abs(c17 - @arg00) < 0.01 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and abs(c17 - 1.991e+3) < 0.01" ; +execute stmt1 ; +found +true +prepare stmt1 from "select 'true' as found from t9 +where c1= 20 and abs(c17 - ?) < 0.01" ; +execute stmt1 using @arg00 ; +found +true +drop table t1, t9; diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def index 32de2077e81..746af7a6c38 100644 --- a/mysql-test/t/disabled.def +++ b/mysql-test/t/disabled.def @@ -43,7 +43,6 @@ ctype_big5 : BUG#26711 2007-06-21 Lars Test has never worked on Do #ndb_binlog_ddl_multi : BUG#18976 2006-04-10 kent CRBR: multiple binlog, second binlog may miss schema log events #ndb_binlog_discover : bug#21806 2006-08-24 #ndb_autodiscover3 : bug#21806 - #rpl_ndb_dd_advance : Bug#25913 rpl_ndb_dd_advance fails randomly ndb_partition_error2 : HF is not sure if the test can work as internded on all the platforms diff --git a/mysql-test/t/events_logs_tests.test b/mysql-test/t/events_logs_tests.test index 0c56f32beff..4fa7f3bbd71 100644 --- a/mysql-test/t/events_logs_tests.test +++ b/mysql-test/t/events_logs_tests.test @@ -96,7 +96,7 @@ let $wait_condition= SELECT COUNT(*) = 1 FROM mysql.slow_log; SELECT * FROM slow_event_test; --echo "Check slow log. Should see 1 row because 2 is over the threshold of 1 for GLOBAL, though under SESSION which is 10" --replace_column 1 USER_HOST 2 SLEEPVAL -SELECT user_host, query_time, db, sql_text FROM mysql.slow_log; +SELECT user_host, query_time, db, sql_text FROM mysql.slow_log where sql_text <> "DROP EVENT long_event"; DROP EVENT long_event2; --echo "Make it quite long" SET SESSION long_query_time=300; diff --git a/mysql-test/t/maria-big.test b/mysql-test/t/maria-big.test new file mode 100644 index 00000000000..f68531c3894 --- /dev/null +++ b/mysql-test/t/maria-big.test @@ -0,0 +1,26 @@ +# Test of scenarios potentially too big for --valgrind or --mem +enable_info; +set storage_engine=maria; +disable_warnings; +drop table if exists t1, t2; +enable_warnings; +create table t1(a char(3)); +insert into t1 values("abc"); +insert into t1 select "def" from t1; +insert into t1 select "ghi" from t1; +insert into t1 select "jkl" from t1; +insert into t1 select "mno" from t1; +insert into t1 select "pqr" from t1; +insert into t1 select "stu" from t1; +insert into t1 select "vwx" from t1; +insert into t1 select "yza" from t1; +insert into t1 select "ceg" from t1; +insert into t1 select "ikm" from t1; +insert into t1 select "oqs" from t1; +select count(*) from t1; +insert into t1 select "uwy" from t1; +create table t2 select * from t1; +select count(*) from t1; +select count(*) from t2; +drop table t1, t2; +disable_info; diff --git a/mysql-test/t/maria.test b/mysql-test/t/maria.test new file mode 100644 index 00000000000..763abbd9d25 --- /dev/null +++ b/mysql-test/t/maria.test @@ -0,0 +1,1125 @@ +# +# Testing of potential probelms in Maria +# This code was initially taken from myisam.test +# + +-- source include/have_maria.inc + +let $default=`select @@global.storage_engine`; +set global storage_engine=maria; +set session storage_engine=maria; + +# Initialise +--disable_warnings +drop table if exists t1,t2; +--enable_warnings +SET SQL_WARNINGS=1; + +# +# UNIQUE key test +# +# as long as maria cannot rollback, binlog should contain both inserts +# +RESET MASTER; +set binlog_format=statement; +CREATE TABLE t1 (a int primary key); +insert t1 values (1),(2),(3); +--error 1582 +insert t1 values (4),(2),(5); +select * from t1; +SHOW BINLOG EVENTS FROM 102; +drop table t1; +set binlog_format=default; + +# +# Test problem with CHECK TABLE; +# + +CREATE TABLE t1 ( + STRING_DATA char(255) default NULL, + KEY string_data (STRING_DATA) +); + +INSERT INTO t1 VALUES ('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'); +INSERT INTO t1 VALUES ('DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD'); +INSERT INTO t1 VALUES ('FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF'); +INSERT INTO t1 VALUES ('FGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG'); +INSERT INTO t1 VALUES ('HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH'); +INSERT INTO t1 VALUES ('WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW'); +CHECK TABLE t1; +drop table t1; + +# +# Test problem with rows that are 65517-65520 bytes long +# + +create table t1 (a tinyint not null auto_increment, b blob not null, primary key (a)); + +let $1=100; +disable_query_log; +--disable_warnings +SET SQL_WARNINGS=0; +while ($1) +{ + eval insert into t1 (b) values(repeat(char(65+$1),65550-$1)); + dec $1; +} +SET SQL_WARNINGS=1; +--enable_warnings +--enable_query_log +check table t1; +repair table t1; +delete from t1 where (a & 1); +check table t1; +repair table t1; +check table t1; +drop table t1; + +# +# Test bug: Two optimize in a row reset index cardinality +# + +create table t1 (a int not null auto_increment, b int not null, primary key (a), index(b)); +insert into t1 (b) values (1),(2),(2),(2),(2); +optimize table t1; +show index from t1; +optimize table t1; +show index from t1; +drop table t1; + +# +# Test of how ORDER BY works when doing it on the whole table +# + +create table t1 (a int not null, b int not null, c int not null, primary key (a),key(b)); +insert into t1 values (3,3,3),(1,1,1),(2,2,2),(4,4,4); +explain select * from t1 order by a; +explain select * from t1 order by b; +explain select * from t1 order by c; +explain select a from t1 order by a; +explain select b from t1 order by b; +explain select a,b from t1 order by b; +explain select a,b from t1; +explain select a,b,c from t1; +drop table t1; + +# +# Test of OPTIMIZE of locked and modified tables +# +set autocommit=0; +begin; +CREATE TABLE t1 (a INT); +INSERT INTO t1 VALUES (1), (2), (3); +LOCK TABLES t1 WRITE; +INSERT INTO t1 VALUES (1), (2), (3); +commit; +set autocommit=1; +UNLOCK TABLES; +OPTIMIZE TABLE t1; +DROP TABLE t1; + +# +# Test of optimize, when only mi_sort_index (but not mi_repair*) is done +# in ha_maria::repair, and index size is changed (decreased). +# + +create table t1 ( t1 char(255), key(t1(250))); +insert t1 values ('137513751375137513751375137513751375137569516951695169516951695169516951695169'); +insert t1 values ('178417841784178417841784178417841784178403420342034203420342034203420342034203'); +insert t1 values ('213872387238723872387238723872387238723867376737673767376737673767376737673767'); +insert t1 values ('242624262426242624262426242624262426242607890789078907890789078907890789078907'); +insert t1 values ('256025602560256025602560256025602560256011701170117011701170117011701170117011'); +insert t1 values ('276027602760276027602760276027602760276001610161016101610161016101610161016101'); +insert t1 values ('281528152815281528152815281528152815281564956495649564956495649564956495649564'); +insert t1 values ('292129212921292129212921292129212921292102100210021002100210021002100210021002'); +insert t1 values ('380638063806380638063806380638063806380634483448344834483448344834483448344834'); +insert t1 values ('411641164116411641164116411641164116411616301630163016301630163016301630163016'); +insert t1 values ('420842084208420842084208420842084208420899889988998899889988998899889988998899'); +insert t1 values ('438443844384438443844384438443844384438482448244824482448244824482448244824482'); +insert t1 values ('443244324432443244324432443244324432443239613961396139613961396139613961396139'); +insert t1 values ('485448544854485448544854485448544854485477847784778477847784778477847784778477'); +insert t1 values ('494549454945494549454945494549454945494555275527552755275527552755275527552755'); +insert t1 values ('538647864786478647864786478647864786478688918891889188918891889188918891889188'); +insert t1 values ('565556555655565556555655565556555655565554845484548454845484548454845484548454'); +insert t1 values ('607860786078607860786078607860786078607856665666566656665666566656665666566656'); +insert t1 values ('640164016401640164016401640164016401640141274127412741274127412741274127412741'); +insert t1 values ('719471947194719471947194719471947194719478717871787178717871787178717871787178'); +insert t1 values ('742574257425742574257425742574257425742549604960496049604960496049604960496049'); +insert t1 values ('887088708870887088708870887088708870887035963596359635963596359635963596359635'); +insert t1 values ('917791779177917791779177917791779177917773857385738573857385738573857385738573'); +insert t1 values ('933293329332933293329332933293329332933278987898789878987898789878987898789878'); +insert t1 values ('963896389638963896389638963896389638963877807780778077807780778077807780778077'); +delete from t1 where t1>'2'; +insert t1 values ('70'), ('84'), ('60'), ('20'), ('76'), ('89'), ('49'), ('50'), +('88'), ('61'), ('42'), ('98'), ('39'), ('30'), ('25'), ('66'), ('61'), ('48'), +('80'), ('84'), ('98'), ('19'), ('91'), ('42'), ('47'); +optimize table t1; +check table t1; +drop table t1; + +# +# test of maria with huge number of packed fields +# + +create table t1 (i1 int, i2 int, i3 int, i4 int, i5 int, i6 int, i7 int, i8 +int, i9 int, i10 int, i11 int, i12 int, i13 int, i14 int, i15 int, i16 int, i17 +int, i18 int, i19 int, i20 int, i21 int, i22 int, i23 int, i24 int, i25 int, +i26 int, i27 int, i28 int, i29 int, i30 int, i31 int, i32 int, i33 int, i34 +int, i35 int, i36 int, i37 int, i38 int, i39 int, i40 int, i41 int, i42 int, +i43 int, i44 int, i45 int, i46 int, i47 int, i48 int, i49 int, i50 int, i51 +int, i52 int, i53 int, i54 int, i55 int, i56 int, i57 int, i58 int, i59 int, +i60 int, i61 int, i62 int, i63 int, i64 int, i65 int, i66 int, i67 int, i68 +int, i69 int, i70 int, i71 int, i72 int, i73 int, i74 int, i75 int, i76 int, +i77 int, i78 int, i79 int, i80 int, i81 int, i82 int, i83 int, i84 int, i85 +int, i86 int, i87 int, i88 int, i89 int, i90 int, i91 int, i92 int, i93 int, +i94 int, i95 int, i96 int, i97 int, i98 int, i99 int, i100 int, i101 int, i102 +int, i103 int, i104 int, i105 int, i106 int, i107 int, i108 int, i109 int, i110 +int, i111 int, i112 int, i113 int, i114 int, i115 int, i116 int, i117 int, i118 +int, i119 int, i120 int, i121 int, i122 int, i123 int, i124 int, i125 int, i126 +int, i127 int, i128 int, i129 int, i130 int, i131 int, i132 int, i133 int, i134 +int, i135 int, i136 int, i137 int, i138 int, i139 int, i140 int, i141 int, i142 +int, i143 int, i144 int, i145 int, i146 int, i147 int, i148 int, i149 int, i150 +int, i151 int, i152 int, i153 int, i154 int, i155 int, i156 int, i157 int, i158 +int, i159 int, i160 int, i161 int, i162 int, i163 int, i164 int, i165 int, i166 +int, i167 int, i168 int, i169 int, i170 int, i171 int, i172 int, i173 int, i174 +int, i175 int, i176 int, i177 int, i178 int, i179 int, i180 int, i181 int, i182 +int, i183 int, i184 int, i185 int, i186 int, i187 int, i188 int, i189 int, i190 +int, i191 int, i192 int, i193 int, i194 int, i195 int, i196 int, i197 int, i198 +int, i199 int, i200 int, i201 int, i202 int, i203 int, i204 int, i205 int, i206 +int, i207 int, i208 int, i209 int, i210 int, i211 int, i212 int, i213 int, i214 +int, i215 int, i216 int, i217 int, i218 int, i219 int, i220 int, i221 int, i222 +int, i223 int, i224 int, i225 int, i226 int, i227 int, i228 int, i229 int, i230 +int, i231 int, i232 int, i233 int, i234 int, i235 int, i236 int, i237 int, i238 +int, i239 int, i240 int, i241 int, i242 int, i243 int, i244 int, i245 int, i246 +int, i247 int, i248 int, i249 int, i250 int, i251 int, i252 int, i253 int, i254 +int, i255 int, i256 int, i257 int, i258 int, i259 int, i260 int, i261 int, i262 +int, i263 int, i264 int, i265 int, i266 int, i267 int, i268 int, i269 int, i270 +int, i271 int, i272 int, i273 int, i274 int, i275 int, i276 int, i277 int, i278 +int, i279 int, i280 int, i281 int, i282 int, i283 int, i284 int, i285 int, i286 +int, i287 int, i288 int, i289 int, i290 int, i291 int, i292 int, i293 int, i294 +int, i295 int, i296 int, i297 int, i298 int, i299 int, i300 int, i301 int, i302 +int, i303 int, i304 int, i305 int, i306 int, i307 int, i308 int, i309 int, i310 +int, i311 int, i312 int, i313 int, i314 int, i315 int, i316 int, i317 int, i318 +int, i319 int, i320 int, i321 int, i322 int, i323 int, i324 int, i325 int, i326 +int, i327 int, i328 int, i329 int, i330 int, i331 int, i332 int, i333 int, i334 +int, i335 int, i336 int, i337 int, i338 int, i339 int, i340 int, i341 int, i342 +int, i343 int, i344 int, i345 int, i346 int, i347 int, i348 int, i349 int, i350 +int, i351 int, i352 int, i353 int, i354 int, i355 int, i356 int, i357 int, i358 +int, i359 int, i360 int, i361 int, i362 int, i363 int, i364 int, i365 int, i366 +int, i367 int, i368 int, i369 int, i370 int, i371 int, i372 int, i373 int, i374 +int, i375 int, i376 int, i377 int, i378 int, i379 int, i380 int, i381 int, i382 +int, i383 int, i384 int, i385 int, i386 int, i387 int, i388 int, i389 int, i390 +int, i391 int, i392 int, i393 int, i394 int, i395 int, i396 int, i397 int, i398 +int, i399 int, i400 int, i401 int, i402 int, i403 int, i404 int, i405 int, i406 +int, i407 int, i408 int, i409 int, i410 int, i411 int, i412 int, i413 int, i414 +int, i415 int, i416 int, i417 int, i418 int, i419 int, i420 int, i421 int, i422 +int, i423 int, i424 int, i425 int, i426 int, i427 int, i428 int, i429 int, i430 +int, i431 int, i432 int, i433 int, i434 int, i435 int, i436 int, i437 int, i438 +int, i439 int, i440 int, i441 int, i442 int, i443 int, i444 int, i445 int, i446 +int, i447 int, i448 int, i449 int, i450 int, i451 int, i452 int, i453 int, i454 +int, i455 int, i456 int, i457 int, i458 int, i459 int, i460 int, i461 int, i462 +int, i463 int, i464 int, i465 int, i466 int, i467 int, i468 int, i469 int, i470 +int, i471 int, i472 int, i473 int, i474 int, i475 int, i476 int, i477 int, i478 +int, i479 int, i480 int, i481 int, i482 int, i483 int, i484 int, i485 int, i486 +int, i487 int, i488 int, i489 int, i490 int, i491 int, i492 int, i493 int, i494 +int, i495 int, i496 int, i497 int, i498 int, i499 int, i500 int, i501 int, i502 +int, i503 int, i504 int, i505 int, i506 int, i507 int, i508 int, i509 int, i510 +int, i511 int, i512 int, i513 int, i514 int, i515 int, i516 int, i517 int, i518 +int, i519 int, i520 int, i521 int, i522 int, i523 int, i524 int, i525 int, i526 +int, i527 int, i528 int, i529 int, i530 int, i531 int, i532 int, i533 int, i534 +int, i535 int, i536 int, i537 int, i538 int, i539 int, i540 int, i541 int, i542 +int, i543 int, i544 int, i545 int, i546 int, i547 int, i548 int, i549 int, i550 +int, i551 int, i552 int, i553 int, i554 int, i555 int, i556 int, i557 int, i558 +int, i559 int, i560 int, i561 int, i562 int, i563 int, i564 int, i565 int, i566 +int, i567 int, i568 int, i569 int, i570 int, i571 int, i572 int, i573 int, i574 +int, i575 int, i576 int, i577 int, i578 int, i579 int, i580 int, i581 int, i582 +int, i583 int, i584 int, i585 int, i586 int, i587 int, i588 int, i589 int, i590 +int, i591 int, i592 int, i593 int, i594 int, i595 int, i596 int, i597 int, i598 +int, i599 int, i600 int, i601 int, i602 int, i603 int, i604 int, i605 int, i606 +int, i607 int, i608 int, i609 int, i610 int, i611 int, i612 int, i613 int, i614 +int, i615 int, i616 int, i617 int, i618 int, i619 int, i620 int, i621 int, i622 +int, i623 int, i624 int, i625 int, i626 int, i627 int, i628 int, i629 int, i630 +int, i631 int, i632 int, i633 int, i634 int, i635 int, i636 int, i637 int, i638 +int, i639 int, i640 int, i641 int, i642 int, i643 int, i644 int, i645 int, i646 +int, i647 int, i648 int, i649 int, i650 int, i651 int, i652 int, i653 int, i654 +int, i655 int, i656 int, i657 int, i658 int, i659 int, i660 int, i661 int, i662 +int, i663 int, i664 int, i665 int, i666 int, i667 int, i668 int, i669 int, i670 +int, i671 int, i672 int, i673 int, i674 int, i675 int, i676 int, i677 int, i678 +int, i679 int, i680 int, i681 int, i682 int, i683 int, i684 int, i685 int, i686 +int, i687 int, i688 int, i689 int, i690 int, i691 int, i692 int, i693 int, i694 +int, i695 int, i696 int, i697 int, i698 int, i699 int, i700 int, i701 int, i702 +int, i703 int, i704 int, i705 int, i706 int, i707 int, i708 int, i709 int, i710 +int, i711 int, i712 int, i713 int, i714 int, i715 int, i716 int, i717 int, i718 +int, i719 int, i720 int, i721 int, i722 int, i723 int, i724 int, i725 int, i726 +int, i727 int, i728 int, i729 int, i730 int, i731 int, i732 int, i733 int, i734 +int, i735 int, i736 int, i737 int, i738 int, i739 int, i740 int, i741 int, i742 +int, i743 int, i744 int, i745 int, i746 int, i747 int, i748 int, i749 int, i750 +int, i751 int, i752 int, i753 int, i754 int, i755 int, i756 int, i757 int, i758 +int, i759 int, i760 int, i761 int, i762 int, i763 int, i764 int, i765 int, i766 +int, i767 int, i768 int, i769 int, i770 int, i771 int, i772 int, i773 int, i774 +int, i775 int, i776 int, i777 int, i778 int, i779 int, i780 int, i781 int, i782 +int, i783 int, i784 int, i785 int, i786 int, i787 int, i788 int, i789 int, i790 +int, i791 int, i792 int, i793 int, i794 int, i795 int, i796 int, i797 int, i798 +int, i799 int, i800 int, i801 int, i802 int, i803 int, i804 int, i805 int, i806 +int, i807 int, i808 int, i809 int, i810 int, i811 int, i812 int, i813 int, i814 +int, i815 int, i816 int, i817 int, i818 int, i819 int, i820 int, i821 int, i822 +int, i823 int, i824 int, i825 int, i826 int, i827 int, i828 int, i829 int, i830 +int, i831 int, i832 int, i833 int, i834 int, i835 int, i836 int, i837 int, i838 +int, i839 int, i840 int, i841 int, i842 int, i843 int, i844 int, i845 int, i846 +int, i847 int, i848 int, i849 int, i850 int, i851 int, i852 int, i853 int, i854 +int, i855 int, i856 int, i857 int, i858 int, i859 int, i860 int, i861 int, i862 +int, i863 int, i864 int, i865 int, i866 int, i867 int, i868 int, i869 int, i870 +int, i871 int, i872 int, i873 int, i874 int, i875 int, i876 int, i877 int, i878 +int, i879 int, i880 int, i881 int, i882 int, i883 int, i884 int, i885 int, i886 +int, i887 int, i888 int, i889 int, i890 int, i891 int, i892 int, i893 int, i894 +int, i895 int, i896 int, i897 int, i898 int, i899 int, i900 int, i901 int, i902 +int, i903 int, i904 int, i905 int, i906 int, i907 int, i908 int, i909 int, i910 +int, i911 int, i912 int, i913 int, i914 int, i915 int, i916 int, i917 int, i918 +int, i919 int, i920 int, i921 int, i922 int, i923 int, i924 int, i925 int, i926 +int, i927 int, i928 int, i929 int, i930 int, i931 int, i932 int, i933 int, i934 +int, i935 int, i936 int, i937 int, i938 int, i939 int, i940 int, i941 int, i942 +int, i943 int, i944 int, i945 int, i946 int, i947 int, i948 int, i949 int, i950 +int, i951 int, i952 int, i953 int, i954 int, i955 int, i956 int, i957 int, i958 +int, i959 int, i960 int, i961 int, i962 int, i963 int, i964 int, i965 int, i966 +int, i967 int, i968 int, i969 int, i970 int, i971 int, i972 int, i973 int, i974 +int, i975 int, i976 int, i977 int, i978 int, i979 int, i980 int, i981 int, i982 +int, i983 int, i984 int, i985 int, i986 int, i987 int, i988 int, i989 int, i990 +int, i991 int, i992 int, i993 int, i994 int, i995 int, i996 int, i997 int, i998 +int, i999 int, i1000 int, b blob) row_format=dynamic; +insert into t1 values (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, "Sergei"); +update t1 set b=repeat('a',256); +update t1 set i1=0, i2=0, i3=0, i4=0, i5=0, i6=0, i7=0; +check table t1; +delete from t1 where i8=1; +select i1,i2 from t1; +check table t1; +drop table t1; + +# +# Test of REPAIR that once failed +# +CREATE TABLE `t1` ( + `post_id` mediumint(8) unsigned NOT NULL auto_increment, + `topic_id` mediumint(8) unsigned NOT NULL default '0', + `post_time` datetime NOT NULL default '0000-00-00 00:00:00', + `post_text` text NOT NULL, + `icon_url` varchar(10) NOT NULL default '', + `sign` tinyint(1) unsigned NOT NULL default '0', + `post_edit` varchar(150) NOT NULL default '', + `poster_login` varchar(35) NOT NULL default '', + `ip` varchar(15) NOT NULL default '', + PRIMARY KEY (`post_id`), + KEY `post_time` (`post_time`), + KEY `ip` (`ip`), + KEY `poster_login` (`poster_login`), + KEY `topic_id` (`topic_id`), + FULLTEXT KEY `post_text` (`post_text`) +); + +INSERT INTO t1 (post_text) VALUES ('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'),('ceci est un test'); + +REPAIR TABLE t1; +CHECK TABLE t1; +drop table t1; + +# +# Test of creating table with too long key +# + +--error 1071 +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255), KEY t1 (a, b, c, d, e)); +--error 1071 +CREATE TABLE t1 (a varchar(32000), unique key(a)); +--error 1070 +CREATE TABLE t1 (a varchar(1), b varchar(1), key (a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b,a,b)); +CREATE TABLE t1 (a varchar(255), b varchar(255), c varchar(255), d varchar(255), e varchar(255)); +--error 1071 +ALTER TABLE t1 ADD INDEX t1 (a, b, c, d, e); +DROP TABLE t1; + +# +# Test of cardinality of keys with NULL +# + +CREATE TABLE t1 (a int not null, b int, c int, key(b), key(c), key(a,b), key(c,a)); +INSERT into t1 values (0, null, 0), (0, null, 1), (0, null, 2), (0, null,3), (1,1,4); +create table t2 (a int not null, b int, c int, key(b), key(c), key(a)); +INSERT into t2 values (1,1,1), (2,2,2); +optimize table t1; +show index from t1; +explain select * from t1,t2 where t1.a=t2.a; +explain select * from t1,t2 force index(a) where t1.a=t2.a; +explain select * from t1 force index(a),t2 force index(a) where t1.a=t2.a; +explain select * from t1,t2 where t1.b=t2.b; +explain select * from t1,t2 force index(c) where t1.a=t2.a; +explain select * from t1 where a=0 or a=2; +explain select * from t1 force index (a) where a=0 or a=2; +explain select * from t1 where c=1; +explain select * from t1 use index() where c=1; +drop table t1,t2; + +# +# Test bug when updating a split dynamic row where keys are not changed +# + +create table t1 (a int not null auto_increment primary key, b varchar(255)); +insert into t1 (b) values (repeat('a',100)),(repeat('b',100)),(repeat('c',100)); +update t1 set b=repeat(left(b,1),200) where a=1; +delete from t1 where (a & 1)= 0; +update t1 set b=repeat('e',200) where a=1; +flush tables; +check table t1; + +# +# check updating with keys +# + +disable_query_log; +let $1 = 100; +while ($1) +{ + eval insert into t1 (b) values (repeat(char(($1 & 32)+65), $1)); + dec $1; +} +enable_query_log; +update t1 set b=repeat(left(b,1),255) where a between 1 and 5; +update t1 set b=repeat(left(b,1),10) where a between 32 and 43; +update t1 set b=repeat(left(b,1),2) where a between 64 and 66; +update t1 set b=repeat(left(b,1),65) where a between 67 and 70; +check table t1; +insert into t1 (b) values (repeat('z',100)); +update t1 set b="test" where left(b,1) > 'n'; +check table t1; +drop table t1; + +# +# Test space-stripping features +# +create table t1 ( a text not null, key a (a(20))); +insert into t1 values ('aaa '),('aaa'),('aa'); +check table t1; +repair table t1; +select concat(a,'.') from t1 where a='aaa'; +select concat(a,'.') from t1 where binary a='aaa'; +update t1 set a='bbb' where a='aaa'; +select concat(a,'.') from t1; +drop table t1; + +# +# More space testing +# + +create table t1(a text not null, b text not null, c text not null, index (a(10),b(10),c(10))); +insert into t1 values('807780', '477', '165'); +insert into t1 values('807780', '477', '162'); +insert into t1 values('807780', '472', '162'); +select * from t1 where a='807780' and b='477' and c='165'; +drop table t1; + +# +# Space-stripping in prefix_search +# + +CREATE TABLE t1 (a varchar(150) NOT NULL, KEY (a)); +INSERT t1 VALUES ("can \tcan"); +INSERT t1 VALUES ("can can"); +INSERT t1 VALUES ("can"); +SELECT * FROM t1; +CHECK TABLE t1; +DROP TABLE t1; + +# +# Verify blob handling +# + +create table t1 (a blob); +insert into t1 values('a '),('a'); +select concat(a,'.') from t1 where a='a'; +select concat(a,'.') from t1 where a='a '; +alter table t1 add key(a(2)); +select concat(a,'.') from t1 where a='a'; +select concat(a,'.') from t1 where a='a '; +drop table t1; + +# +# Test text and unique +# +create table t1 (a int not null auto_increment primary key, b text not null, unique b (b(20))); +insert into t1 (b) values ('a'),('b'),('c'); +select concat(b,'.') from t1; +update t1 set b='b ' where a=2; +--error 1582 +update t1 set b='b ' where a > 1; +--error 1582 +insert into t1 (b) values ('b'); +select * from t1; +delete from t1 where b='b'; +select a,concat(b,'.') from t1; +drop table t1; + +# +# Test keys with 0 segments +# +create table t1 (a int not null); +create table t2 (a int not null, primary key (a)); +insert into t1 values (1); +insert into t2 values (1),(2); +select sql_big_result distinct t1.a from t1,t2 order by t2.a; +select distinct t1.a from t1,t2 order by t2.a; +select sql_big_result distinct t1.a from t1,t2; +explain select sql_big_result distinct t1.a from t1,t2 order by t2.a; +explain select distinct t1.a from t1,t2 order by t2.a; +drop table t1,t2; + +# +# Test freshly imported table and LIMIT +# +create table t1 ( + c1 varchar(32), + key (c1) +); +alter table t1 disable keys; +insert into t1 values ('a'), ('b'); +select c1 from t1 order by c1 limit 1; +drop table t1; + +# +# Test join that could miss concurrently inserted row +# Note that for the moment Maria only supports multiple writers if we have +# static or dynamic row format +# +# Partial key. +create table t1 (a int not null, primary key(a)) ROW_FORMAT=FIXED; +create table t2 (a int not null, b int not null, primary key(a,b)) ROW_FORMAT=FIXED; +insert into t1 values (1),(2),(3),(4),(5),(6); +insert into t2 values (1,1),(2,1); +set autocommit=0; +begin; +lock tables t1 read local, t2 read local; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +connect (root,localhost,root,,test,$MASTER_MYPORT,$MASTER_MYSOCK); +insert into t2 values(2,0); +commit; +disconnect root; +connection default; +select straight_join * from t1,t2 force index (primary) where t1.a=t2.a; +drop table t1,t2; +# +# Full key. +CREATE TABLE t1 (c1 varchar(250) NOT NULL) ROW_FORMAT=DYNAMIC; +CREATE TABLE t2 (c1 varchar(250) NOT NULL, PRIMARY KEY (c1)) ROW_FORMAT=DYNAMIC; +INSERT INTO t1 VALUES ('test000001'), ('test000002'), ('test000003'); +INSERT INTO t2 VALUES ('test000002'), ('test000003'), ('test000004'); +LOCK TABLES t1 READ LOCAL, t2 READ LOCAL; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 + WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +connect (con1,localhost,root,,); +connection con1; +INSERT INTO t2 VALUES ('test000001'), ('test000005'); +disconnect con1; +connection default; +SELECT t1.c1 AS t1c1, t2.c1 AS t2c1 FROM t1, t2 + WHERE t1.c1 = t2.c1 HAVING t1c1 != t2c1; +DROP TABLE t1,t2; + +# +# Test RTREE index +# +--error 1235, 1289 +CREATE TABLE t1 (`a` int(11) NOT NULL default '0', `b` int(11) NOT NULL default '0', UNIQUE KEY `a` USING RTREE (`a`,`b`)); +# INSERT INTO t1 VALUES (1,1),(1,1); +# DELETE FROM rt WHERE a<1; +# DROP TABLE IF EXISTS t1; + +create table t1 (a int, b varchar(200), c text not null) checksum=1; +create table t2 (a int, b varchar(200), c text not null) checksum=0; +insert t1 values (1, "aaa", "bbb"), (NULL, "", "ccccc"), (0, NULL, ""); +insert t2 select * from t1; +checksum table t1, t2, t3 quick; +checksum table t1, t2, t3; +checksum table t1, t2, t3 extended; +#show table status; +drop table t1,t2; + +create table t1 (a int, key (a)); +show keys from t1; +alter table t1 disable keys; +show keys from t1; +create table t2 (a int); +let $i=1000; +set @@rand_seed1=31415926,@@rand_seed2=2718281828; +--disable_query_log +while ($i) +{ + dec $i; + insert t2 values (rand()*100000); +} +--enable_query_log +insert t1 select * from t2; +show keys from t1; +alter table t1 enable keys; +show keys from t1; +#TODO after we have repair: delete the following --disable-warnings +--disable_warnings +alter table t1 engine=heap; +--enable_warnings +alter table t1 disable keys; +show keys from t1; +drop table t1,t2; + +# +# Index search for NULL in blob +# +create table t1 ( a tinytext, b char(1), index idx (a(1),b) ); +insert into t1 values (null,''), (null,''); +explain select count(*) from t1 where a is null; +select count(*) from t1 where a is null; +drop table t1; + +# +# Test corruption Can't open file: 'table.MYI' (errno: 145) +# +create table t1 (c1 int, c2 varchar(4) not null default '', + key(c2(3))) default charset=utf8; +insert into t1 values (1,'A'), (2, 'B'), (3, 'A'); +update t1 set c2='A B' where c1=2; +check table t1; +drop table t1; + + +# +# Test CHECKSUM TABLE +# + +create table t1 (c1 int); +insert into t1 values (1),(2),(3),(4); +checksum table t1; +delete from t1 where c1 = 1; +create table t2 as select * from t1; +# The following returns 0 with the bug in place. +checksum table t1; +# The above should give the same number as the following. +checksum table t2; +drop table t1, t2; + +# +# maria_stats_method variable. +# + +show variables like 'maria_stats_method'; + +create table t1 (a int, key(a)); +insert into t1 values (0),(1),(2),(3),(4); +insert into t1 select NULL from t1; + +# default: NULLs considered inequal +analyze table t1; +show index from t1; +insert into t1 values (11); +delete from t1 where a=11; +check table t1; +show index from t1; + +# Set nulls to be equal: +set maria_stats_method=nulls_equal; +show variables like 'maria_stats_method'; +insert into t1 values (11); +delete from t1 where a=11; + +analyze table t1; +show index from t1; + +insert into t1 values (11); +delete from t1 where a=11; + +check table t1; +show index from t1; + +# Set nulls back to be equal +set maria_stats_method=DEFAULT; +show variables like 'maria_stats_method'; +insert into t1 values (11); +delete from t1 where a=11; + +analyze table t1; +show index from t1; + +insert into t1 values (11); +delete from t1 where a=11; + +check table t1; +show index from t1; + +drop table t1; + +# WL#2609, CSC#XXXX: MARIA +set maria_stats_method=nulls_ignored; +show variables like 'maria_stats_method'; + +create table t1 ( + a char(3), b char(4), c char(5), d char(6), + key(a,b,c,d) +); +insert into t1 values ('bcd','def1', NULL, 'zz'); +insert into t1 values ('bcd','def2', NULL, 'zz'); +insert into t1 values ('bce','def1', 'yuu', NULL); +insert into t1 values ('bce','def2', NULL, 'quux'); +analyze table t1; +show index from t1; +delete from t1; +# This will give you different messages depending on if we are using +# row base or stmt based replication as stmt base replication will use +# truncate and row based will delete things row by row. +--replace_result "Table is already up to date" "OK" +analyze table t1; +show index from t1; + +set maria_stats_method=DEFAULT; +drop table t1; + +# +# Test key value packing for TINYBLOBs +# + +create table t1( + cip INT NOT NULL, + time TIME NOT NULL, + score INT NOT NULL DEFAULT 0, + bob TINYBLOB +); + +insert into t1 (cip, time) VALUES (1, '00:01'), (2, '00:02'), (3,'00:03'); +insert into t1 (cip, bob, time) VALUES (4, 'a', '00:04'), (5, 'b', '00:05'), + (6, 'c', '00:06'); +select * from t1 where bob is null and cip=1; +create index bug on t1 (bob(22), cip, time); +select * from t1 where bob is null and cip=1; +drop table t1; + +# +# Test COUNT(*) table with different INDEX +# + +create table t1 ( + id1 int not null auto_increment, + id2 int not null default '0', + t text not null, + primary key (id1), + key x (id2, t(32)) +) engine=maria; # engine clause is redundant but it's to test its parsing +insert into t1 (id2, t) values +(10, 'abc'), (10, 'abc'), (10, 'abc'), +(20, 'abc'), (20, 'abc'), (20, 'def'), +(10, 'abc'), (10, 'abc'); +select count(*) from t1 where id2 = 10; +select count(id1) from t1 where id2 = 10; +drop table t1; + +# +# Test MIN and MAX functions in queries +# + +CREATE TABLE t1(a TINYINT, KEY(a)); +INSERT INTO t1 VALUES(1); +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +ALTER TABLE t1 DISABLE KEYS; +SELECT MAX(a) FROM t1; +SELECT MAX(a) FROM t1 IGNORE INDEX(a); +DROP TABLE t1; + +# +# Test update of table joined to self +# +CREATE TABLE t1(a CHAR(9), b VARCHAR(7)); +INSERT INTO t1(a) VALUES('xxxxxxxxx'),('xxxxxxxxx'); +UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; +SELECT * FROM t1; +DROP TABLE t1; + +# +# OPTIMIZE TABLE with multiple threads +# +SET @@maria_repair_threads=2; +SHOW VARIABLES LIKE 'maria_repair%'; +# +# Test OPTIMIZE. This creates a new data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +OPTIMIZE TABLE t1; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +# Test REPAIR QUICK. This retains the old data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +REPAIR TABLE t1 QUICK; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +SET @@maria_repair_threads=1; +SHOW VARIABLES LIKE 'maria_repair%'; + +# +# Test varchar +# + +source include/varchar.inc; + +# +# Some errors/warnings on create +# + +create table t1 (v varchar(65530), key(v)); +drop table if exists t1; +create table t1 (v varchar(65536)); +show create table t1; +drop table t1; +create table t1 (v varchar(65530) character set utf8); +show create table t1; +drop table t1; + +# MARIA specific varchar tests +--error 1118 +create table t1 (v varchar(65535)); + +# +# Test concurrent insert +# First with static record length +# +set @save_concurrent_insert=@@concurrent_insert; +set global concurrent_insert=1; +create table t1 (a int) ROW_FORMAT=FIXED; +insert into t1 values (1),(2),(3),(4),(5); +lock table t1 read local; +connect (con1,localhost,root,,); +connection con1; +# Insert in table without hole +insert into t1 values(6),(7); +connection default; +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +connection con1; +set global concurrent_insert=2; +# Insert in table with hole -> Should insert at end +insert into t1 values (8),(9); +connection default; +unlock tables; +# Insert into hole +insert into t1 values (10),(11),(12); +select * from t1; +check table t1; +drop table t1; +disconnect con1; + +# Same test with dynamic record length +create table t1 (a int, b varchar(30) default "hello") ROW_FORMAT=DYNAMIC; +insert into t1 (a) values (1),(2),(3),(4),(5); +lock table t1 read local; +connect (con1,localhost,root,,); +connection con1; +# Insert in table without hole +insert into t1 (a) values(6),(7); +connection default; +unlock tables; +delete from t1 where a>=3 and a<=4; +lock table t1 read local; +connection con1; +set global concurrent_insert=2; +# Insert in table with hole -> Should insert at end +insert into t1 (a) values (8),(9); +connection default; +unlock tables; +# Insert into hole +insert into t1 (a) values (10),(11),(12); +select a from t1; +check table t1; +drop table t1; +disconnect con1; +set global concurrent_insert=@save_concurrent_insert; + +# +# ANALYZE TABLE and ALTER TABLE .. ENABLE INDEX +# + +create table t1 (a int, key(a)); + +insert into t1 values (1),(2),(3),(4),(NULL),(NULL),(NULL),(NULL); +analyze table t1; +show keys from t1; + +alter table t1 disable keys; +alter table t1 enable keys; +show keys from t1; + +drop table t1; + +# +# Test temporary table with data directory option +# +connect (session1,localhost,root,,); +connect (session2,localhost,root,,); + +connection session1; +disable_query_log; +eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 9 a; +enable_query_log; +disable_result_log; +show create table t1; +enable_result_log; + +connection session2; +disable_query_log; +eval create temporary table t1 (a int) data directory="$MYSQLTEST_VARDIR/tmp" select 99 a; +enable_query_log; +disable_result_log; +show create table t1; +enable_result_log; + +connection default; +create table t1 (a int) select 42 a; + +connection session1; +select * from t1; +disconnect session1; +connection session2; +select * from t1; +disconnect session2; +connection default; +select * from t1; +drop table t1; + +--echo End of 4.1 tests + +# +# Test if PACK_KEYS option takes values greater than 1 while creating table +# +create table t1 (c1 int) pack_keys=0; +create table t2 (c1 int) pack_keys=1; +create table t3 (c1 int) pack_keys=default; +--error 1064 +create table t4 (c1 int) pack_keys=2; +drop table t1, t2, t3; + +# +# Test of key_block_size +# + +create table t1 (a int not null, key `a` (a) key_block_size=1024); +show create table t1; +drop table t1; + +create table t1 (a int not null, key `a` (a) key_block_size=2048); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a)); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a) key_block_size=1024); +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=1024; +show create table t1; +alter table t1 key_block_size=2048; +show create table t1; +alter table t1 add c int, add key (c); +show create table t1; +alter table t1 key_block_size=0; +alter table t1 add d int, add key (d); +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a), key(b)) key_block_size=8192; +show create table t1; +drop table t1; + +create table t1 (a int not null, b varchar(2048), key (a) key_block_size=1024, key(b)) key_block_size=8192; +show create table t1; +drop table t1; + +create table t1 (a int not null, b int, key (a) key_block_size=1024, key(b) key_block_size=8192) key_block_size=16384; +show create table t1; +drop table t1; + + +# Test limits and errors of key_block_size + +create table t1 (a int not null, key `a` (a) key_block_size=512); +show create table t1; +drop table t1; + +create table t1 (a varchar(2048), key `a` (a) key_block_size=1000000000000000000); +show create table t1; +drop table t1; + +create table t1 (a int not null, key `a` (a) key_block_size=1025); +show create table t1; +drop table t1; + +--error 1064 +create table t1 (a int not null, key key_block_size=1024 (a)); +--error 1064 +create table t1 (a int not null, key `a` key_block_size=1024 (a)); + + +# +# Test of changing MI_KEY_BLOCK_LENGTH +# +CREATE TABLE t1 ( + c1 INT, + c2 VARCHAR(300), + KEY (c1) KEY_BLOCK_SIZE 1024, + KEY (c2) KEY_BLOCK_SIZE 8192 + ); +INSERT INTO t1 VALUES (10, REPEAT('a', CEIL(RAND(10) * 300))), + (11, REPEAT('b', CEIL(RAND() * 300))), + (12, REPEAT('c', CEIL(RAND() * 300))), + (13, REPEAT('d', CEIL(RAND() * 300))), + (14, REPEAT('e', CEIL(RAND() * 300))), + (15, REPEAT('f', CEIL(RAND() * 300))), + (16, REPEAT('g', CEIL(RAND() * 300))), + (17, REPEAT('h', CEIL(RAND() * 300))), + (18, REPEAT('i', CEIL(RAND() * 300))), + (19, REPEAT('j', CEIL(RAND() * 300))), + (20, REPEAT('k', CEIL(RAND() * 300))), + (21, REPEAT('l', CEIL(RAND() * 300))), + (22, REPEAT('m', CEIL(RAND() * 300))), + (23, REPEAT('n', CEIL(RAND() * 300))), + (24, REPEAT('o', CEIL(RAND() * 300))), + (25, REPEAT('p', CEIL(RAND() * 300))), + (26, REPEAT('q', CEIL(RAND() * 300))), + (27, REPEAT('r', CEIL(RAND() * 300))), + (28, REPEAT('s', CEIL(RAND() * 300))), + (29, REPEAT('t', CEIL(RAND() * 300))), + (30, REPEAT('u', CEIL(RAND() * 300))), + (31, REPEAT('v', CEIL(RAND() * 300))), + (32, REPEAT('w', CEIL(RAND() * 300))), + (33, REPEAT('x', CEIL(RAND() * 300))), + (34, REPEAT('y', CEIL(RAND() * 300))), + (35, REPEAT('z', CEIL(RAND() * 300))); +INSERT INTO t1 SELECT * FROM t1; +INSERT INTO t1 SELECT * FROM t1; +CHECK TABLE t1; +REPAIR TABLE t1; +DELETE FROM t1 WHERE c1 >= 10; +CHECK TABLE t1; +DROP TABLE t1; + +# End of 5.2 tests + +--disable_result_log +--disable_query_log +eval set global storage_engine=$default; +--enable_result_log +--enable_query_log diff --git a/mysql-test/t/ps_maria.test b/mysql-test/t/ps_maria.test new file mode 100644 index 00000000000..2be99842132 --- /dev/null +++ b/mysql-test/t/ps_maria.test @@ -0,0 +1,46 @@ +############################################### +# # +# Prepared Statements test on MARIA tables # +# # +############################################### + +# +# NOTE: PLEASE SEE ps_1general.test (bottom) +# BEFORE ADDING NEW TEST CASES HERE !!! + +use test; + +-- source include/have_maria.inc + +let $type= 'MARIA' ; +-- source include/ps_create.inc +-- source include/ps_renew.inc + +-- source include/ps_query.inc + +# parameter in SELECT ... MATCH/AGAINST +# case derived from client_test.c: test_bug1500() +--disable_warnings +drop table if exists t2 ; +--enable_warnings +eval create table t2 (s varchar(25), fulltext(s)) +ENGINE = $type ; +insert into t2 values ('Gravedigger'), ('Greed'),('Hollow Dogs') ; +commit ; + +prepare stmt1 from ' select s from t2 where match (s) against (?) ' ; +set @arg00='Dogs' ; +execute stmt1 using @arg00 ; +prepare stmt1 from ' SELECT s FROM t2 +where match (s) against (concat(?,''digger'')) '; +set @arg00='Grave' ; +execute stmt1 using @arg00 ; +drop table t2 ; + +-- source include/ps_modify.inc +-- source include/ps_modify1.inc +-- source include/ps_conv.inc + +drop table t1, t9; + +# End of 4.1 tests diff --git a/mysys/Makefile.am b/mysys/Makefile.am index fca13ab52b8..bdc9f176d8e 100644 --- a/mysys/Makefile.am +++ b/mysys/Makefile.am @@ -25,12 +25,14 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ mf_path.c mf_loadpath.c my_file.c \ my_open.c my_create.c my_dup.c my_seek.c my_read.c \ my_pread.c my_write.c my_getpagesize.c \ + my_safehash.c \ mf_keycache.c mf_keycaches.c my_crc32.c \ mf_iocache.c mf_iocache2.c mf_cache.c mf_tempfile.c \ mf_tempdir.c my_lock.c mf_brkhant.c my_alarm.c \ my_malloc.c my_realloc.c my_once.c mulalloc.c \ my_alloc.c safemalloc.c my_new.cc \ - my_vle.c my_atomic.c \ + my_vle.c my_atomic.c lf_hash.c \ + lf_dynarray.c lf_alloc-pin.c \ my_fopen.c my_fstream.c my_getsystime.c \ my_error.c errors.c my_div.c my_messnc.c \ mf_format.c mf_same.c mf_dirname.c mf_fn_ext.c \ @@ -52,7 +54,8 @@ libmysys_a_SOURCES = my_init.c my_getwd.c mf_getdate.c my_mmap.c \ my_gethostbyname.c rijndael.c my_aes.c sha1.c \ my_handler.c my_netware.c my_largepage.c \ my_memmem.c \ - my_windac.c my_access.c base64.c my_libwrap.c + my_windac.c my_access.c base64.c my_libwrap.c \ + wqueue.c EXTRA_DIST = thr_alarm.c thr_lock.c my_pthread.c my_thr_init.c \ thr_mutex.c thr_rwlock.c \ CMakeLists.txt mf_soundex.c \ @@ -126,5 +129,6 @@ test_base64$(EXEEXT): base64.c $(LIBRARIES) $(LINK) $(FLAGS) -DMAIN ./test_base64.c $(LDADD) $(LIBS) $(RM) -f ./test_base64.c + # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/mysys/array.c b/mysys/array.c index 8a539f18a20..b7342f70ef8 100644 --- a/mysys/array.c +++ b/mysys/array.c @@ -63,7 +63,7 @@ my_bool init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size, array->size_of_element=element_size; if ((array->buffer= init_buffer)) DBUG_RETURN(FALSE); - if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc,MYF(MY_WME)))) + if (!(array->buffer=(uchar*) my_malloc_ci(element_size*init_alloc, MYF(MY_WME)))) { array->max_element=0; DBUG_RETURN(TRUE); @@ -179,7 +179,7 @@ uchar *pop_dynamic(DYNAMIC_ARRAY *array) } /* - Replace elemnent in array with given element and index + Replace element in array with given element and index SYNOPSIS set_dynamic() @@ -200,42 +200,69 @@ my_bool set_dynamic(DYNAMIC_ARRAY *array, uchar* element, uint idx) { if (idx >= array->elements) { - if (idx >= array->max_element) - { - uint size; - char *new_ptr; - size=(idx+array->alloc_increment)/array->alloc_increment; - size*= array->alloc_increment; - if (array->buffer == (uchar *)(array + 1)) - { - /* - In this senerio, the buffer is statically preallocated, - so we have to create an all-new malloc since we overflowed - */ - if (!(new_ptr= (char *) my_malloc(size * - array->size_of_element, - MYF(MY_WME)))) - return 0; - memcpy(new_ptr, array->buffer, - array->elements * array->size_of_element); - } - else - if (!(new_ptr=(char*) my_realloc(array->buffer,size* - array->size_of_element, - MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) - return TRUE; - array->buffer= (uchar*) new_ptr; - array->max_element=size; - } + if (idx >= array->max_element && allocate_dynamic(array, idx)) + return TRUE; bzero((uchar*) (array->buffer+array->elements*array->size_of_element), - (idx - array->elements)*array->size_of_element); + (idx - array->elements)*array->size_of_element); array->elements=idx+1; } memcpy(array->buffer+(idx * array->size_of_element),element, - (size_t) array->size_of_element); + (size_t) array->size_of_element); return FALSE; } + +/* + Ensure that dynamic array has enough elements + + SYNOPSIS + allocate_dynamic() + array + max_elements Numbers of elements that is needed + + NOTES + Any new allocated element are NOT initialized + + RETURN VALUE + FALSE Ok + TRUE Allocation of new memory failed +*/ + +my_bool allocate_dynamic(DYNAMIC_ARRAY *array, uint max_elements) +{ + if (max_elements >= array->max_element) + { + uint size; + char *new_ptr; + size= (max_elements + array->alloc_increment)/array->alloc_increment; + size*= array->alloc_increment; + if (array->buffer == (uchar *)(array + 1)) + { + /* + In this senerio, the buffer is statically preallocated, + so we have to create an all-new malloc since we overflowed + */ + if (!(new_ptr= (char *) my_malloc(size * + array->size_of_element, + MYF(MY_WME)))) + return 0; + memcpy(new_ptr, array->buffer, + array->elements * array->size_of_element); + } + else + + + if (!(new_ptr= (char*) my_realloc(array->buffer,size* + array->size_of_element, + MYF(MY_WME | MY_ALLOW_ZERO_PTR)))) + return TRUE; + array->buffer= new_ptr; + array->max_element= size; + } + return FALSE; +} + + /* Get an element from array by given index diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c new file mode 100644 index 00000000000..51c4df7c94a --- /dev/null +++ b/mysys/lf_alloc-pin.c @@ -0,0 +1,523 @@ +/* QQ: TODO multi-pinbox */ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + wait-free concurrent allocator based on pinning addresses + + It works as follows: every thread (strictly speaking - every CPU, but + it's too difficult to do) has a small array of pointers. They're called + "pins". Before using an object its address must be stored in this array + (pinned). When an object is no longer necessary its address must be + removed from this array (unpinned). When a thread wants to free() an + object it scans all pins of all threads to see if somebody has this + object pinned. If yes - the object is not freed (but stored in a + "purgatory"). To reduce the cost of a single free() pins are not scanned + on every free() but only added to (thread-local) purgatory. On every + LF_PURGATORY_SIZE free() purgatory is scanned and all unpinned objects + are freed. + + Pins are used to solve ABA problem. To use pins one must obey + a pinning protocol: + + 1. Let's assume that PTR is a shared pointer to an object. Shared means + that any thread may modify it anytime to point to a different object + and free the old object. Later the freed object may be potentially + allocated by another thread. If we're unlucky that other thread may + set PTR to point to this object again. This is ABA problem. + 2. Create a local pointer LOCAL_PTR. + 3. Pin the PTR in a loop: + do + { + LOCAL_PTR= PTR; + pin(PTR, PIN_NUMBER); + } while (LOCAL_PTR != PTR) + 4. It is guaranteed that after the loop has ended, LOCAL_PTR + points to an object (or NULL, if PTR may be NULL), that + will never be freed. It is not guaranteed though + that LOCAL_PTR == PTR (as PTR can change any time) + 5. When done working with the object, remove the pin: + unpin(PIN_NUMBER) + 6. When copying pins (as in the list traversing loop: + pin(CUR, 1); + while () + { + do // standard + { // pinning + NEXT=CUR->next; // loop + pin(NEXT, 0); // see #3 + } while (NEXT != CUR->next); // above + ... + ... + CUR=NEXT; + pin(CUR, 1); // copy pin[0] to pin[1] + } + which keeps CUR address constantly pinned), note than pins may be + copied only upwards (!!!), that is pin[N] to pin[M], M > N. + 7. Don't keep the object pinned longer than necessary - the number of + pins you have is limited (and small), keeping an object pinned + prevents its reuse and cause unnecessary mallocs. + + Explanations: + + 3. The loop is important. The following can occur: + thread1> LOCAL_PTR= PTR + thread2> free(PTR); PTR=0; + thread1> pin(PTR, PIN_NUMBER); + now thread1 cannot access LOCAL_PTR, even if it's pinned, + because it points to a freed memory. That is, it *must* + verify that it has indeed pinned PTR, the shared pointer. + + 6. When a thread wants to free some LOCAL_PTR, and it scans + all lists of pins to see whether it's pinned, it does it + upwards, from low pin numbers to high. Thus another thread + must copy an address from one pin to another in the same + direction - upwards, otherwise the scanning thread may + miss it. + + Implementation details: + + Pins are given away from a "pinbox". Pinbox is stack-based allocator. + It used dynarray for storing pins, new elements are allocated by dynarray + as necessary, old are pushed in the stack for reuse. ABA is solved by + versioning a pointer - because we use an array, a pointer to pins is 16 bit, + upper 16 bits are used for a version. + + It is assumed that pins belong to a thread and are not transferable + between threads (LF_PINS::stack_ends_here being a primary reason + for this limitation). +*/ + +#include +#include +#include + +#define LF_PINBOX_MAX_PINS 65536 + +static void _lf_pinbox_real_free(LF_PINS *pins); + +/* + Initialize a pinbox. Normally called from lf_alloc_init. + See the latter for details. +*/ +void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, + lf_pinbox_free_func *free_func, void *free_func_arg) +{ + DBUG_ASSERT(free_ptr_offset % sizeof(void *) == 0); + compile_time_assert(sizeof(LF_PINS) == 128); + lf_dynarray_init(&pinbox->pinarray, sizeof(LF_PINS)); + pinbox->pinstack_top_ver= 0; + pinbox->pins_in_array= 0; + pinbox->free_ptr_offset= free_ptr_offset; + pinbox->free_func= free_func; + pinbox->free_func_arg= free_func_arg; +} + +void lf_pinbox_destroy(LF_PINBOX *pinbox) +{ + lf_dynarray_destroy(&pinbox->pinarray); +} + +/* + Get pins from a pinbox. Usually called via lf_alloc_get_pins() or + lf_hash_get_pins(). + + SYNOPSYS + pinbox - + stack_end - a pointer to the end (top/bottom, depending on the + STACK_DIRECTION) of stack. Used for safe alloca. There's + no safety margin deducted, a caller should take care of it, + if necessary. + + DESCRIPTION + get a new LF_PINS structure from a stack of unused pins, + or allocate a new one out of dynarray. + + NOTE + It is assumed that pins belong to a thread and are not transferable + between threads. +*/ +LF_PINS *_lf_pinbox_get_pins(LF_PINBOX *pinbox, void *stack_end) +{ + uint32 pins, next, top_ver; + LF_PINS *el; + /* + We have an array of max. 64k elements. + The highest index currently allocated is pinbox->pins_in_array. + Freed elements are in a lifo stack, pinstack_top_ver. + pinstack_top_ver is 32 bits; 16 low bits are the index in the + array, to the first element of the list. 16 high bits are a version + (every time the 16 low bits are updated, the 16 high bits are + incremented). Versioniong prevents the ABA problem. + */ + top_ver= pinbox->pinstack_top_ver; + do + { + if (!(pins= top_ver % LF_PINBOX_MAX_PINS)) + { + /* the stack of free elements is empty */ + pins= my_atomic_add32(&pinbox->pins_in_array, 1)+1; + if (unlikely(pins >= LF_PINBOX_MAX_PINS)) + return 0; + /* + note that the first allocated element has index 1 (pins==1). + index 0 is reserved to mean "NULL pointer" + */ + el= (LF_PINS *)_lf_dynarray_lvalue(&pinbox->pinarray, pins); + if (unlikely(!el)) + return 0; + break; + } + el= (LF_PINS *)_lf_dynarray_value(&pinbox->pinarray, pins); + next= el->link; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins+next+LF_PINBOX_MAX_PINS)); + /* + set el->link to the index of el in the dynarray (el->link has two usages: + - if element is allocated, it's its own index + - if element is free, it's its next element in the free stack + */ + el->link= pins; + el->purgatory_count= 0; + el->pinbox= pinbox; + el->stack_ends_here= stack_end; + return el; +} + +/* + Put pins back to a pinbox. Usually called via lf_alloc_put_pins() or + lf_hash_put_pins(). + + DESCRIPTION + empty the purgatory (XXX deadlock warning below!), + push LF_PINS structure to a stack +*/ +void _lf_pinbox_put_pins(LF_PINS *pins) +{ + LF_PINBOX *pinbox= pins->pinbox; + uint32 top_ver, nr; + nr= pins->link; +#ifdef MY_LF_EXTRA_DEBUG + { + int i; + for (i= 0; i < LF_PINBOX_PINS; i++) + DBUG_ASSERT(pins->pin[i] == 0); + } +#endif + /* + XXX this will deadlock if other threads will wait for + the caller to do something after _lf_pinbox_put_pins(), + and they would have pinned addresses that the caller wants to free. + Thus: only free pins when all work is done and nobody can wait for you!!! + */ + while (pins->purgatory_count) + { + _lf_pinbox_real_free(pins); + if (pins->purgatory_count) + { + my_atomic_rwlock_wrunlock(&pins->pinbox->pinarray.lock); + pthread_yield(); + my_atomic_rwlock_wrlock(&pins->pinbox->pinarray.lock); + } + } + top_ver= pinbox->pinstack_top_ver; + do + { + pins->link= top_ver % LF_PINBOX_MAX_PINS; + } while (!my_atomic_cas32(&pinbox->pinstack_top_ver, &top_ver, + top_ver-pins->link+nr+LF_PINBOX_MAX_PINS)); + return; +} + +static int ptr_cmp(void **a, void **b) +{ + return *a < *b ? -1 : *a == *b ? 0 : 1; +} + +#define add_to_purgatory(PINS, ADDR) \ + do \ + { \ + *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \ + (PINS)->purgatory; \ + (PINS)->purgatory= (ADDR); \ + (PINS)->purgatory_count++; \ + } while (0) + +/* + Free an object allocated via pinbox allocator + + DESCRIPTION + add an object to purgatory. if necessary, call _lf_pinbox_real_free() + to actually free something. +*/ +void _lf_pinbox_free(LF_PINS *pins, void *addr) +{ + add_to_purgatory(pins, addr); + if (pins->purgatory_count % LF_PURGATORY_SIZE) + _lf_pinbox_real_free(pins); +} + +struct st_harvester { + void **granary; + int npins; +}; + +/* + callback for _lf_dynarray_iterate: + scan all pins of all threads and accumulate all pins +*/ +static int harvest_pins(LF_PINS *el, struct st_harvester *hv) +{ + int i; + LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH); + for (; el < el_end; el++) + { + for (i= 0; i < LF_PINBOX_PINS; i++) + { + void *p= el->pin[i]; + if (p) + *hv->granary++= p; + } + } + /* + hv->npins may become negative below, but it means that + we're on the last dynarray page and harvest_pins() won't be + called again. We don't bother to make hv->npins() correct + (that is 0) in this case. + */ + hv->npins-= LF_DYNARRAY_LEVEL_LENGTH; + return 0; +} + +/* + callback for _lf_dynarray_iterate: + scan all pins of all threads and see if addr is present there +*/ +static int match_pins(LF_PINS *el, void *addr) +{ + int i; + LF_PINS *el_end= el+LF_DYNARRAY_LEVEL_LENGTH; + for (; el < el_end; el++) + for (i= 0; i < LF_PINBOX_PINS; i++) + if (el->pin[i] == addr) + return 1; + return 0; +} + +#if STACK_DIRECTION < 0 +#define available_stack_size(END,CUR) (long) ((char*)(CUR) - (char*)(END)) +#else +#define available_stack_size(END,CUR) (long) ((char*)(END) - (char*)(CUR)) +#endif + +/* + Scan the purgatory and free everything that can be freed +*/ +static void _lf_pinbox_real_free(LF_PINS *pins) +{ + int npins, alloca_size; + void *list, **addr; + struct st_lf_alloc_node *first, *last= NULL; + LF_PINBOX *pinbox= pins->pinbox; + + npins= pinbox->pins_in_array+1; + +#ifdef HAVE_ALLOCA + alloca_size= sizeof(void *)*LF_PINBOX_PINS*npins; + /* create a sorted list of pinned addresses, to speed up searches */ + if (available_stack_size(&pinbox, pins->stack_ends_here) > alloca_size) + { + struct st_harvester hv; + addr= (void **) alloca(alloca_size); + hv.granary= addr; + hv.npins= npins; + /* scan the dynarray and accumulate all pinned addresses */ + _lf_dynarray_iterate(&pinbox->pinarray, + (lf_dynarray_func)harvest_pins, &hv); + + npins= hv.granary-addr; + /* and sort them */ + if (npins) + qsort(addr, npins, sizeof(void *), (qsort_cmp)ptr_cmp); + } + else +#endif + addr= 0; + + list= pins->purgatory; + pins->purgatory= 0; + pins->purgatory_count= 0; + while (list) + { + void *cur= list; + list= *(void **)((char *)cur+pinbox->free_ptr_offset); + if (npins) + { + if (addr) /* use binary search */ + { + void **a, **b, **c; + for (a= addr, b= addr+npins-1, c= a+(b-a)/2; (b-a) > 1; c= a+(b-a)/2) + if (cur == *c) + a= b= c; + else if (cur > *c) + a= c; + else + b= c; + if (cur == *a || cur == *b) + goto found; + } + else /* no alloca - no cookie. linear search here */ + { + if (_lf_dynarray_iterate(&pinbox->pinarray, + (lf_dynarray_func)match_pins, cur)) + goto found; + } + } + /* not pinned - freeing */ + if (last) + last= last->next= (struct st_lf_alloc_node *)cur; + else + first= last= (struct st_lf_alloc_node *)cur; + continue; +found: + /* pinned - keeping */ + add_to_purgatory(pins, cur); + } + if (last) + pinbox->free_func(first, last, pinbox->free_func_arg); +} + +/* lock-free memory allocator for fixed-size objects */ + +LF_REQUIRE_PINS(1); + +/* + callback for _lf_pinbox_real_free to free a list of unpinned objects - + add it back to the allocator stack + + DESCRIPTION + 'first' and 'last' are the ends of the linked list of st_lf_alloc_node's: + first->el->el->....->el->last. Use first==last to free only one element. +*/ +static void alloc_free(struct st_lf_alloc_node *first, + struct st_lf_alloc_node *last, + LF_ALLOCATOR *allocator) +{ + struct st_lf_alloc_node *tmp; + tmp= allocator->top; + do + { + last->next= tmp; + } while (!my_atomic_casptr((void **)&allocator->top, (void **)&tmp, first) && + LF_BACKOFF); +} + +/* + initialize lock-free allocator + + SYNOPSYS + allocator - + size a size of an object to allocate + free_ptr_offset an offset inside the object to a sizeof(void *) + memory that is guaranteed to be unused after + the object is put in the purgatory. Unused by ANY + thread, not only the purgatory owner. + This memory will be used to link waiting-to-be-freed + objects in a purgatory list. +*/ +void lf_alloc_init(LF_ALLOCATOR *allocator, uint size, uint free_ptr_offset) +{ + lf_pinbox_init(&allocator->pinbox, free_ptr_offset, + (lf_pinbox_free_func *)alloc_free, allocator); + allocator->top= 0; + allocator->mallocs= 0; + allocator->element_size= size; + DBUG_ASSERT(size >= sizeof(void*) + free_ptr_offset); +} + +/* + destroy the allocator, free everything that's in it + + NOTE + As every other init/destroy function here and elsewhere it + is not thread safe. No, this function is no different, ensure + that no thread needs the allocator before destroying it. + We are not responsible for any damage that may be caused by + accessing the allocator when it is being or has been destroyed. + Oh yes, and don't put your cat in a microwave. +*/ +void lf_alloc_destroy(LF_ALLOCATOR *allocator) +{ + struct st_lf_alloc_node *node= allocator->top; + while (node) + { + struct st_lf_alloc_node *tmp= node->next; + my_free((void *)node, MYF(0)); + node= tmp; + } + lf_pinbox_destroy(&allocator->pinbox); + allocator->top= 0; +} + +/* + Allocate and return an new object. + + DESCRIPTION + Pop an unused object from the stack or malloc it is the stack is empty. + pin[0] is used, it's removed on return. +*/ +void *_lf_alloc_new(LF_PINS *pins) +{ + LF_ALLOCATOR *allocator= (LF_ALLOCATOR *)(pins->pinbox->free_func_arg); + struct st_lf_alloc_node *node; + for (;;) + { + do + { + node= allocator->top; + _lf_pin(pins, 0, node); + } while (node != allocator->top && LF_BACKOFF); + if (!node) + { + node= (void *)my_malloc(allocator->element_size, MYF(MY_WME)); +#ifdef MY_LF_EXTRA_DEBUG + if (likely(node)) + my_atomic_add32(&allocator->mallocs, 1); +#endif + break; + } + if (my_atomic_casptr((void **)&allocator->top, (void *)&node, node->next)) + break; + } + _lf_unpin(pins, 0); + return node; +} + +/* + count the number of objects in a pool. + + NOTE + This is NOT thread-safe !!! +*/ +uint lf_alloc_pool_count(LF_ALLOCATOR *allocator) +{ + uint i; + struct st_lf_alloc_node *node; + for (node= allocator->top, i= 0; node; node= node->next, i++) + /* no op */; + return i; +} + diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c new file mode 100644 index 00000000000..770b1f9342b --- /dev/null +++ b/mysys/lf_dynarray.c @@ -0,0 +1,208 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Analog of DYNAMIC_ARRAY that never reallocs + (so no pointer into the array may ever become invalid). + + Memory is allocated in non-contiguous chunks. + This data structure is not space efficient for sparse arrays. + + Every element is aligned to sizeof(element) boundary + (to avoid false sharing if element is big enough). + + LF_DYNARRAY is a recursive structure. On the zero level + LF_DYNARRAY::level[0] it's an array of LF_DYNARRAY_LEVEL_LENGTH elements, + on the first level it's an array of LF_DYNARRAY_LEVEL_LENGTH pointers + to arrays of elements, on the second level it's an array of pointers + to arrays of pointers to arrays of elements. And so on. + + With four levels the number of elements is limited to 4311810304 + (but as in all functions index is uint, the real limit is 2^32-1) + + Actually, it's wait-free, not lock-free ;-) +*/ + +#include +#include +#include +#include + +void lf_dynarray_init(LF_DYNARRAY *array, uint element_size) +{ + bzero(array, sizeof(*array)); + array->size_of_element= element_size; + my_atomic_rwlock_init(&array->lock); +} + +static void recursive_free(void **alloc, int level) +{ + if (!alloc) + return; + + if (level) + { + int i; + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + recursive_free(alloc[i], level-1); + my_free((void *)alloc, MYF(0)); + } + else + my_free(alloc[-1], MYF(0)); +} + +void lf_dynarray_destroy(LF_DYNARRAY *array) +{ + int i; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + recursive_free(array->level[i], i); + my_atomic_rwlock_destroy(&array->lock); +} + +static const ulong dynarray_idxes_in_prev_levels[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH + + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH + LF_DYNARRAY_LEVEL_LENGTH +}; + +static const ulong dynarray_idxes_in_prev_level[LF_DYNARRAY_LEVELS]= +{ + 0, /* +1 here to to avoid -1's below */ + LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH, + LF_DYNARRAY_LEVEL_LENGTH * LF_DYNARRAY_LEVEL_LENGTH * + LF_DYNARRAY_LEVEL_LENGTH, +}; + +/* + Returns a valid lvalue pointer to the element number 'idx'. + Allocates memory if necessary. +*/ +void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + { + void *alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * sizeof(void *), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + if (my_atomic_casptr(ptr_ptr, &ptr, alloc)) + ptr= alloc; + else + my_free(alloc, MYF(0)); + } + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx%= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + { + void *alloc, *data; + alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element + + max(array->size_of_element, sizeof(void *)), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!alloc)) + return(NULL); + /* reserve the space for free() address */ + data= alloc + sizeof(void *); + { /* alignment */ + intptr mod= ((intptr)data) % array->size_of_element; + if (mod) + data+= array->size_of_element - mod; + } + ((void **)data)[-1]= alloc; /* free() will need the original pointer */ + if (my_atomic_casptr(ptr_ptr, &ptr, data)) + ptr= data; + else + my_free(alloc, MYF(0)); + } + return ptr + array->size_of_element * idx; +} + +/* + Returns a pointer to the element number 'idx' + or NULL if an element does not exists +*/ +void *_lf_dynarray_value(LF_DYNARRAY *array, uint idx) +{ + void * ptr, * volatile * ptr_ptr= 0; + int i; + + for (i= LF_DYNARRAY_LEVELS-1; idx < dynarray_idxes_in_prev_levels[i]; i--) + /* no-op */; + ptr_ptr= &array->level[i]; + idx-= dynarray_idxes_in_prev_levels[i]; + for (; i > 0; i--) + { + if (!(ptr= *ptr_ptr)) + return(NULL); + ptr_ptr= ((void **)ptr) + idx / dynarray_idxes_in_prev_level[i]; + idx %= dynarray_idxes_in_prev_level[i]; + } + if (!(ptr= *ptr_ptr)) + return(NULL); + return ptr + array->size_of_element * idx; +} + +static int recursive_iterate(LF_DYNARRAY *array, void *ptr, int level, + lf_dynarray_func func, void *arg) +{ + int res, i; + if (!ptr) + return 0; + if (!level) + return func(ptr, arg); + for (i= 0; i < LF_DYNARRAY_LEVEL_LENGTH; i++) + if ((res= recursive_iterate(array, ((void **)ptr)[i], level-1, func, arg))) + return res; + return 0; +} + +/* + Calls func(array, arg) on every array of LF_DYNARRAY_LEVEL_LENGTH elements + in lf_dynarray. + + DESCRIPTION + lf_dynarray consists of a set of arrays, LF_DYNARRAY_LEVEL_LENGTH elements + each. _lf_dynarray_iterate() calls user-supplied function on every array + from the set. It is the fastest way to scan the array, faster than + for (i=0; i < N; i++) { func(_lf_dynarray_value(dynarray, i)); } + + NOTE + if func() returns non-zero, the scan is aborted +*/ +int _lf_dynarray_iterate(LF_DYNARRAY *array, lf_dynarray_func func, void *arg) +{ + int i, res; + for (i= 0; i < LF_DYNARRAY_LEVELS; i++) + if ((res= recursive_iterate(array, array->level[i], i, func, arg))) + return res; + return 0; +} + diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c new file mode 100644 index 00000000000..832f0eb5852 --- /dev/null +++ b/mysys/lf_hash.c @@ -0,0 +1,493 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + extensible hash + + TODO + try to get rid of dummy nodes ? + for non-unique hash, count only _distinct_ values + (but how to do it in lf_hash_delete ?) +*/ +#include +#include +#include +#include +#include + +LF_REQUIRE_PINS(3); + +/* An element of the list */ +typedef struct { + intptr volatile link; /* a pointer to the next element in a listand a flag */ + uint32 hashnr; /* reversed hash number, for sorting */ + const byte *key; + uint keylen; + /* + data is stored here, directly after the keylen. + thus the pointer to data is (void*)(slist_element_ptr+1) + */ +} LF_SLIST; + +/* + a structure to pass the context (pointers two the three successive elements + in a list) from lfind to linsert/ldelete +*/ +typedef struct { + intptr volatile *prev; + LF_SLIST *curr, *next; +} CURSOR; + +/* + the last bit in LF_SLIST::link is a "deleted" flag. + the helper macros below convert it to a pure pointer or a pure flag +*/ +#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + DESCRIPTION + Search for hashnr/key/keylen in the list starting from 'head' and + position the cursor. The list is ORDER BY hashnr, key + + RETURN + 0 - not found + 1 - found + + NOTE + cursor is positioned in either case + pins[0..2] are used, they are NOT removed on return +*/ +static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, + const byte *key, uint keylen, CURSOR *cursor, LF_PINS *pins) +{ + uint32 cur_hashnr; + const byte *cur_key; + uint cur_keylen; + intptr link; + +retry: + cursor->prev= (intptr *)head; + do { /* PTR() isn't necessary below, head is a dummy node */ + cursor->curr= (LF_SLIST *)(*cursor->prev); + _lf_pin(pins, 1, cursor->curr); + } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (unlikely(!cursor->curr)) + return 0; /* end of the list */ + do { + /* QQ: XXX or goto retry ? */ + link= cursor->curr->link; + cursor->next= PTR(link); + _lf_pin(pins, 0, cursor->next); + } while (link != cursor->curr->link && LF_BACKOFF); + cur_hashnr= cursor->curr->hashnr; + cur_key= cursor->curr->key; + cur_keylen= cursor->curr->keylen; + if (*cursor->prev != (intptr)cursor->curr) + { + (void)LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr >= hashnr) + { + int r= 1; + if (cur_hashnr > hashnr || + (r= my_strnncoll(cs, (uchar*) cur_key, cur_keylen, (uchar*) key, + keylen)) >= 0) + return !r; + } + cursor->prev= &(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + /* + we found a deleted node - be nice, help the other thread + and remove this deleted node + */ + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + (void)LF_BACKOFF; + goto retry; + } + } + cursor->curr= cursor->next; + _lf_pin(pins, 1, cursor->curr); + } +} + +/* + DESCRIPTION + insert a 'node' in the list that starts from 'head' in the correct + position (as found by lfind) + + RETURN + 0 - inserted + not 0 - a pointer to a duplicate (not pinned and thus unusable) + + NOTE + it uses pins[0..2], on return all pins are removed. + if there're nodes with the same key value, a new node is added before them. +*/ +static LF_SLIST *linsert(LF_SLIST * volatile *head, CHARSET_INFO *cs, + LF_SLIST *node, LF_PINS *pins, uint flags) +{ + CURSOR cursor; + int res; + + for (;;) + { + if (lfind(head, cs, node->hashnr, node->key, node->keylen, + &cursor, pins) && + (flags & LF_HASH_UNIQUE)) + { + res= 0; /* duplicate found */ + break; + } + else + { + node->link= (intptr)cursor.curr; + DBUG_ASSERT(node->link != (intptr)node); /* no circular references */ + DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */ + if (my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + { + res= 1; /* inserted ok */ + break; + } + } + } + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + /* + Note that cursor.curr is not pinned here and the pointer is unreliable, + the object may dissapear anytime. But if it points to a dummy node, the + pointer is safe, because dummy nodes are never freed - initialize_bucket() + uses this fact. + */ + return res ? 0 : cursor.curr; +} + +/* + DESCRIPTION + deletes a node as identified by hashnr/keey/keylen from the list + that starts from 'head' + + RETURN + 0 - ok + 1 - not found + + NOTE + it uses pins[0..2], on return all pins are removed. +*/ +static int ldelete(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, + const byte *key, uint keylen, LF_PINS *pins) +{ + CURSOR cursor; + int res; + + for (;;) + { + if (!lfind(head, cs, hashnr, key, keylen, &cursor, pins)) + { + res= 1; /* not found */ + break; + } + else + { + /* mark the node deleted */ + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, + (void *)(((intptr)cursor.next) | 1))) + { + /* and remove it from the list */ + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + { + /* + somebody already "helped" us and removed the node ? + Let's check if we need to help that someone too! + (to ensure the number of "set DELETED flag" actions + is equal to the number of "remove from the list" actions) + */ + lfind(head, cs, hashnr, key, keylen, &cursor, pins); + } + res= 0; + break; + } + } + } + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + return res; +} + +/* + DESCRIPTION + searches for a node as identified by hashnr/keey/keylen in the list + that starts from 'head' + + RETURN + 0 - not found + node - found + + NOTE + it uses pins[0..2], on return the pin[2] keeps the node found + all other pins are removed. +*/ +static LF_SLIST *lsearch(LF_SLIST * volatile *head, CHARSET_INFO *cs, + uint32 hashnr, const byte *key, uint keylen, + LF_PINS *pins) +{ + CURSOR cursor; + int res= lfind(head, cs, hashnr, key, keylen, &cursor, pins); + if (res) + _lf_pin(pins, 2, cursor.curr); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + return res ? cursor.curr : 0; +} + +static inline const byte* hash_key(const LF_HASH *hash, + const byte *record, uint *length) +{ + if (hash->get_key) + return (*hash->get_key)(record, length, 0); + *length= hash->key_length; + return record + hash->key_offset; +} + +/* + compute the hash key value from the raw key. + note, that the hash value is limited to 2^31, because we need one + bit to distinguish between normal and dummy nodes. +*/ +static inline uint calc_hash(LF_HASH *hash, const byte *key, uint keylen) +{ + ulong nr1= 1, nr2= 4; + hash->charset->coll->hash_sort(hash->charset, (uchar*) key, keylen, + &nr1, &nr2); + return nr1 & INT_MAX32; +} + +#define MAX_LOAD 1.0 /* average number of elements in a bucket */ + +static int initialize_bucket(LF_HASH *, LF_SLIST * volatile*, uint, LF_PINS *); + +/* + Initializes lf_hash, the arguments are compatible with hash_init +*/ +void lf_hash_init(LF_HASH *hash, uint element_size, uint flags, + uint key_offset, uint key_length, hash_get_key get_key, + CHARSET_INFO *charset) +{ + lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size, + offsetof(LF_SLIST, key)); + lf_dynarray_init(&hash->array, sizeof(LF_SLIST *)); + hash->size= 1; + hash->count= 0; + hash->element_size= element_size; + hash->flags= flags; + hash->charset= charset ? charset : &my_charset_bin; + hash->key_offset= key_offset; + hash->key_length= key_length; + hash->get_key= get_key; + DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length); +} + +void lf_hash_destroy(LF_HASH *hash) +{ + LF_SLIST *el, **head= (LF_SLIST **)_lf_dynarray_value(&hash->array, 0); + + if (unlikely(!head)) + return; + el= *head; + + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + lf_alloc_direct_free(&hash->alloc, el); /* normal node */ + else + my_free((void *)el, MYF(0)); /* dummy node */ + el= (LF_SLIST *)next; + } + lf_alloc_destroy(&hash->alloc); + lf_dynarray_destroy(&hash->array); +} + +/* + DESCRIPTION + inserts a new element to a hash. it will have a _copy_ of + data, not a pointer to it. + + RETURN + 0 - inserted + 1 - didn't (unique key conflict) + -1 - out of memory + + NOTE + see linsert() for pin usage notes +*/ +int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data) +{ + int csize, bucket, hashnr; + LF_SLIST *node, * volatile *el; + + lf_rwlock_by_pins(pins); + node= (LF_SLIST *)_lf_alloc_new(pins); + if (unlikely(!node)) + return -1; + memcpy(node+1, data, hash->element_size); + node->key= hash_key(hash, (byte *)(node+1), &node->keylen); + hashnr= calc_hash(hash, node->key, node->keylen); + bucket= hashnr % hash->size; + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return -1; + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return -1; + node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */ + if (linsert(el, hash->charset, node, pins, hash->flags)) + { + _lf_alloc_free(pins, node); + lf_rwunlock_by_pins(pins); + return 1; + } + csize= hash->size; + if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&hash->size, &csize, csize*2); + lf_rwunlock_by_pins(pins); + return 0; +} + +/* + DESCRIPTION + deletes an element with the given key from the hash (if a hash is + not unique and there're many elements with this key - the "first" + matching element is deleted) + RETURN + 0 - deleted + 1 - didn't (not found) + -1 - out of memory + NOTE + see ldelete() for pin usage notes +*/ +int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + LF_SLIST * volatile *el; + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + + bucket= hashnr % hash->size; + lf_rwlock_by_pins(pins); + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return -1; + /* + note that we still need to initialize_bucket here, + we cannot return "node not found", because an old bucket of that + node may've been split and the node was assigned to a new bucket + that was never accessed before and thus is not initialized. + */ + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return -1; + if (ldelete(el, hash->charset, my_reverse_bits(hashnr) | 1, + (byte *)key, keylen, pins)) + { + lf_rwunlock_by_pins(pins); + return 1; + } + my_atomic_add32(&hash->count, -1); + lf_rwunlock_by_pins(pins); + return 0; +} + +/* + RETURN + a pointer to an element with the given key (if a hash is not unique and + there're many elements with this key - the "first" matching element) + NULL if nothing is found + MY_ERRPTR if OOM + + NOTE + see lsearch() for pin usage notes +*/ +void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen) +{ + LF_SLIST * volatile *el, *found; + uint bucket, hashnr= calc_hash(hash, (byte *)key, keylen); + + bucket= hashnr % hash->size; + lf_rwlock_by_pins(pins); + el= _lf_dynarray_lvalue(&hash->array, bucket); + if (unlikely(!el)) + return MY_ERRPTR; + if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins))) + return MY_ERRPTR; + found= lsearch(el, hash->charset, my_reverse_bits(hashnr) | 1, + (byte *)key, keylen, pins); + lf_rwunlock_by_pins(pins); + return found ? found+1 : 0; +} + +static const byte *dummy_key= ""; + +/* + RETURN + 0 - ok + -1 - out of memory +*/ +static int initialize_bucket(LF_HASH *hash, LF_SLIST * volatile *node, + uint bucket, LF_PINS *pins) +{ + uint parent= my_clear_highest_bit(bucket); + LF_SLIST *dummy= (LF_SLIST *)my_malloc(sizeof(LF_SLIST), MYF(MY_WME)); + LF_SLIST **tmp= 0, *cur; + LF_SLIST * volatile *el= _lf_dynarray_lvalue(&hash->array, parent); + if (unlikely(!el || !dummy)) + return -1; + if (*el == NULL && bucket && + unlikely(initialize_bucket(hash, el, parent, pins))) + return -1; + dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */ + dummy->key= (char*) dummy_key; + dummy->keylen= 0; + if ((cur= linsert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE))) + { + my_free((void *)dummy, MYF(0)); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); + /* + note that if the CAS above failed (after linsert() succeeded), + it would mean that some other thread has executed linsert() for + the same dummy node, its linsert() failed, it picked up our + dummy node (in "dummy= cur") and executed the same CAS as above. + Which means that even if CAS above failed we don't need to retry, + and we should not free(dummy) - there's no memory leak here + */ + return 0; +} diff --git a/mysys/mf_keycache.c b/mysys/mf_keycache.c index 1fef8aac170..6845e63dc33 100644 --- a/mysys/mf_keycache.c +++ b/mysys/mf_keycache.c @@ -105,6 +105,7 @@ #include #include "my_static.h" #include +#include #include #include @@ -1261,12 +1262,12 @@ static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block) KEYCACHE_THREAD_TRACE("unlink_block"); #if defined(KEYCACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(keycache->blocks_available != 0); keycache->blocks_available--; KEYCACHE_DBUG_PRINT("unlink_block", ("unlinked block %u status=%x #requests=%u #available=%u", BLOCK_NUMBER(block), block->status, block->requests, keycache->blocks_available)); - KEYCACHE_DBUG_ASSERT(keycache->blocks_available >= 0); #endif } @@ -2359,9 +2360,9 @@ restart: (block->hash_link->diskpos == filepos))); *page_st=page_status; KEYCACHE_DBUG_PRINT("find_key_block", - ("fd: %d pos: %lu block->status: %u page_status: %u", + ("fd: %d pos: %lu block->status: %u page_status: %d", file, (ulong) filepos, block->status, - (uint) page_status)); + page_status)); #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) DBUG_EXECUTE("check_keycache2", @@ -2512,17 +2513,19 @@ static void read_block(KEY_CACHE *keycache, */ uchar *key_cache_read(KEY_CACHE *keycache, - File file, my_off_t filepos, int level, - uchar *buff, uint length, - uint block_length __attribute__((unused)), - int return_buffer __attribute__((unused))) + File file, my_off_t filepos, int level, + uchar *buff, uint length, + uint block_length __attribute__((unused)), + int return_buffer __attribute__((unused))) { my_bool locked_and_incremented= FALSE; int error=0; uchar *start= buff; DBUG_ENTER("key_cache_read"); - DBUG_PRINT("enter", ("fd: %u pos: %lu length: %u", - (uint) file, (ulong) filepos, length)); + DBUG_PRINT("enter", ("fd: %u pos: %lu page: %lu length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length)); if (keycache->key_cache_inited) { @@ -2533,12 +2536,13 @@ uchar *key_cache_read(KEY_CACHE *keycache, uint status; int page_st; - /* + + /* When the key cache is once initialized, we use the cache_lock to reliably distinguish the cases of normal operation, resizing, and disabled cache. We always increment and decrement 'cnt_for_resize_op' so that a resizer can wait for pending I/O. - */ + */ keycache_pthread_mutex_lock(&keycache->cache_lock); /* Cache resizing has two phases: Flushing and re-initializing. In @@ -2565,7 +2569,7 @@ uchar *key_cache_read(KEY_CACHE *keycache, do { /* Cache could be disabled in a later iteration. */ - + if (!keycache->can_be_used) goto no_key_cache; /* Start reading at the beginning of the cache block. */ @@ -2975,9 +2979,10 @@ int key_cache_write(KEY_CACHE *keycache, int error=0; DBUG_ENTER("key_cache_write"); DBUG_PRINT("enter", - ("fd: %u pos: %lu length: %u block_length: %u key_block_length: %u", - (uint) file, (ulong) filepos, length, block_length, - keycache ? keycache->key_cache_block_size : 0)); + ("fd: %u pos: %lu page: %lu length: %u block_length: %u", + (uint) file, (ulong) filepos, + (ulong) (filepos / keycache->key_cache_block_size), + length, block_length)); if (!dont_write) { @@ -3169,7 +3174,7 @@ int key_cache_write(KEY_CACHE *keycache, if (!dont_write) { - /* Not used in the server. buff has been written to disk at start. */ + /* Not used in the server. buff has been written to disk at start. */ if ((block->status & BLOCK_CHANGED) && (!offset && read_length >= keycache->key_cache_block_size)) link_to_file_list(keycache, block, block->hash_link->file, 1); @@ -3183,7 +3188,6 @@ int key_cache_write(KEY_CACHE *keycache, a flush. */ block->status&= ~BLOCK_FOR_UPDATE; - set_if_smaller(block->offset, offset); set_if_bigger(block->length, read_length+offset); diff --git a/mysys/mf_keycaches.c b/mysys/mf_keycaches.c index 6227a05ce06..9ea5678da9a 100644 --- a/mysys/mf_keycaches.c +++ b/mysys/mf_keycaches.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003 MySQL AB +/* Copyright (C) 2003-2007 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,269 +25,7 @@ #include #include #include - -/***************************************************************************** - General functions to handle SAFE_HASH objects. - - A SAFE_HASH object is used to store the hash, the mutex and default value - needed by the rest of the key cache code. - This is a separate struct to make it easy to later reuse the code for other - purposes - - All entries are linked in a list to allow us to traverse all elements - and delete selected ones. (HASH doesn't allow any easy ways to do this). -*****************************************************************************/ - -/* - Struct to store a key and pointer to object -*/ - -typedef struct st_safe_hash_entry -{ - uchar *key; - uint length; - uchar *data; - struct st_safe_hash_entry *next, **prev; -} SAFE_HASH_ENTRY; - - -typedef struct st_safe_hash_with_default -{ -#ifdef THREAD - rw_lock_t mutex; -#endif - HASH hash; - uchar *default_value; - SAFE_HASH_ENTRY *root; -} SAFE_HASH; - - -/* - Free a SAFE_HASH_ENTRY - - This function is called by the hash object on delete -*/ - -static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) -{ - DBUG_ENTER("free_assign_entry"); - my_free((uchar*) entry, MYF(0)); - DBUG_VOID_RETURN; -} - - -/* Get key and length for a SAFE_HASH_ENTRY */ - -static uchar *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length=entry->length; - return (uchar*) entry->key; -} - - -/* - Init a SAFE_HASH object - - SYNOPSIS - safe_hash_init() - hash safe_hash handler - elements Expected max number of elements - default_value default value - - NOTES - In case of error we set hash->default_value to 0 to allow one to call - safe_hash_free on an object that couldn't be initialized. - - RETURN - 0 ok - 1 error -*/ - -static my_bool safe_hash_init(SAFE_HASH *hash, uint elements, - uchar *default_value) -{ - DBUG_ENTER("safe_hash"); - if (hash_init(&hash->hash, &my_charset_bin, elements, - 0, 0, (hash_get_key) safe_hash_entry_get, - (void (*)(void*)) safe_hash_entry_free, 0)) - { - hash->default_value= 0; - DBUG_RETURN(1); - } - my_rwlock_init(&hash->mutex, 0); - hash->default_value= default_value; - hash->root= 0; - DBUG_RETURN(0); -} - - -/* - Free a SAFE_HASH object - - NOTES - This is safe to call on any object that has been sent to safe_hash_init() -*/ - -static void safe_hash_free(SAFE_HASH *hash) -{ - /* - Test if safe_hash_init succeeded. This will also guard us against multiple - free calls. - */ - if (hash->default_value) - { - hash_free(&hash->hash); - rwlock_destroy(&hash->mutex); - hash->default_value=0; - } -} - -/* - Return the value stored for a key or default value if no key -*/ - -static uchar *safe_hash_search(SAFE_HASH *hash, const uchar *key, uint length) -{ - uchar *result; - DBUG_ENTER("safe_hash_search"); - rw_rdlock(&hash->mutex); - result= hash_search(&hash->hash, key, length); - rw_unlock(&hash->mutex); - if (!result) - result= hash->default_value; - else - result= ((SAFE_HASH_ENTRY*) result)->data; - DBUG_PRINT("exit",("data: 0x%lx", (long) result)); - DBUG_RETURN(result); -} - - -/* - Associate a key with some data - - SYONOPSIS - safe_hash_set() - hash Hash handle - key key (path to table etc..) - length Length of key - data data to to associate with the data - - NOTES - This can be used both to insert a new entry and change an existing - entry. - If one associates a key with the default key cache, the key is deleted - - RETURN - 0 ok - 1 error (Can only be EOM). In this case my_message() is called. -*/ - -static my_bool safe_hash_set(SAFE_HASH *hash, const uchar *key, uint length, - uchar *data) -{ - SAFE_HASH_ENTRY *entry; - my_bool error= 0; - DBUG_ENTER("safe_hash_set"); - DBUG_PRINT("enter",("key: %.*s data: 0x%lx", length, key, (long) data)); - - rw_wrlock(&hash->mutex); - entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length); - - if (data == hash->default_value) - { - /* - The key is to be associated with the default entry. In this case - we can just delete the entry (if it existed) from the hash as a - search will return the default entry - */ - if (!entry) /* nothing to do */ - goto end; - /* unlink entry from list */ - if ((*entry->prev= entry->next)) - entry->next->prev= entry->prev; - hash_delete(&hash->hash, (uchar*) entry); - goto end; - } - if (entry) - { - /* Entry existed; Just change the pointer to point at the new data */ - entry->data= data; - } - else - { - if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length, - MYF(MY_WME)))) - { - error= 1; - goto end; - } - entry->key= (uchar*) (entry +1); - memcpy((char*) entry->key, (char*) key, length); - entry->length= length; - entry->data= data; - /* Link entry to list */ - if ((entry->next= hash->root)) - entry->next->prev= &entry->next; - entry->prev= &hash->root; - hash->root= entry; - if (my_hash_insert(&hash->hash, (uchar*) entry)) - { - /* This can only happen if hash got out of memory */ - my_free((char*) entry, MYF(0)); - error= 1; - goto end; - } - } - -end: - rw_unlock(&hash->mutex); - DBUG_RETURN(error); -} - - -/* - Change all entres with one data value to another data value - - SYONOPSIS - safe_hash_change() - hash Hash handle - old_data Old data - new_data Change all 'old_data' to this - - NOTES - We use the linked list to traverse all elements in the hash as - this allows us to delete elements in the case where 'new_data' is the - default value. -*/ - -static void safe_hash_change(SAFE_HASH *hash, uchar *old_data, uchar *new_data) -{ - SAFE_HASH_ENTRY *entry, *next; - DBUG_ENTER("safe_hash_set"); - - rw_wrlock(&hash->mutex); - - for (entry= hash->root ; entry ; entry= next) - { - next= entry->next; - if (entry->data == old_data) - { - if (new_data == hash->default_value) - { - if ((*entry->prev= entry->next)) - entry->next->prev= entry->prev; - hash_delete(&hash->hash, (uchar*) entry); - } - else - entry->data= new_data; - } - } - - rw_unlock(&hash->mutex); - DBUG_VOID_RETURN; -} - +#include "my_safehash.h" /***************************************************************************** Functions to handle the key cache objects @@ -315,6 +53,7 @@ void multi_keycache_free(void) multi_key_cache_search() key key to find (usually table path) uint length Length of key. + def Default value if no key cache NOTES This function is coded in such a way that we will return the @@ -325,11 +64,13 @@ void multi_keycache_free(void) key cache to use */ -KEY_CACHE *multi_key_cache_search(uchar *key, uint length) +KEY_CACHE *multi_key_cache_search(uchar *key, uint length, + KEY_CACHE *def) { if (!key_cache_hash.hash.records) - return dflt_key_cache; - return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length); + return def; + return (KEY_CACHE*) safe_hash_search(&key_cache_hash, key, length, + (void*) def); } @@ -361,3 +102,5 @@ void multi_key_cache_change(KEY_CACHE *old_data, { safe_hash_change(&key_cache_hash, (uchar*) old_data, (uchar*) new_data); } + + diff --git a/mysys/my_atomic.c b/mysys/my_atomic.c index 6a30267eb80..aa04d55f624 100644 --- a/mysys/my_atomic.c +++ b/mysys/my_atomic.c @@ -17,11 +17,10 @@ #include #ifndef HAVE_INLINE -/* - the following will cause all inline functions to be instantiated -*/ +/* the following will cause all inline functions to be instantiated */ #define HAVE_INLINE -#define static extern +#undef STATIC_INLINE +#define STATIC_INLINE extern #endif #include @@ -35,7 +34,7 @@ */ int my_atomic_initialize() { - DBUG_ASSERT(sizeof(intptr) == sizeof(void *)); + compile_time_assert(sizeof(intptr) == sizeof(void *)); /* currently the only thing worth checking is SMP/UP issue */ #ifdef MY_ATOMIC_MODE_DUMMY return my_getncpus() == 1 ? MY_ATOMIC_OK : MY_ATOMIC_NOT_1CPU; diff --git a/mysys/my_bit.c b/mysys/my_bit.c index 5a9b1187c83..2881eb1ebd2 100644 --- a/mysys/my_bit.c +++ b/mysys/my_bit.c @@ -13,23 +13,18 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* Some useful bit functions */ +#include -#include "mysys_priv.h" +#ifndef HAVE_INLINE +/* the following will cause all inline functions to be instantiated */ +#define HAVE_INLINE +#undef STATIC_INLINE +#define STATIC_INLINE extern +#endif -/* - Find smallest X in 2^X >= value - This can be used to divide a number with value by doing a shift instead -*/ +#include -uint my_bit_log2(ulong value) -{ - uint bit; - for (bit=0 ; value > 1 ; value>>=1, bit++) ; - return bit; -} - -static char nbits[256] = { +const char _my_bits_nbits[256] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, @@ -48,60 +43,29 @@ static char nbits[256] = { 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, }; -uint my_count_bits(ulonglong v) -{ -#if SIZEOF_LONG_LONG > 4 - /* The following code is a bit faster on 16 bit machines than if we would - only shift v */ - ulong v2=(ulong) (v >> 32); - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)] + - nbits[(uchar) (v2)] + - nbits[(uchar) (v2 >> 8)] + - nbits[(uchar) (v2 >> 16)] + - nbits[(uchar) (v2 >> 24)]); -#else - return (uint) (uchar) (nbits[(uchar) v] + - nbits[(uchar) (v >> 8)] + - nbits[(uchar) (v >> 16)] + - nbits[(uchar) (v >> 24)]); -#endif -} - -uint my_count_bits_ushort(ushort v) -{ - return nbits[v]; -} - - /* - Next highest power of two - - SYNOPSIS - my_round_up_to_next_power() - v Value to check - - RETURN - Next or equal power of 2 - Note: 0 will return 0 - - NOTES - Algorithm by Sean Anderson, according to: - http://graphics.stanford.edu/~seander/bithacks.html - (Orignal code public domain) - - Comments shows how this works with 01100000000000000000000000001011 + perl -e 'print map{", 0x".unpack H2,pack B8,unpack b8,chr$_}(0..255)' */ +const uchar _my_bits_reverse_table[256]={ +0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, +0xB0, 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, +0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, +0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, +0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, +0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, +0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, +0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, +0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, +0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, 0x01, 0x81, +0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, +0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, +0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, +0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, +0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, +0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, +0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, +0xBB, 0x7B, 0xFB, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, +0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, +0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; -uint32 my_round_up_to_next_power(uint32 v) -{ - v--; /* 01100000000000000000000000001010 */ - v|= v >> 1; /* 01110000000000000000000000001111 */ - v|= v >> 2; /* 01111100000000000000000000001111 */ - v|= v >> 4; /* 01111111110000000000000000001111 */ - v|= v >> 8; /* 01111111111111111100000000001111 */ - v|= v >> 16; /* 01111111111111111111111111111111 */ - return v+1; /* 10000000000000000000000000000000 */ -} diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c index 10eff40b9ed..e127b2584ae 100644 --- a/mysys/my_bitmap.c +++ b/mysys/my_bitmap.c @@ -38,6 +38,7 @@ #include "mysys_priv.h" #include #include +#include void create_last_word_mask(MY_BITMAP *map) { diff --git a/mysys/my_create.c b/mysys/my_create.c index 55878318ead..454ccf6ab7d 100644 --- a/mysys/my_create.c +++ b/mysys/my_create.c @@ -52,6 +52,13 @@ File my_create(const char *FileName, int CreateFlags, int access_flags, fd = open(FileName, access_flags); #endif + if ((MyFlags & MY_SYNC_DIR) && (fd >=0) && + my_sync_dir_by_file(FileName, MyFlags)) + { + my_close(fd, MyFlags); + fd= -1; + } + DBUG_RETURN(my_register_filename(fd, FileName, FILE_BY_CREATE, EE_CANTCREATEFILE, MyFlags)); } /* my_create */ diff --git a/mysys/my_delete.c b/mysys/my_delete.c index bac3e2513e1..14374fd3fa8 100644 --- a/mysys/my_delete.c +++ b/mysys/my_delete.c @@ -29,6 +29,9 @@ int my_delete(const char *name, myf MyFlags) my_error(EE_DELETE,MYF(ME_BELL+ME_WAITTANG+(MyFlags & ME_NOINPUT)), name,errno); } + else if ((MyFlags & MY_SYNC_DIR) && + my_sync_dir_by_file(name, MyFlags)) + err= -1; DBUG_RETURN(err); } /* my_delete */ diff --git a/mysys/my_getsystime.c b/mysys/my_getsystime.c index 2fd7eed7778..8f7cc5b7029 100644 --- a/mysys/my_getsystime.c +++ b/mysys/my_getsystime.c @@ -34,10 +34,6 @@ ulonglong my_getsystime() LARGE_INTEGER t_cnt; if (!offset) { - /* strictly speaking there should be a mutex to protect - initialization section. But my_getsystime() is called from - UUID() code, and UUID() calls are serialized with a mutex anyway - */ LARGE_INTEGER li; FILETIME ft; GetSystemTimeAsFileTime(&ft); diff --git a/mysys/my_handler.c b/mysys/my_handler.c index 1c3bb20426e..78cc10ac840 100644 --- a/mysys/my_handler.c +++ b/mysys/my_handler.c @@ -16,9 +16,11 @@ MA 02111-1307, USA */ #include -#include "my_handler.h" +#include +#include +#include -int mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, +int ha_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length, uchar *b, uint b_length, my_bool part_key, my_bool skip_end_space) { @@ -174,7 +176,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag=mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -187,7 +189,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, { uint length=(uint) (end-a), a_length=length, b_length=length; if (piks && - (flag= mi_compare_text(keyseg->charset, a, a_length, b, b_length, + (flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool)!(nextflag & SEARCH_PREFIX)))) @@ -235,7 +237,7 @@ int ha_key_cmp(register HA_KEYSEG *keyseg, register uchar *a, next_key_length=key_length-b_length-pack_length; if (piks && - (flag= mi_compare_text(keyseg->charset,a,a_length,b,b_length, + (flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length, (my_bool) ((nextflag & SEARCH_PREFIX) && next_key_length <= 0), (my_bool) ((nextflag & (SEARCH_FIND | @@ -482,12 +484,15 @@ end: DESCRIPTION Find the first NULL value in index-suffix values tuple. - TODO Consider optimizing this fuction or its use so we don't search for - NULL values in completely NOT NULL index suffixes. + + TODO + Consider optimizing this function or its use so we don't search for + NULL values in completely NOT NULL index suffixes. RETURN - First key part that has NULL as value in values tuple, or the last key part - (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain NULLs. + First key part that has NULL as value in values tuple, or the last key + part (with keyseg->type==HA_TYPE_END) if values tuple doesn't contain + NULLs. */ HA_KEYSEG *ha_find_null(HA_KEYSEG *keyseg, uchar *a) diff --git a/mysys/my_init.c b/mysys/my_init.c index c1337205eb4..45601f54cfa 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -43,6 +43,7 @@ static void netware_init(); my_bool my_init_done= 0; uint mysys_usage_id= 0; /* Incremented for each my_init() */ +ulong my_thread_stack_size= 65536; static ulong atoi_octal(const char *str) { diff --git a/mysys/my_open.c b/mysys/my_open.c index 5ad48e66b68..7efaed90e2d 100644 --- a/mysys/my_open.c +++ b/mysys/my_open.c @@ -161,6 +161,7 @@ File my_register_filename(File fd, const char *FileName, enum file_type } pthread_mutex_unlock(&THR_LOCK_open); (void) my_close(fd, MyFlags); + fd= -1; my_errno=ENOMEM; } else diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 6e98132db73..de7a2b611ed 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -63,12 +63,12 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, pthread_mutex_unlock(&my_file_info[Filedes].mutex); #else if ((error= ((readbytes= pread(Filedes, Buffer, Count, offset)) != Count))) - my_errno= errno; + my_errno= errno ? errno : -1; #endif if (error || readbytes != Count) { DBUG_PRINT("warning",("Read only %d bytes off %u from %d, errno: %d", - (int) readbytes, (uint) Count,Filedes,my_errno)); + (int) readbytes, (uint) Count,Filedes,my_errno)); #ifdef THREAD if ((readbytes == 0 || readbytes == (size_t) -1) && errno == EINTR) { @@ -115,7 +115,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, RETURN (size_t) -1 Error # Number of bytes read - */ +*/ size_t my_pwrite(int Filedes, const uchar *Buffer, size_t Count, my_off_t offset, myf MyFlags) diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 6a6aa6a5796..64dbac955ea 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -16,8 +16,9 @@ #include "mysys_priv.h" #include #include "mysys_err.h" - +#include "m_string.h" #undef my_rename + /* On unix rename deletes to file if it exists */ int my_rename(const char *from, const char *to, myf MyFlags) @@ -60,5 +61,18 @@ int my_rename(const char *from, const char *to, myf MyFlags) if (MyFlags & (MY_FAE+MY_WME)) my_error(EE_LINK, MYF(ME_BELL+ME_WAITTANG),from,to,my_errno); } + else if (MyFlags & MY_SYNC_DIR) + { +#ifdef NEED_EXPLICIT_SYNC_DIR + /* do only the needed amount of syncs: */ + char dir_from[FN_REFLEN], dir_to[FN_REFLEN]; + dirname_part(dir_from, from); + dirname_part(dir_to, to); + if (my_sync_dir(dir_from, MyFlags) || + (strcmp(dir_from, dir_to) && + my_sync_dir(dir_to, MyFlags))) + error= -1; +#endif + } DBUG_RETURN(error); } /* my_rename */ diff --git a/mysys/my_safehash.c b/mysys/my_safehash.c new file mode 100644 index 00000000000..57f408942bf --- /dev/null +++ b/mysys/my_safehash.c @@ -0,0 +1,297 @@ +/* Copyright (C) 2003-2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache. +*/ + +#include "mysys_priv.h" +#include +#include "my_safehash.h" + +/***************************************************************************** + General functions to handle SAFE_HASH objects. + + A SAFE_HASH object is used to store the hash, the mutex and default value + needed by the rest of the key cache code. + This is a separate struct to make it easy to later reuse the code for other + purposes + + All entries are linked in a list to allow us to traverse all elements + and delete selected ones. (HASH doesn't allow any easy ways to do this). +*****************************************************************************/ + + +/* + Free a SAFE_HASH_ENTRY + + SYNOPSIS + safe_hash_entry_free() + entry The entry which should be freed + + NOTE + This function is called by the hash object on delete +*/ + +static void safe_hash_entry_free(SAFE_HASH_ENTRY *entry) +{ + DBUG_ENTER("safe_hash_entry_free"); + my_free((gptr) entry, MYF(0)); + DBUG_VOID_RETURN; +} + + +/* + Get key and length for a SAFE_HASH_ENTRY + + SYNOPSIS + safe_hash_entry_get() + entry The entry for which the key should be returned + length Length of the key + + RETURN + # reference on the key +*/ + +static byte *safe_hash_entry_get(SAFE_HASH_ENTRY *entry, uint *length, + my_bool not_used __attribute__((unused))) +{ + *length= entry->length; + return (byte*) entry->key; +} + + +/* + Init a SAFE_HASH object + + SYNOPSIS + safe_hash_init() + hash safe_hash handler + elements Expected max number of elements + default_value default value + + NOTES + In case of error we set hash->default_value to 0 to allow one to call + safe_hash_free on an object that couldn't be initialized. + + RETURN + 0 OK + 1 error +*/ + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + byte *default_value) +{ + DBUG_ENTER("safe_hash_init"); + if (hash_init(&hash->hash, &my_charset_bin, elements, + 0, 0, (hash_get_key) safe_hash_entry_get, + (void (*)(void*)) safe_hash_entry_free, 0)) + { + hash->default_value= 0; + DBUG_RETURN(1); + } + my_rwlock_init(&hash->mutex, 0); + hash->default_value= default_value; + hash->root= 0; + DBUG_RETURN(0); +} + + +/* + Free a SAFE_HASH object + + SYNOPSIS + safe_hash_free() + hash Hash handle + + NOTES + This is safe to call on any object that has been sent to safe_hash_init() +*/ + +void safe_hash_free(SAFE_HASH *hash) +{ + /* + Test if safe_hash_init succeeded. This will also guard us against multiple + free calls. + */ + if (hash->default_value) + { + hash_free(&hash->hash); + rwlock_destroy(&hash->mutex); + hash->default_value=0; + } +} + + +/* + Return the value stored for a key or default value if no key + + SYNOPSIS + safe_hash_search() + hash Hash handle + key key (path to table etc..) + length Length of key + def Default value of data + + RETURN + # data associated with the key of default value if data was not found +*/ + +byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def) +{ + byte *result; + DBUG_ENTER("safe_hash_search"); + rw_rdlock(&hash->mutex); + result= hash_search(&hash->hash, key, length); + rw_unlock(&hash->mutex); + if (!result) + result= def; + else + result= ((SAFE_HASH_ENTRY*) result)->data; + DBUG_PRINT("exit",("data: 0x%lx", (long) result)); + DBUG_RETURN(result); +} + + +/* + Associate a key with some data + + SYNOPSIS + safe_hash_set() + hash Hash handle + key key (path to table etc..) + length Length of key + data data to to associate with the data + + NOTES + This can be used both to insert a new entry and change an existing + entry. + If one associates a key with the default key cache, the key is deleted + + RETURN + 0 OK + 1 error (Can only be EOM). In this case my_message() is called. +*/ + +my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, + byte *data) +{ + SAFE_HASH_ENTRY *entry; + my_bool error= 0; + DBUG_ENTER("safe_hash_set"); + DBUG_PRINT("enter",("key: %.*s data: 0x%lx", length, key, (long) data)); + + rw_wrlock(&hash->mutex); + entry= (SAFE_HASH_ENTRY*) hash_search(&hash->hash, key, length); + + if (data == hash->default_value) + { + /* + The key is to be associated with the default entry. In this case + we can just delete the entry (if it existed) from the hash as a + search will return the default entry + */ + if (!entry) /* nothing to do */ + goto end; + /* unlink entry from list */ + if ((*entry->prev= entry->next)) + entry->next->prev= entry->prev; + hash_delete(&hash->hash, (byte*) entry); + goto end; + } + if (entry) + { + /* Entry existed; Just change the pointer to point at the new data */ + entry->data= data; + } + else + { + if (!(entry= (SAFE_HASH_ENTRY *) my_malloc(sizeof(*entry) + length, + MYF(MY_WME)))) + { + error= 1; + goto end; + } + entry->key= (byte*) (entry +1); + memcpy((char*) entry->key, (char*) key, length); + entry->length= length; + entry->data= data; + /* Link entry to list */ + if ((entry->next= hash->root)) + entry->next->prev= &entry->next; + entry->prev= &hash->root; + hash->root= entry; + if (my_hash_insert(&hash->hash, (byte*) entry)) + { + /* This can only happen if hash got out of memory */ + my_free((char*) entry, MYF(0)); + error= 1; + goto end; + } + } + +end: + rw_unlock(&hash->mutex); + DBUG_RETURN(error); +} + + +/* + Change all entries with one data value to another data value + + SYNOPSIS + safe_hash_change() + hash Hash handle + old_data Old data + new_data Change all 'old_data' to this + + NOTES + We use the linked list to traverse all elements in the hash as + this allows us to delete elements in the case where 'new_data' is the + default value. +*/ + +void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data) +{ + SAFE_HASH_ENTRY *entry, *next; + DBUG_ENTER("safe_hash_change"); + + rw_wrlock(&hash->mutex); + + for (entry= hash->root ; entry ; entry= next) + { + next= entry->next; + if (entry->data == old_data) + { + if (new_data == hash->default_value) + { + if ((*entry->prev= entry->next)) + entry->next->prev= entry->prev; + hash_delete(&hash->hash, (byte*) entry); + } + else + entry->data= new_data; + } + } + + rw_unlock(&hash->mutex); + DBUG_VOID_RETURN; +} diff --git a/mysys/my_safehash.h b/mysys/my_safehash.h new file mode 100644 index 00000000000..53845a5fec7 --- /dev/null +++ b/mysys/my_safehash.h @@ -0,0 +1,58 @@ +/* Copyright (C) 2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache. +*/ + +#include + +/* + Struct to store a key and pointer to object +*/ + +typedef struct st_safe_hash_entry +{ + byte *key; + uint length; + byte *data; + struct st_safe_hash_entry *next, **prev; +} SAFE_HASH_ENTRY; + + +typedef struct st_safe_hash_with_default +{ +#ifdef THREAD + rw_lock_t mutex; +#endif + HASH hash; + byte *default_value; + SAFE_HASH_ENTRY *root; +} SAFE_HASH; + + +my_bool safe_hash_init(SAFE_HASH *hash, uint elements, + byte *default_value); +void safe_hash_free(SAFE_HASH *hash); +byte *safe_hash_search(SAFE_HASH *hash, const byte *key, uint length, + byte *def); +my_bool safe_hash_set(SAFE_HASH *hash, const byte *key, uint length, + byte *data); +void safe_hash_change(SAFE_HASH *hash, byte *old_data, byte *new_data); diff --git a/mysys/my_symlink.c b/mysys/my_symlink.c index 810c0c72632..98059ccd508 100644 --- a/mysys/my_symlink.c +++ b/mysys/my_symlink.c @@ -84,6 +84,8 @@ int my_symlink(const char *content, const char *linkname, myf MyFlags) if (MyFlags & MY_WME) my_error(EE_CANT_SYMLINK, MYF(0), linkname, content, errno); } + else if ((MyFlags & MY_SYNC_DIR) && my_sync_dir_by_file(linkname, MyFlags)) + result= -1; DBUG_RETURN(result); #endif /* HAVE_READLINK */ } diff --git a/mysys/my_sync.c b/mysys/my_sync.c index 64fce3aac21..ab3fc89e0d3 100644 --- a/mysys/my_sync.c +++ b/mysys/my_sync.c @@ -48,6 +48,16 @@ int my_sync(File fd, myf my_flags) do { +#if defined(F_FULLFSYNC) + /* + In Mac OS X >= 10.3 this call is safer than fsync() (it forces the + disk's cache and guarantees ordered writes). + */ + if (!(res= fcntl(fd, F_FULLFSYNC, 0))) + break; /* ok */ + /* Some file systems don't support F_FULLFSYNC and fail above: */ + DBUG_PRINT("info",("fcntl(F_FULLFSYNC) failed, falling back")); +#endif #if defined(HAVE_FDATASYNC) res= fdatasync(fd); #elif defined(HAVE_FSYNC) @@ -55,6 +65,7 @@ int my_sync(File fd, myf my_flags) #elif defined(__WIN__) res= _commit(fd); #else +#error Cannot find a way to sync a file, durability in danger res= 0; /* No sync (strange OS) */ #endif } while (res == -1 && errno == EINTR); @@ -66,10 +77,78 @@ int my_sync(File fd, myf my_flags) my_errno= -1; /* Unknown error */ if ((my_flags & MY_IGNORE_BADFD) && (er == EBADF || er == EINVAL || er == EROFS)) + { + DBUG_PRINT("info", ("ignoring errno %d", er)); res= 0; + } else if (my_flags & MY_WME) my_error(EE_SYNC, MYF(ME_BELL+ME_WAITTANG), my_filename(fd), my_errno); } DBUG_RETURN(res); } /* my_sync */ + +static const char cur_dir_name[]= {FN_CURLIB, 0}; +/* + Force directory information to disk. + + SYNOPSIS + my_sync_dir() + dir_name the name of the directory + my_flags flags (MY_WME etc) + + RETURN + 0 if ok, !=0 if error +*/ +int my_sync_dir(const char *dir_name, myf my_flags) +{ +#ifdef NEED_EXPLICIT_SYNC_DIR + DBUG_ENTER("my_sync_dir"); + DBUG_PRINT("my",("Dir: '%s' my_flags: %d", dir_name, my_flags)); + File dir_fd; + int res= 0; + const char *correct_dir_name; + /* Sometimes the path does not contain an explicit directory */ + correct_dir_name= (dir_name[0] == 0) ? cur_dir_name : dir_name; + /* + Syncing a dir may give EINVAL on tmpfs on Linux, which is ok. + EIO on the other hand is very important. Hence MY_IGNORE_BADFD. + */ + if ((dir_fd= my_open(correct_dir_name, O_RDONLY, MYF(my_flags))) >= 0) + { + if (my_sync(dir_fd, MYF(my_flags | MY_IGNORE_BADFD))) + res= 2; + if (my_close(dir_fd, MYF(my_flags))) + res= 3; + } + else + res= 1; + DBUG_RETURN(res); +#else + return 0; +#endif +} + + +/* + Force directory information to disk. + + SYNOPSIS + my_sync_dir_by_file() + file_name the name of a file in the directory + my_flags flags (MY_WME etc) + + RETURN + 0 if ok, !=0 if error +*/ +int my_sync_dir_by_file(const char *file_name, myf my_flags) +{ +#ifdef NEED_EXPLICIT_SYNC_DIR + char dir_name[FN_REFLEN]; + dirname_part(dir_name, file_name); + return my_sync_dir(dir_name, my_flags); +#else + return 0; +#endif +} + diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index 30c501c54ee..e78a8011b3e 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -428,6 +428,29 @@ void TERMINATE(FILE *file) } +/* + Report where a piece of memory was allocated + + This is usefull to call from withing a debugger +*/ + + +void sf_malloc_report_allocated(void *memory) +{ + struct st_irem *irem; + for (irem= sf_malloc_root ; irem ; irem=irem->next) + { + char *data= (((char*) irem) + ALIGN_SIZE(sizeof(struct st_irem)) + + sf_malloc_prehunc); + if (data <= (char*) memory && (char*) memory <= data + irem->datasize) + { + printf("%u bytes at 0x%lx, allocated at line %u in '%s'\n", + irem->datasize, (long) data, irem->linenum, irem->filename); + break; + } + } +} + /* Returns 0 if chunk is ok */ static int _checkchunk(register struct st_irem *irem, const char *filename, diff --git a/mysys/wqueue.c b/mysys/wqueue.c new file mode 100644 index 00000000000..28e044ff606 --- /dev/null +++ b/mysys/wqueue.c @@ -0,0 +1,167 @@ + +#include + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) +/* + Link a thread into double-linked queue of waiting threads. + + SYNOPSIS + wqueue_link_into_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is double-linked of the type (**prev,*next), accessed by + a pointer to the last element. +*/ + +void wqueue_link_into_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + { + /* Queue is empty */ + thread->next= thread; + thread->prev= &thread->next; + } + else + { + thread->prev= last->next->prev; + last->next->prev= &thread->next; + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + + +/* + Add a thread to single-linked queue of waiting threads + + SYNOPSIS + wqueue_add_to_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + Queue is represented by a circular list of the thread structures + The list is single-linked of the type (*next), accessed by a pointer + to the last element. +*/ + +void wqueue_add_to_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + struct st_my_thread_var *last; + if (!(last= wqueue->last_thread)) + thread->next= thread; + else + { + thread->next= last->next; + last->next= thread; + } + wqueue->last_thread= thread; +} + +/* + Unlink a thread from double-linked queue of waiting threads + + SYNOPSIS + wqueue_unlink_from_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be removed from the queue + + RETURN VALUE + none + + NOTES. + See NOTES for link_into_queue +*/ + +void wqueue_unlink_from_queue(WQUEUE *wqueue, struct st_my_thread_var *thread) +{ + if (thread->next == thread) + /* The queue contains only one member */ + wqueue->last_thread= NULL; + else + { + thread->next->prev= thread->prev; + *thread->prev= thread->next; + if (wqueue->last_thread == thread) + wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next, + thread->prev); + } + thread->next= NULL; +} + + +/* + Remove all threads from queue signaling them to proceed + + SYNOPSIS + wqueue_realease_queue() + wqueue pointer to the queue structure + thread pointer to the thread to be added to the queue + + RETURN VALUE + none + + NOTES. + See notes for add_to_queue + When removed from the queue each thread is signaled via condition + variable thread->suspend. +*/ + +void wqueue_release_queue(WQUEUE *wqueue) +{ + struct st_my_thread_var *last= wqueue->last_thread; + struct st_my_thread_var *next= last->next; + struct st_my_thread_var *thread; + do + { + thread= next; + pthread_cond_signal(&thread->suspend); + next= thread->next; + thread->next= NULL; + } + while (thread != last); + wqueue->last_thread= NULL; +} + + +/* + Add thread and wait + + SYNOPSYS + wqueue_add_and_wait() + wqueue queue to add to + thread thread which is waiting + lock mutex need for the operation +*/ + +void wqueue_add_and_wait(WQUEUE *wqueue, + struct st_my_thread_var *thread, pthread_mutex_t *lock) +{ + DBUG_ENTER("wqueue_add_and_wait"); + DBUG_PRINT("enter", ("thread ox%lxcond 0x%lx, mutex 0x%lx", + (ulong) thread, (ulong) &thread->suspend, (ulong) lock)); + wqueue_add_to_queue(wqueue, thread); + do + { + DBUG_PRINT("info", ("wait... cond 0x%lx, mutex 0x%lx", + (ulong) &thread->suspend, (ulong) lock)); + pthread_cond_wait(&thread->suspend, lock); + DBUG_PRINT("info", ("wait done cond 0x%lx, mutex 0x%lx, next 0x%lx", + (ulong) &thread->suspend, (ulong) lock, + (ulong) thread->next)); + } + while (thread->next); + DBUG_VOID_RETURN; +} diff --git a/plugin/daemon_example/daemon_example.cc b/plugin/daemon_example/daemon_example.cc index af585bb4302..e683bf7ab7a 100644 --- a/plugin/daemon_example/daemon_example.cc +++ b/plugin/daemon_example/daemon_example.cc @@ -81,7 +81,7 @@ pthread_handler_t mysql_heartbeat(void *p) 1 failure (cannot happen) */ -static int daemon_example_plugin_init(void *p) +static int daemon_example_plugin_init(void *p __attribute__ ((unused))) { DBUG_ENTER("daemon_example_plugin_init"); @@ -147,7 +147,7 @@ static int daemon_example_plugin_init(void *p) */ -static int daemon_example_plugin_deinit(void *p) +static int daemon_example_plugin_deinit(void *p __attribute__ ((unused))) { DBUG_ENTER("daemon_example_plugin_deinit"); char buffer[HEART_STRING_BUFFER]; diff --git a/sql/filesort.cc b/sql/filesort.cc index b1dfb4d5e71..6c72a0a2856 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -457,7 +457,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, ref_pos= ref_buff; quick_select=select && select->quick; record=0; - flag= ((!indexfile && file->ha_table_flags() & HA_REC_NOT_IN_SEQ) + flag= ((!indexfile && (file->ha_table_flags() & HA_REC_NOT_IN_SEQ)) || quick_select); if (indexfile || flag) ref_pos= &file->ref[0]; @@ -1076,7 +1076,7 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length) { - uchar *reuse_end= reuse->base + reuse->max_keys * key_length; + byte *reuse_end= reuse->base + reuse->max_keys * key_length; for (uint i= 0; i < queue->elements; ++i) { BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i); @@ -1146,7 +1146,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, offset= rec_length-res_length; maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1)); to_start_filepos= my_b_tell(to_file); - strpos= (uchar*) sort_buffer; + strpos= sort_buffer; org_max_rows=max_rows= param->max_rows; /* The following will fire if there is not enough space in sort_buffer */ @@ -1158,10 +1158,10 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, DBUG_RETURN(1); /* purecov: inspected */ for (buffpek= Fb ; buffpek <= Tb ; buffpek++) { - buffpek->base= strpos; + buffpek->base= (byte*) strpos; buffpek->max_keys= maxcount; strpos+= (uint) (error= (int) read_to_buffer(from_file, buffpek, - rec_length)); + rec_length)); if (error == -1) goto err; /* purecov: inspected */ buffpek->max_keys= buffpek->mem_count; // If less data in buffers than expected @@ -1250,7 +1250,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, } } buffpek= (BUFFPEK*) queue_top(&queue); - buffpek->base= sort_buffer; + buffpek->base= (byte*) sort_buffer; buffpek->max_keys= param->keys; /* @@ -1285,7 +1285,7 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, else { register uchar *end; - strpos= buffpek->key+offset; + strpos= (uchar*) buffpek->key+offset; for (end= strpos+buffpek->mem_count*rec_length ; strpos != end ; strpos+= rec_length) diff --git a/sql/gen_lex_hash.cc b/sql/gen_lex_hash.cc index 36b7f30dc64..18f80e11a15 100644 --- a/sql/gen_lex_hash.cc +++ b/sql/gen_lex_hash.cc @@ -481,8 +481,8 @@ int main(int argc,char **argv) printf("\nstatic unsigned int symbols_max_len=%d;\n\n", max_len2); printf("\ -static inline SYMBOL *get_hash_symbol(const char *s,\n\ - unsigned int len,bool function)\n\ +static SYMBOL *get_hash_symbol(const char *s,\n\ + unsigned int len,bool function)\n\ {\n\ register uchar *hash_map;\n\ register const char *cur_str= s;\n\ diff --git a/sql/handler.cc b/sql/handler.cc index b290c5acaa7..d768ad4dc5d 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -30,6 +30,10 @@ #include #include +#ifdef WITH_MARIA_STORAGE_ENGINE +#include +#endif + #ifdef WITH_PARTITION_STORAGE_ENGINE #include "ha_partition.h" #endif @@ -2789,6 +2793,98 @@ int ha_change_key_cache(KEY_CACHE *old_key_cache, } +/***************************************************************************** + pagecache handling. + + This code is only relevant for maria tables + + pagecache->cache may be 0 only in the case where a key cache is not + initialized or when we where not able to init the key cache in a previous + call to ha_init_pagecache() (probably out of memory) +*****************************************************************************/ + + +#ifdef WITH_MARIA_STORAGE_ENGINE + +/* Init a pagecache if it has not been initied before */ + +int ha_init_pagecache(const char *name, PAGECACHE *pagecache) +{ + DBUG_ENTER("ha_init_key_cache"); + + if (!pagecache->inited) + { + pthread_mutex_lock(&LOCK_global_system_variables); + long tmp_buff_size= (long) pagecache->param_buff_size; + uint division_limit= pagecache->param_division_limit; + uint age_threshold= pagecache->param_age_threshold; + pthread_mutex_unlock(&LOCK_global_system_variables); + DBUG_RETURN(!init_pagecache(pagecache, + tmp_buff_size, division_limit, age_threshold, + MARIA_KEY_BLOCK_LENGTH)); + } + DBUG_RETURN(0); +} + + +/* Resize key cache */ +/* +TODO: uncomment when resize will be implemented +int ha_resize_pagecache(PAGECACHE *pagecache) +{ + DBUG_ENTER("ha_resize_pagecache"); + + if (pagecache->inited) + { + pthread_mutex_lock(&LOCK_global_system_variables); + long tmp_buff_size= (long) pagecache->param_buff_size; + long tmp_block_size= (long) pagecache->param_block_size; + uint division_limit= pagecache->param_division_limit; + uint age_threshold= pagecache->param_age_threshold; + pthread_mutex_unlock(&LOCK_global_system_variables); + DBUG_RETURN(!resize_pagecache(pagecache, tmp_block_size, + tmp_buff_size, + division_limit, age_threshold)); + } + DBUG_RETURN(0); +} +*/ + + +/* Change parameters for key cache (like size) */ + +int ha_change_pagecache_param(PAGECACHE *pagecache) +{ + if (pagecache->inited) + { + pthread_mutex_lock(&LOCK_global_system_variables); + uint division_limit= pagecache->param_division_limit; + uint age_threshold= pagecache->param_age_threshold; + pthread_mutex_unlock(&LOCK_global_system_variables); + change_pagecache_param(pagecache, division_limit, age_threshold); + } + return 0; +} + +/* Free memory allocated by a key cache */ + +int ha_end_pagecache(PAGECACHE *pagecache) +{ + end_pagecache(pagecache, 1); // Can never fail + return 0; +} + +/* Move all tables from one key cache to another one */ + +int ha_change_pagecache(PAGECACHE *old_pagecache, + PAGECACHE *new_pagecache) +{ + maria_change_pagecache(old_pagecache, new_pagecache); + return 0; +} + +#endif /* WITH_MARIA_STORAGE_ENGINE */ + /** @brief Try to discover one table from handler(s) diff --git a/sql/handler.h b/sql/handler.h index e80b62c1250..9eed5ce7f2e 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -20,8 +20,10 @@ #pragma interface /* gcc class implementation */ #endif +#include #include #include +#include "../storage/maria/ma_pagecache.h" #ifndef NO_HASH #define NO_HASH /* Not yet implemented */ @@ -272,6 +274,7 @@ enum legacy_db_type DB_TYPE_TABLE_FUNCTION, DB_TYPE_MEMCACHE, DB_TYPE_FALCON, + DB_TYPE_MARIA, DB_TYPE_FIRST_DYNAMIC=42, DB_TYPE_DEFAULT=127 // Must be last }; @@ -851,7 +854,8 @@ typedef struct st_ha_check_opt ulong sort_buffer_size; uint flags; /* isam layer flags (e.g. for myisamchk) */ uint sql_flags; /* sql layer flags - for something myisamchk cannot do */ - KEY_CACHE *key_cache; /* new key cache when changing key cache */ + KEY_CACHE *key_cache; /* new key cache when changing key cache */ + PAGECACHE *pagecache; /* new pagecache when changing pagecache */ void init(); } HA_CHECK_OPT; @@ -1806,6 +1810,15 @@ int ha_resize_key_cache(KEY_CACHE *key_cache); int ha_change_key_cache_param(KEY_CACHE *key_cache); int ha_change_key_cache(KEY_CACHE *old_key_cache, KEY_CACHE *new_key_cache); int ha_end_key_cache(KEY_CACHE *key_cache); +/* pagecache */ +int ha_init_pagecache(const char *name, PAGECACHE *pagecache); +/* +TODO: uncomment when resizing will be implemented +int ha_resize_pagecache(PAGECACHE *pagecache); +*/ +int ha_change_pagecache_param(PAGECACHE *pagecache); +int ha_change_pagecache(PAGECACHE *old_pagecache, PAGECACHE *new_pagecache); +int ha_end_pagecache(PAGECACHE *pagecache); /* report to InnoDB that control passes to the client */ int ha_release_temporary_latches(THD *thd); diff --git a/sql/item_func.cc b/sql/item_func.cc index fb2f361f676..d2485c012a1 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include "sp_head.h" #include "sp_rcontext.h" diff --git a/sql/item_func.h b/sql/item_func.h index 8fc68f93e12..347a4a1f084 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -1368,7 +1368,6 @@ public: /* for fulltext search */ -#include class Item_func_match :public Item_real_func { diff --git a/sql/log.cc b/sql/log.cc index 6dc204265b0..1a7ec087c51 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -2413,6 +2413,11 @@ bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg, my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), 0, MYF(MY_WME | MY_WAIT_IF_FULL))) { + /* + TODO: all operations creating/deleting the index file or a log, should + call my_sync_dir() or my_sync_dir_by_file() to be durable. + TODO: file creation should be done with my_create() not my_open(). + */ if (index_file_nr >= 0) my_close(index_file_nr,MYF(0)); return TRUE; diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 5a50b03282f..a14c139e926 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -1644,7 +1644,7 @@ extern ulong max_connections,max_connect_errors, connect_timeout; extern ulong slave_net_timeout, slave_trans_retries; extern uint max_user_connections; extern ulong what_to_log,flush_time; -extern ulong query_buff_size, thread_stack; +extern ulong query_buff_size; extern ulong max_prepared_stmt_count, prepared_stmt_count; extern ulong binlog_cache_size, max_binlog_cache_size, open_files_limit; extern ulong max_binlog_size, max_relay_log_size; @@ -1718,6 +1718,9 @@ extern pthread_cond_t COND_global_read_lock; extern pthread_attr_t connection_attrib; extern I_List threads; extern I_List key_caches; +#ifdef WITH_MARIA_STORAGE_ENGINE +extern I_List pagecaches; +#endif /* WITH_MARIA_STORAGE_ENGINE */ extern MY_BITMAP temp_pool; extern String my_empty_string; extern const String my_null_string; @@ -1743,7 +1746,7 @@ extern uint sql_command_flags[]; extern TYPELIB log_output_typelib; /* optional things, have_* variables */ - +extern SHOW_COMP_OPTION have_maria_db; extern handlerton *partition_hton; extern handlerton *myisam_hton; extern handlerton *heap_hton; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 39a7c4e9095..ddaf0c360bb 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -16,6 +16,7 @@ #include "mysql_priv.h" #include #include +#include #include "slave.h" #include "rpl_mi.h" #include "sql_repl.h" @@ -27,6 +28,9 @@ #include "events.h" #include "../storage/myisam/ha_myisam.h" +#ifdef WITH_MARIA_STORAGE_ENGINE +#include "../storage/maria/ha_maria.h" +#endif #include "rpl_injector.h" @@ -439,7 +443,7 @@ uint volatile thread_count, thread_running; ulonglong thd_startup_options; ulong back_log, connect_timeout, concurrency, server_id; ulong table_cache_size, table_def_size; -ulong thread_stack, what_to_log; +ulong what_to_log; ulong query_buff_size, slow_launch_time, slave_open_temp_tables; ulong open_files_limit, max_binlog_size, max_relay_log_size; ulong slave_net_timeout, slave_trans_retries; @@ -500,6 +504,7 @@ char *mysqld_unix_port, *opt_mysql_tmpdir; const char **errmesg; /* Error messages */ const char *myisam_recover_options_str="OFF"; const char *myisam_stats_method_str="nulls_unequal"; +const char *maria_stats_method_str="nulls_unequal"; /* name of reference on left espression in rewritten IN subquery */ const char *in_left_expr_name= ""; @@ -522,6 +527,9 @@ FILE *stderror_file=0; I_List threads; I_List key_caches; +#ifdef WITH_MARIA_STORAGE_ENGINE +I_List pagecaches; +#endif /* WITH_MARIA_STORAGE_ENGINE */ Rpl_filter* rpl_filter; Rpl_filter* binlog_filter; @@ -1201,7 +1209,13 @@ void clean_up(bool print_message) tc_log->close(); xid_cache_free(); delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache); +#ifdef WITH_MARIA_STORAGE_ENGINE + delete_elements(&pagecaches, (void (*)(const char*, gptr)) free_pagecache); +#endif /* WITH_MARIA_STORAGE_ENGINE */ multi_keycache_free(); +#ifdef WITH_MARIA_STORAGE_ENGINE + multi_pagecache_free(); +#endif free_status_vars(); end_thr_alarm(1); /* Free allocated memory */ my_free_open_file_info(); @@ -2190,6 +2204,10 @@ the problem, but since we have already crashed, something is definitely wrong\n\ and this may fail.\n\n"); fprintf(stderr, "key_buffer_size=%lu\n", (ulong) dflt_key_cache->key_cache_mem_size); +#ifdef WITH_MARIA_STORAGE_ENGINE + fprintf(stderr, "page_buffer_size=%lu\n", + (ulong) maria_pagecache->mem_size); +#endif /* WITH_MARIA_STORAGE_ENGINE */ fprintf(stderr, "read_buffer_size=%ld\n", (long) global_system_variables.read_buff_size); fprintf(stderr, "max_used_connections=%lu\n", max_used_connections); fprintf(stderr, "max_threads=%u\n", thread_scheduler.max_threads); @@ -2197,6 +2215,9 @@ and this may fail.\n\n"); fprintf(stderr, "It is possible that mysqld could use up to \n\ key_buffer_size + (read_buffer_size + sort_buffer_size)*max_threads = %lu K\n\ bytes of memory\n", ((ulong) dflt_key_cache->key_cache_mem_size + +#ifdef WITH_MARIA_STORAGE_ENGINE + (ulong) maria_pagecache->mem_size + +#endif /* WITH_MARIA_STORAGE_ENGINE */ (global_system_variables.read_buff_size + global_system_variables.sortbuff_size) * thread_scheduler.max_threads + @@ -2220,7 +2241,7 @@ the thread stack. Please read http://www.mysql.com/doc/en/Linux.html\n\n", { fprintf(stderr,"thd: 0x%lx\n",(long) thd); print_stacktrace(thd ? (uchar*) thd->thread_stack : (uchar*) 0, - thread_stack); + my_thread_stack_size); } if (thd) { @@ -2379,9 +2400,9 @@ static void start_signal_handler(void) Peculiar things with ia64 platforms - it seems we only have half the stack size in reality, so we have to double it here */ - pthread_attr_setstacksize(&thr_attr,thread_stack*2); + pthread_attr_setstacksize(&thr_attr,my_thread_stack_size*2); #else - pthread_attr_setstacksize(&thr_attr,thread_stack); + pthread_attr_setstacksize(&thr_attr,my_thread_stack_size); #endif #endif @@ -2780,6 +2801,16 @@ static int init_common_variables(const char *conf_file_name, int argc, get_options(&defaults_argc, defaults_argv); set_server_version(); +#ifdef WITH_MARIA_STORAGE_ENGINE + if (!(maria_pagecache= get_or_create_pagecache(maria_pagecache_base.str, + maria_pagecache_base.length))) + exit(1); +/* + maria_pagecache->param_buff_size= maria_pagecache_var.param_buff_size; + maria_pagecache->param_block_size= maria_block_size; +*/ +#endif + DBUG_PRINT("info",("%s Ver %s for %s on %s\n",my_progname, server_version, SYSTEM_TYPE,MACHINE_TYPE)); @@ -3554,6 +3585,9 @@ server."); /* call ha_init_key_cache() on all key caches to init them */ process_key_caches(&ha_init_key_cache); +#ifdef WITH_MARIA_STORAGE_ENGINE + process_pagecaches(&ha_init_pagecache); +#endif /* WITH_MARIA_STORAGE_ENGINE */ #if defined(HAVE_MLOCKALL) && defined(MCL_CURRENT) && !defined(EMBEDDED_LIBRARY) if (locked_in_memory && !getuid()) @@ -3744,9 +3778,9 @@ int main(int argc, char **argv) Peculiar things with ia64 platforms - it seems we only have half the stack size in reality, so we have to double it here */ - pthread_attr_setstacksize(&connection_attrib,thread_stack*2); + pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size*2); #else - pthread_attr_setstacksize(&connection_attrib,thread_stack); + pthread_attr_setstacksize(&connection_attrib,my_thread_stack_size); #endif #ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE { @@ -3757,15 +3791,15 @@ int main(int argc, char **argv) stack_size/= 2; #endif /* We must check if stack_size = 0 as Solaris 2.9 can return 0 here */ - if (stack_size && stack_size < thread_stack) + if (stack_size && stack_size < my_thread_stack_size) { if (global_system_variables.log_warnings) sql_print_warning("Asked for %lu thread stack, but got %ld", - thread_stack, (long) stack_size); + my_thread_stack_size, (long) stack_size); #if defined(__ia64__) || defined(__ia64) - thread_stack= stack_size*2; + my_thread_stack_size= stack_size*2; #else - thread_stack= stack_size; + my_thread_stack_size= stack_size; #endif } } @@ -5009,10 +5043,17 @@ enum options_mysqld OPT_MAX_LENGTH_FOR_SORT_DATA, OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE, OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE, + OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE, - OPT_MYISAM_USE_MMAP, + OPT_MYISAM_USE_MMAP, OPT_MYISAM_REPAIR_THREADS, OPT_MYISAM_STATS_METHOD, + + OPT_MARIA_BLOCK_SIZE, + OPT_MARIA_MAX_SORT_FILE_SIZE, OPT_MARIA_SORT_BUFFER_SIZE, + OPT_MARIA_USE_MMAP, OPT_MARIA_REPAIR_THREADS, + OPT_MARIA_STATS_METHOD, + OPT_NET_BUFFER_LENGTH, OPT_NET_RETRY_COUNT, OPT_NET_READ_TIMEOUT, OPT_NET_WRITE_TIMEOUT, OPT_OPEN_FILES_LIMIT, @@ -5026,7 +5067,7 @@ enum options_mysqld OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, - OPT_WAIT_TIMEOUT, OPT_MYISAM_REPAIR_THREADS, + OPT_WAIT_TIMEOUT, OPT_ERROR_LOG_FILE, OPT_DEFAULT_WEEK_FORMAT, OPT_RANGE_ALLOC_BLOCK_SIZE, OPT_ALLOW_SUSPICIOUS_UDFS, @@ -5980,6 +6021,40 @@ log and this option does nothing anymore.", 0 #endif , 0, 2, 0, 1, 0}, +#ifdef WITH_MARIA_STORAGE_ENGINE + {"maria_block_size", OPT_MARIA_BLOCK_SIZE, + "Block size to be used for MARIA index pages.", + (uchar**) &maria_block_size, + (uchar**) &maria_block_size, 0, GET_ULONG, REQUIRED_ARG, + MARIA_KEY_BLOCK_LENGTH, MARIA_MIN_KEY_BLOCK_LENGTH, + MARIA_MAX_KEY_BLOCK_LENGTH, + 0, MARIA_MIN_KEY_BLOCK_LENGTH, 0}, + {"maria_max_sort_file_size", OPT_MARIA_MAX_SORT_FILE_SIZE, + "Don't use the fast sort index method to created index if the temporary " + "file would get bigger than this.", + (uchar**) &global_system_variables.maria_max_sort_file_size, + (uchar**) &max_system_variables.maria_max_sort_file_size, 0, + GET_ULL, REQUIRED_ARG, (longlong) LONG_MAX, 0, (ulonglong) MAX_FILE_SIZE, + 0, 1024*1024, 0}, + {"maria_repair_threads", OPT_MARIA_REPAIR_THREADS, + "Number of threads to use when repairing maria tables. The value of 1 " + "disables parallel repair.", + (uchar**) &global_system_variables.maria_repair_threads, + (uchar**) &max_system_variables.maria_repair_threads, 0, + GET_ULONG, REQUIRED_ARG, 1, 1, ~0L, 0, 1, 0}, + {"maria_sort_buffer_size", OPT_MARIA_SORT_BUFFER_SIZE, + "The buffer that is allocated when sorting the index when doing a REPAIR " + "or when creating indexes with CREATE INDEX or ALTER TABLE.", + (uchar**) &global_system_variables.maria_sort_buff_size, + (uchar**) &max_system_variables.maria_sort_buff_size, 0, + GET_ULONG, REQUIRED_ARG, 8192*1024, 4, ~0L, 0, 1, 0}, + {"maria_stats_method", OPT_MARIA_STATS_METHOD, + "Specifies how maria index statistics collection code should threat NULLs. " + "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " + "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", + (uchar**) &maria_stats_method_str, (uchar**) &maria_stats_method_str, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif {"max_allowed_packet", OPT_MAX_ALLOWED_PACKET, "Max packetlength to send/receive from to server.", (uchar**) &global_system_variables.max_allowed_packet, @@ -6083,12 +6158,6 @@ The minimum value for this variable is 4096.", (uchar**) &myisam_data_pointer_size, (uchar**) &myisam_data_pointer_size, 0, GET_ULONG, REQUIRED_ARG, 6, 2, 7, 0, 1, 0}, - {"myisam_max_extra_sort_file_size", OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, - "Deprecated option", - (uchar**) &global_system_variables.myisam_max_extra_sort_file_size, - (uchar**) &max_system_variables.myisam_max_extra_sort_file_size, - 0, GET_ULL, REQUIRED_ARG, (ulonglong) MI_MAX_TEMP_LENGTH, - 0, (ulonglong) MAX_FILE_SIZE, 0, 1, 0}, {"myisam_max_sort_file_size", OPT_MYISAM_MAX_SORT_FILE_SIZE, "Don't use the fast sort index method to created index if the temporary file would get bigger than this.", (uchar**) &global_system_variables.myisam_max_sort_file_size, @@ -6136,7 +6205,7 @@ The minimum value for this variable is 4096.", (uchar**) &global_system_variables.net_write_timeout, (uchar**) &max_system_variables.net_write_timeout, 0, GET_ULONG, REQUIRED_ARG, NET_WRITE_TIMEOUT, 1, LONG_TIMEOUT, 0, 1, 0}, - { "old", OPT_OLD_MODE, "Use compatible behavior.", + {"old", OPT_OLD_MODE, "Use compatible behavior.", (uchar**) &global_system_variables.old_mode, (uchar**) &max_system_variables.old_mode, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, @@ -6154,6 +6223,27 @@ The minimum value for this variable is 4096.", (uchar**) &global_system_variables.optimizer_search_depth, (uchar**) &max_system_variables.optimizer_search_depth, 0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0}, +#ifdef WITH_MARIA_STORAGE_ENGINE + {"pagecache_age_threshold", OPT_KEY_CACHE_AGE_THRESHOLD, + "This characterizes the number of hits a hot block has to be untouched until it is considered aged enough to be downgraded to a warm block. This specifies the percentage ratio of that number of hits to the total number of blocks in key cache", + (uchar**) &maria_pagecache_var.param_age_threshold, + (uchar**) 0, + 0, (GET_ULONG | GET_ASK_ADDR), REQUIRED_ARG, + 300, 100, ~0L, 0, 100, 0}, + {"pagecache_buffer_size", OPT_KEY_BUFFER_SIZE, + "The size of the buffer used for index blocks for MyISAM tables. Increase this to get better index handling (for all reads and multiple writes) to as much as you can afford; 64M on a 256M machine that mainly runs MySQL is quite common.", + (uchar**) &maria_pagecache_var.param_buff_size, + (uchar**) 0, + 0, (GET_ULL | GET_ASK_ADDR), + REQUIRED_ARG, KEY_CACHE_SIZE, MALLOC_OVERHEAD, ~(ulong) 0, MALLOC_OVERHEAD, + IO_SIZE, KEY_CACHE_SIZE}, + {"pagecache_division_limit", OPT_KEY_CACHE_DIVISION_LIMIT, + "The minimum percentage of warm blocks in key cache", + (uchar**) &maria_pagecache_var.param_division_limit, + (uchar**) 0, + 0, (GET_ULONG | GET_ASK_ADDR) , REQUIRED_ARG, 100, + 1, 100, 0, 1, 0}, +#endif /* WITH_MARIA_STORAGE_ENGINE */ {"plugin_dir", OPT_PLUGIN_DIR, "Directory for plugins.", (uchar**) &opt_plugin_dir_ptr, (uchar**) &opt_plugin_dir_ptr, 0, @@ -6162,7 +6252,6 @@ The minimum value for this variable is 4096.", "Optional colon separated list of plugins to load, where each plugin is " "identified by name and path to library seperated by an equals.", (uchar**) &opt_plugin_load, (uchar**) &opt_plugin_load, 0, - GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"preload_buffer_size", OPT_PRELOAD_BUFFER_SIZE, "The size of the buffer that is allocated when preloading indexes", (uchar**) &global_system_variables.preload_buff_size, @@ -6310,8 +6399,8 @@ The minimum value for this variable is 4096.", REQUIRED_ARG, 20, 1, 16384, 0, 1, 0}, #endif {"thread_stack", OPT_THREAD_STACK, - "The stack size for each thread.", (uchar**) &thread_stack, - (uchar**) &thread_stack, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK, + "The stack size for each thread.", (uchar**) &my_thread_stack_size, + (uchar**) &my_thread_stack_size, 0, GET_ULONG, REQUIRED_ARG,DEFAULT_THREAD_STACK, 1024L*128L, ~0L, 0, 1024, 0}, { "time_format", OPT_TIME_FORMAT, "The TIME format (for future).", @@ -6324,12 +6413,12 @@ The minimum value for this variable is 4096.", (uchar**) &max_system_variables.tmp_table_size, 0, GET_ULL, REQUIRED_ARG, 16*1024*1024L, 1024, MAX_MEM_TABLE_SIZE, 0, 1, 0}, {"transaction_alloc_block_size", OPT_TRANS_ALLOC_BLOCK_SIZE, - "Allocation block size for various transaction-related structures", + "Allocation block size for transactions to be stored in binary log", (uchar**) &global_system_variables.trans_alloc_block_size, (uchar**) &max_system_variables.trans_alloc_block_size, 0, GET_ULONG, REQUIRED_ARG, QUERY_ALLOC_BLOCK_SIZE, 1024, ~0L, 0, 1024, 0}, {"transaction_prealloc_size", OPT_TRANS_PREALLOC_SIZE, - "Persistent buffer for various transaction-related structures", + "Persistent buffer for transactions to be stored in binary log", (uchar**) &global_system_variables.trans_prealloc_size, (uchar**) &max_system_variables.trans_prealloc_size, 0, GET_ULONG, REQUIRED_ARG, TRANS_ALLOC_PREALLOC_SIZE, 1024, ~0L, 0, 1024, 0}, @@ -7075,15 +7164,25 @@ static void mysql_init_variables(void) global_query_id= thread_id= 1L; strmov(server_version, MYSQL_SERVER_VERSION); myisam_recover_options_str= sql_mode_str= "OFF"; - myisam_stats_method_str= "nulls_unequal"; + myisam_stats_method_str= maria_stats_method_str= "nulls_unequal"; my_bind_addr = htonl(INADDR_ANY); threads.empty(); thread_cache.empty(); key_caches.empty(); +#ifdef WITH_MARIA_STORAGE_ENGINE + pagecaches.empty(); +#endif /* WITH_MARIA_STORAGE_ENGINE */ if (!(dflt_key_cache= get_or_create_key_cache(default_key_cache_base.str, - default_key_cache_base.length))) + default_key_cache_base.length))) exit(1); - multi_keycache_init(); /* set key_cache_hash.default_value = dflt_key_cache */ + + /* set key_cache_hash.default_value = dflt_key_cache */ + multi_keycache_init(); + +#ifdef WITH_MARIA_STORAGE_ENGINE + /* set pagecache_hash.default_value = maria_pagecache */ + multi_pagecache_init(); +#endif /* Set directory paths */ strmake(language, LANGUAGE, sizeof(language)-1); @@ -7126,6 +7225,7 @@ static void mysql_init_variables(void) when collecting index statistics for MyISAM tables. */ global_system_variables.myisam_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; + global_system_variables.maria_stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; /* Variables that depends on compile options */ #ifndef DBUG_OFF @@ -7657,7 +7757,6 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), int method; LINT_INIT(method_conv); - myisam_stats_method_str= argument; method= find_type_or_exit(argument, &myisam_stats_method_typelib, opt->name); switch (method-1) { @@ -8131,6 +8230,9 @@ void refresh_status(THD *thd) /* Reset the counters of all key caches (default and named). */ process_key_caches(reset_key_cache_counters); +#ifdef WITH_MARIA_STORAGE_ENGINE + process_pagecaches(reset_pagecache_counters); +#endif /* WITH_MARIA_STORAGE_ENGINE */ pthread_mutex_unlock(&LOCK_status); /* diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 4944c994d3d..3ee7e8ee813 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -4299,7 +4299,6 @@ static bool ror_intersect_add(ROR_INTERSECT_INFO *info, } info->out_rows *= selectivity_mult; - DBUG_PRINT("info", ("info->total_cost= %g", info->total_cost)); if (is_cpk_scan) { diff --git a/sql/set_var.cc b/sql/set_var.cc index d9869ce6809..5b28e257f5a 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -54,6 +54,9 @@ #include #include #include +#ifdef WITH_MARIA_STORAGE_ENGINE +#include +#endif #include "events.h" @@ -113,6 +116,7 @@ static void fix_max_join_size(THD *thd, enum_var_type type); static void fix_query_cache_size(THD *thd, enum_var_type type); static void fix_query_cache_min_res_unit(THD *thd, enum_var_type type); static void fix_myisam_max_sort_file_size(THD *thd, enum_var_type type); +static void fix_maria_max_sort_file_size(THD *thd, enum_var_type type); static void fix_max_binlog_size(THD *thd, enum_var_type type); static void fix_max_relay_log_size(THD *thd, enum_var_type type); static void fix_max_connections(THD *thd, enum_var_type type); @@ -121,6 +125,9 @@ static void fix_thd_mem_root(THD *thd, enum_var_type type); static void fix_trans_mem_root(THD *thd, enum_var_type type); static void fix_server_id(THD *thd, enum_var_type type); static KEY_CACHE *create_key_cache(const char *name, uint length); +#ifdef WITH_MARIA_STORAGE_ENGINE +static PAGECACHE *create_pagecache(const char *name, uint length); +#endif /* WITH_MARIA_STORAGE_ENGINE */ void fix_sql_mode_var(THD *thd, enum_var_type type); static uchar *get_error_count(THD *thd); static uchar *get_warning_count(THD *thd); @@ -238,6 +245,14 @@ static sys_var_key_cache_long sys_key_cache_division_limit(&vars, "key_cache_div static sys_var_key_cache_long sys_key_cache_age_threshold(&vars, "key_cache_age_threshold", offsetof(KEY_CACHE, param_age_threshold)); +#ifdef WITH_MARIA_STORAGE_ENGINE +sys_var_pagecache_long sys_pagecache_division_limit("pagecache_division_limit", + offsetof(PAGECACHE, + param_division_limit)); +sys_var_pagecache_long sys_pagecache_age_threshold("pagecache_age_threshold", + offsetof(KEY_CACHE, + param_age_threshold)); +#endif /* WITH_MARIA_STORAGE_ENGINE */ static sys_var_bool_ptr sys_local_infile(&vars, "local_infile", &opt_local_infile); static sys_var_trust_routine_creators @@ -330,6 +345,14 @@ static sys_var_thd_enum sys_myisam_stats_method(&vars, "myisam_stats_met &myisam_stats_method_typelib, NULL); +sys_var_thd_ulonglong sys_maria_max_sort_file_size("maria_max_sort_file_size", &SV::maria_max_sort_file_size, fix_maria_max_sort_file_size, 1); +sys_var_thd_ulong sys_maria_repair_threads("maria_repair_threads", &SV::maria_repair_threads); +sys_var_thd_ulong sys_maria_sort_buffer_size("maria_sort_buffer_size", &SV::maria_sort_buff_size); +sys_var_thd_enum sys_maria_stats_method("maria_stats_method", + &SV::maria_stats_method, + &myisam_stats_method_typelib, + NULL); + static sys_var_thd_ulong sys_net_buffer_length(&vars, "net_buffer_length", &SV::net_buffer_length); static sys_var_thd_ulong sys_net_read_timeout(&vars, "net_read_timeout", @@ -624,6 +647,7 @@ static sys_var_have_plugin sys_have_csv(&vars, "have_csv", C_STRING_WITH_LEN("cs static sys_var_have_variable sys_have_dlopen(&vars, "have_dynamic_loading", &have_dlopen); static sys_var_have_variable sys_have_geometry(&vars, "have_geometry", &have_geometry); static sys_var_have_plugin sys_have_innodb(&vars, "have_innodb", C_STRING_WITH_LEN("innodb"), MYSQL_STORAGE_ENGINE_PLUGIN); +static sys_var_have_variable sys_have_maria_db(&vars, "have_maria", &have_maria_db); static sys_var_have_plugin sys_have_ndbcluster(&vars, "have_ndbcluster", C_STRING_WITH_LEN("ndbcluster"), MYSQL_STORAGE_ENGINE_PLUGIN); static sys_var_have_variable sys_have_openssl(&vars, "have_openssl", &have_ssl); static sys_var_have_variable sys_have_ssl(&vars, "have_ssl", &have_ssl); @@ -702,7 +726,7 @@ static SHOW_VAR fixed_vars[]= { #ifdef HAVE_THR_SETCONCURRENCY {"thread_concurrency", (char*) &concurrency, SHOW_LONG}, #endif - {"thread_stack", (char*) &thread_stack, SHOW_LONG}, + {"thread_stack", (char*) &my_thread_stack_size, SHOW_LONG}, }; @@ -839,6 +863,16 @@ fix_myisam_max_sort_file_size(THD *thd, enum_var_type type) (my_off_t) global_system_variables.myisam_max_sort_file_size; } +static void +fix_maria_max_sort_file_size(THD *thd, enum_var_type type) +{ +#ifdef WITH_MARIA_STORAGE_ENGINE + maria_max_temp_length= + (my_off_t) global_system_variables.myisam_max_sort_file_size; +#endif +} + + /* Set the OPTION_BIG_SELECTS flag if max_join_size == HA_POS_ERROR */ @@ -1893,15 +1927,30 @@ LEX_STRING default_key_cache_base= {(char *) "default", 7 }; static KEY_CACHE zero_key_cache; +#ifdef WITH_MARIA_STORAGE_ENGINE +LEX_STRING maria_pagecache_base= {(char *) "default", 7 }; +static PAGECACHE zero_pagecache; +#endif /* WITH_MARIA_STORAGE_ENGINE */ + KEY_CACHE *get_key_cache(LEX_STRING *cache_name) { safe_mutex_assert_owner(&LOCK_global_system_variables); if (!cache_name || ! cache_name->length) cache_name= &default_key_cache_base; return ((KEY_CACHE*) find_named(&key_caches, - cache_name->str, cache_name->length, 0)); + cache_name->str, cache_name->length, 0)); } +#ifdef WITH_MARIA_STORAGE_ENGINE +PAGECACHE *get_pagecache(LEX_STRING *cache_name) +{ + safe_mutex_assert_owner(&LOCK_global_system_variables); + if (!cache_name || ! cache_name->length) + cache_name= &default_key_cache_base; + return ((PAGECACHE*) find_named(&pagecaches, + cache_name->str, cache_name->length, 0)); +} +#endif /* WITH_MARIA_STORAGE_ENGINE */ uchar *sys_var_key_cache_param::value_ptr(THD *thd, enum_var_type type, LEX_STRING *base) @@ -1913,6 +1962,18 @@ uchar *sys_var_key_cache_param::value_ptr(THD *thd, enum_var_type type, } +#ifdef WITH_MARIA_STORAGE_ENGINE +byte *sys_var_pagecache_param::value_ptr(THD *thd, enum_var_type type, + LEX_STRING *base) +{ + PAGECACHE *pagecache= get_pagecache(base); + if (!pagecache) + pagecache= &zero_pagecache; + return (byte*) pagecache + offset ; +} +#endif /* WITH_MARIA_STORAGE_ENGINE */ + + bool sys_var_key_buffer_size::update(THD *thd, set_var *var) { ulonglong tmp= var->save_result.ulonglong_value; @@ -2049,6 +2110,60 @@ end: } +#ifdef WITH_MARIA_STORAGE_ENGINE +bool sys_var_pagecache_long::update(THD *thd, set_var *var) +{ + ulong tmp= (ulong) var->value->val_int(); + LEX_STRING *base_name= &var->base; + bool error= 0; + + if (!base_name->length) + base_name= &maria_pagecache_base; + + pthread_mutex_lock(&LOCK_global_system_variables); + PAGECACHE *pagecache= get_pagecache(base_name); + + if (!pagecache && !(pagecache= create_pagecache(base_name->str, + base_name->length))) + { + error= 1; + goto end; + } + + /* + Abort if some other thread is changing the key cache + TODO: This should be changed so that we wait until the previous + assignment is done and then do the new assign + */ + if (pagecache->in_init) + goto end; + + *((ulong*) (((char*) pagecache) + offset))= + (ulong) getopt_ull_limit_value(tmp, option_limits); + + /* + Don't create a new key cache if it didn't exist + (pagecaches are created only when the user sets block_size) + */ + pagecache->in_init= 1; + + pthread_mutex_unlock(&LOCK_global_system_variables); + + /* + TODO: uncomment whan it will be implemented + error= (bool) (ha_resize_pagecache(pagecache)); + */ + + pthread_mutex_lock(&LOCK_global_system_variables); + pagecache->in_init= 0; + +end: + pthread_mutex_unlock(&LOCK_global_system_variables); + return error; +} +#endif /* WITH_MARIA_STORAGE_ENGINE */ + + bool sys_var_log_state::update(THD *thd, set_var *var) { bool res= 0; @@ -3538,12 +3653,85 @@ bool process_key_caches(int (* func) (const char *name, KEY_CACHE *)) } +#ifdef WITH_MARIA_STORAGE_ENGINE + +static PAGECACHE *create_pagecache(const char *name, uint length) +{ + PAGECACHE *pagecache; + DBUG_ENTER("create_pagecache"); + DBUG_PRINT("enter",("name: %.*s", length, name)); + + if ((pagecache= (PAGECACHE*) my_malloc(sizeof(PAGECACHE), + MYF(MY_ZEROFILL | MY_WME)))) + { + if (!new NAMED_LIST(&pagecaches, name, length, (gptr) pagecache)) + { + my_free((char*) pagecache, MYF(0)); + pagecache= 0; + } + else + { + /* + Set default values for a key cache + The values in maria_pagecache_var is set by my_getopt() at startup + We don't set 'buff_size' as this is used to enable the key cache + */ + pagecache->param_buff_size= (maria_pagecache_var.param_buff_size ? + maria_pagecache_var.param_buff_size: + KEY_CACHE_SIZE); + pagecache->param_division_limit= maria_pagecache_var.param_division_limit; + pagecache->param_age_threshold= maria_pagecache_var.param_age_threshold; + } + } + DBUG_RETURN(pagecache); +} + + +PAGECACHE *get_or_create_pagecache(const char *name, uint length) +{ + LEX_STRING pagecache_name; + PAGECACHE *pagecache; + + pagecache_name.str= (char *) name; + pagecache_name.length= length; + pthread_mutex_lock(&LOCK_global_system_variables); + if (!(pagecache= get_pagecache(&pagecache_name))) + pagecache= create_pagecache(name, length); + pthread_mutex_unlock(&LOCK_global_system_variables); + return pagecache; +} + + +void free_pagecache(const char *name, PAGECACHE *pagecache) +{ + ha_end_pagecache(pagecache); + my_free((char*) pagecache, MYF(0)); +} + + +bool process_pagecaches(int (* func) (const char *name, PAGECACHE *)) +{ + I_List_iterator it(pagecaches); + NAMED_LIST *element; + + while ((element= it++)) + { + PAGECACHE *pagecache= (PAGECACHE *) element->data; + func(element->name, pagecache); + } + return 0; +} + +#endif /* WITH_MARIA_STORAGE_ENGINE */ + + void sys_var_trust_routine_creators::warn_deprecated(THD *thd) { WARN_DEPRECATED(thd, "5.2", "log_bin_trust_routine_creators", "'log_bin_trust_function_creators'"); } + void sys_var_trust_routine_creators::set_default(THD *thd, enum_var_type type) { warn_deprecated(thd); diff --git a/sql/set_var.h b/sql/set_var.h index a998dc93b84..ab865f312b3 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -733,6 +733,33 @@ public: }; +#ifdef WITH_MARIA_STORAGE_ENGINE +class sys_var_pagecache_param :public sys_var +{ +protected: + size_t offset; +public: + sys_var_pagecache_param(const char *name_arg, size_t offset_arg) + :sys_var(name_arg), offset(offset_arg) + {} + byte *value_ptr(THD *thd, enum_var_type type, LEX_STRING *base); + bool check_default(enum_var_type type) { return 1; } + bool is_struct() { return 1; } +}; + + +class sys_var_pagecache_long :public sys_var_pagecache_param +{ +public: + sys_var_pagecache_long(const char *name_arg, size_t offset_arg) + :sys_var_pagecache_param(name_arg, offset_arg) + {} + bool update(THD *thd, set_var *var); + SHOW_TYPE type() { return SHOW_LONG; } +}; +#endif /* WITH_MARIA_STORAGE_ENGINE */ + + class sys_var_thd_date_time_format :public sys_var_thd { DATE_TIME_FORMAT *SV::*offset; @@ -1131,7 +1158,11 @@ public: my_free((uchar*) name, MYF(0)); } friend bool process_key_caches(int (* func) (const char *name, - KEY_CACHE *)); + KEY_CACHE *)); +#ifdef WITH_MARIA_STORAGE_ENGINE + friend bool process_pagecaches(int (* func) (const char *name, + PAGECACHE *)); +#endif /* WITH_MARIA_STORAGE_ENGINE */ friend void delete_elements(I_List *list, void (*free_element)(const char*, uchar*)); }; @@ -1141,6 +1172,9 @@ public: extern sys_var_thd_bool sys_old_alter_table; extern sys_var_thd_bool sys_old_passwords; extern LEX_STRING default_key_cache_base; +#ifdef WITH_MARIA_STORAGE_ENGINE +extern LEX_STRING maria_pagecache_base; +#endif /* WITH_MARIA_STORAGE_ENGINE */ /* For sql_yacc */ struct sys_var_with_base @@ -1182,3 +1216,8 @@ void free_key_cache(const char *name, KEY_CACHE *key_cache); bool process_key_caches(int (* func) (const char *name, KEY_CACHE *)); void delete_elements(I_List *list, void (*free_element)(const char*, uchar*)); +#ifdef WITH_MARIA_STORAGE_ENGINE +PAGECACHE *get_or_create_pagecache(const char *name, uint length); +void free_pagecache(const char *name, PAGECACHE *pagecache); +bool process_pagecaches(int (* func) (const char *name, PAGECACHE *)); +#endif /* WITH_MARIA_STORAGE_ENGINE */ diff --git a/sql/slave.cc b/sql/slave.cc index 6c7968c2b3f..512c4bc8d12 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -28,6 +28,8 @@ #include #include +static Log_event* next_event(RELAY_LOG_INFO* rli); + #ifdef HAVE_REPLICATION #include "rpl_tblmap.h" diff --git a/sql/sql_class.h b/sql/sql_class.h index 0f42a4c9988..a52ec55f040 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -236,6 +236,7 @@ struct system_variables uint dynamic_variables_size; /* how many bytes are in use */ ulonglong myisam_max_extra_sort_file_size; + ulonglong maria_max_sort_file_size; ulonglong myisam_max_sort_file_size; ulonglong max_heap_table_size; ulonglong tmp_table_size; @@ -251,6 +252,9 @@ struct system_variables ulong max_sort_length; ulong max_tmp_tables; ulong max_insert_delayed_threads; + ulong maria_repair_threads; + ulong maria_sort_buff_size; + ulong maria_stats_method; ulong multi_range_count; ulong myisam_repair_threads; ulong myisam_sort_buff_size; diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 2b43c6f0c40..80a60b3366d 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5053,10 +5053,10 @@ bool check_stack_overrun(THD *thd, long margin, long stack_used; DBUG_ASSERT(thd == current_thd); if ((stack_used=used_stack(thd->thread_stack,(char*) &stack_used)) >= - (long) (thread_stack - margin)) + (long) (my_thread_stack_size - margin)) { sprintf(errbuff[0],ER(ER_STACK_OVERRUN_NEED_MORE), - stack_used,thread_stack,margin); + stack_used,my_thread_stack_size,margin); my_message(ER_STACK_OVERRUN_NEED_MORE,errbuff[0],MYF(0)); thd->fatal_error(); return 1; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 98620800434..9ca44d588a4 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -25,6 +25,7 @@ #include "sql_cursor.h" #include +#include #include #include @@ -9547,7 +9548,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List &fields, table->s= share; init_tmp_table_share(share, "", 0, tmpname, tmppath); share->blob_field= blob_field; - share->blob_ptr_size= mi_portable_sizeof_char_ptr; + share->blob_ptr_size= portable_sizeof_char_ptr; share->db_low_byte_first=1; // True for HEAP and MyISAM share->table_charset= param->table_charset; share->primary_key= MAX_KEY; // Indicate no primary key @@ -10112,7 +10113,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List &field_list) table->s= share; share->blob_field= blob_field; share->fields= field_count; - share->blob_ptr_size= mi_portable_sizeof_char_ptr; + share->blob_ptr_size= portable_sizeof_char_ptr; setup_tmp_table_column_bitmaps(table, bitmaps); /* Create all fields and calculate the total length of record */ diff --git a/sql/sql_sort.h b/sql/sql_sort.h index da28ca07e2c..1572f6304e1 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -34,7 +34,9 @@ the callback function 'unpack_addon_fields'. */ -typedef struct st_sort_addon_field { /* Sort addon packed field */ +typedef struct st_sort_addon_field +{ + /* Sort addon packed field */ Field *field; /* Original field */ uint offset; /* Offset from the last sorted field */ uint null_offset; /* Offset to to null bit from the last sorted field */ @@ -42,13 +44,6 @@ typedef struct st_sort_addon_field { /* Sort addon packed field */ uint8 null_bit; /* Null bit mask for the field */ } SORT_ADDON_FIELD; -typedef struct st_buffpek { /* Struktur om sorteringsbuffrarna */ - my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* key pointers */ - ha_rows count; /* Number of rows in table */ - ulong mem_count; /* numbers of keys in memory */ - ulong max_keys; /* Max keys in buffert */ -} BUFFPEK; typedef struct st_sort_param { uint rec_length; /* Length of sorted records */ diff --git a/sql/sql_test.cc b/sql/sql_test.cc index 5bd01eea68c..6d75c90eb1d 100644 --- a/sql/sql_test.cc +++ b/sql/sql_test.cc @@ -459,7 +459,7 @@ void mysql_print_status() VOID(my_getwd(current_dir, sizeof(current_dir),MYF(0))); printf("Current dir: %s\n", current_dir); printf("Running threads: %d Stack size: %ld\n", thread_count, - (long) thread_stack); + (long) my_thread_stack_size); thr_print_locks(); // Write some debug info #ifndef DBUG_OFF print_cached_tables(); @@ -536,7 +536,7 @@ Estimated memory (with thread stack): %ld\n", (int) info.uordblks, (int) info.fordblks, (int) info.keepcost, - (long) (thread_count * thread_stack + info.hblkhd + info.arena)); + (long) (thread_count * my_thread_stack_size + info.hblkhd + info.arena)); #endif Events::dump_internal_status(); diff --git a/sql/uniques.cc b/sql/uniques.cc index 7a05ceaddfc..619a516e54a 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -448,7 +448,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, */ for (top= begin; top != end; ++top) { - top->base= merge_buffer + (top - begin) * piece_size; + top->base= (byte*) (merge_buffer + (top - begin) * piece_size); top->max_keys= max_key_count_per_piece; bytes_read= read_to_buffer(file, top, key_length); if (bytes_read == (uint) (-1)) diff --git a/sql/unireg.cc b/sql/unireg.cc index 57847bc70c6..c74e046b133 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -284,8 +284,10 @@ bool mysql_create_frm(THD *thd, const char *file_name, my_free(keybuff, MYF(0)); if (opt_sync_frm && !(create_info->options & HA_LEX_CREATE_TMP_TABLE) && - my_sync(file, MYF(MY_WME))) - goto err2; + (my_sync(file, MYF(MY_WME)) || + my_sync_dir_by_file(file_name, MYF(MY_WME)))) + goto err2; + if (my_close(file,MYF(MY_WME))) goto err3; diff --git a/storage/Makefile.am b/storage/Makefile.am index b978453d29d..eec499aabf4 100644 --- a/storage/Makefile.am +++ b/storage/Makefile.am @@ -19,7 +19,12 @@ AUTOMAKE_OPTIONS = foreign # These are built from source in the Docs directory EXTRA_DIST = -SUBDIRS = @mysql_se_dirs@ +# Until we remove fulltext-related references from Maria to MyISAM +# MyISAM must be built before Maria, which is not the case by default +# because of alphabetical order +# So we put myisam first; this is very ugly regarding plugins' logic +# but it works, and we'll remove it soon. +SUBDIRS = myisam @mysql_se_dirs@ # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/storage/maria/CMakeLists.txt b/storage/maria/CMakeLists.txt new file mode 100644 index 00000000000..cfe23054e2f --- /dev/null +++ b/storage/maria/CMakeLists.txt @@ -0,0 +1 @@ +# empty for the moment; will fill it when we build under Windows diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am new file mode 100644 index 00000000000..fbb25584910 --- /dev/null +++ b/storage/maria/Makefile.am @@ -0,0 +1,165 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +MYSQLDATAdir = $(localstatedir) +MYSQLSHAREdir = $(pkgdatadir) +MYSQLBASEdir= $(prefix) +MYSQLLIBdir= $(pkglibdir) +INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include \ + -I$(top_srcdir)/regex \ + -I$(top_srcdir)/sql \ + -I$(srcdir) +WRAPLIBS= + +LDADD = + +DEFS = @DEFS@ + +# "." is needed first because tests in unittest need libmaria +SUBDIRS = . unittest + +EXTRA_DIST = ma_test_all.sh ma_test_all.res ma_ft_stem.c CMakeLists.txt plug.in +pkgdata_DATA = ma_test_all ma_test_all.res +pkglib_LIBRARIES = libmaria.a +bin_PROGRAMS = maria_chk maria_pack maria_ftdump +maria_chk_DEPENDENCIES= $(LIBRARIES) +# Only reason to link with libmyisam.a here is that it's where some fulltext +# pieces are (but soon we'll remove fulltext dependencies from Maria). +# For now, it imposes that storage/myisam be built before storage/maria. +maria_chk_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +maria_pack_DEPENDENCIES=$(LIBRARIES) +maria_pack_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +noinst_PROGRAMS = ma_test1 ma_test2 ma_test3 ma_rt_test ma_sp_test +noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ + ma_sp_defs.h ma_fulltext.h ma_ftdefs.h ma_ft_test1.h \ + ma_ft_eval.h trnman.h lockman.h tablockman.h \ + ma_control_file.h ha_maria.h ma_blockrec.h \ + ma_loghandler.h ma_loghandler_lsn.h ma_pagecache.h \ + ma_commit.h +ma_test1_DEPENDENCIES= $(LIBRARIES) +ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_test2_DEPENDENCIES= $(LIBRARIES) +ma_test2_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_test3_DEPENDENCIES= $(LIBRARIES) +ma_test3_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +#ma_ft_test1_DEPENDENCIES= $(LIBRARIES) +#ma_ft_eval_DEPENDENCIES= $(LIBRARIES) +maria_ftdump_DEPENDENCIES= $(LIBRARIES) +maria_ftdump_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_rt_test_DEPENDENCIES= $(LIBRARIES) +ma_rt_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +ma_sp_test_DEPENDENCIES= $(LIBRARIES) +ma_sp_test_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +libmaria_a_SOURCES = ma_init.c ma_open.c ma_extra.c ma_info.c ma_rkey.c \ + ma_rnext.c ma_rnext_same.c \ + ma_search.c ma_page.c ma_key.c ma_locking.c \ + ma_rrnd.c ma_scan.c ma_cache.c \ + ma_statrec.c ma_packrec.c ma_dynrec.c \ + ma_blockrec.c ma_bitmap.c \ + ma_update.c ma_write.c ma_unique.c \ + ma_delete.c \ + ma_rprev.c ma_rfirst.c ma_rlast.c ma_rsame.c \ + ma_rsamepos.c ma_panic.c ma_close.c ma_create.c\ + ma_range.c ma_dbug.c ma_checksum.c \ + ma_changed.c ma_static.c ma_delete_all.c \ + ma_delete_table.c ma_rename.c ma_check.c \ + ma_keycache.c ma_preload.c ma_ft_parser.c \ + ma_ft_update.c ma_ft_boolean_search.c \ + ma_ft_nlq_search.c ft_maria.c ma_sort.c \ + ha_maria.cc trnman.c lockman.c tablockman.c \ + ma_rt_index.c ma_rt_key.c ma_rt_mbr.c ma_rt_split.c \ + ma_sp_key.c ma_control_file.c ma_loghandler.c \ + ma_pagecache.c ma_pagecaches.c \ + ma_commit.c +CLEANFILES = test?.MA? FT?.MA? isam.log ma_test_all ma_rt_test.MA? sp_test.MA? + +SUFFIXES = .sh + +.sh: + @RM@ -f $@ $@-t + @SED@ \ + -e 's!@''bindir''@!$(bindir)!g' \ + -e 's!@''scriptdir''@!$(bindir)!g' \ + -e 's!@''prefix''@!$(prefix)!g' \ + -e 's!@''datadir''@!$(datadir)!g' \ + -e 's!@''localstatedir''@!$(localstatedir)!g' \ + -e 's!@''libexecdir''@!$(libexecdir)!g' \ + -e 's!@''CC''@!@CC@!'\ + -e 's!@''CXX''@!@CXX@!'\ + -e 's!@''GXX''@!@GXX@!'\ + -e 's!@''PERL''@!@PERL@!' \ + -e 's!@''CFLAGS''@!@SAVE_CFLAGS@!'\ + -e 's!@''CXXFLAGS''@!@SAVE_CXXFLAGS@!'\ + -e 's!@''LDFLAGS''@!@SAVE_LDFLAGS@!'\ + -e 's!@''VERSION''@!@VERSION@!' \ + -e 's!@''MYSQL_SERVER_SUFFIX''@!@MYSQL_SERVER_SUFFIX@!' \ + -e 's!@''COMPILATION_COMMENT''@!@COMPILATION_COMMENT@!' \ + -e 's!@''MACHINE_TYPE''@!@MACHINE_TYPE@!' \ + -e 's!@''HOSTNAME''@!@HOSTNAME@!' \ + -e 's!@''SYSTEM_TYPE''@!@SYSTEM_TYPE@!' \ + -e 's!@''CHECK_PID''@!@CHECK_PID@!' \ + -e 's!@''FIND_PROC''@!@FIND_PROC@!' \ + -e 's!@''MYSQLD_DEFAULT_SWITCHES''@!@MYSQLD_DEFAULT_SWITCHES@!' \ + -e 's!@''MYSQL_UNIX_ADDR''@!@MYSQL_UNIX_ADDR@!' \ + -e 's!@''TARGET_LINUX''@!@TARGET_LINUX@!' \ + -e "s!@""CONF_COMMAND""@!@CONF_COMMAND@!" \ + -e 's!@''MYSQLD_USER''@!@MYSQLD_USER@!' \ + -e 's!@''sysconfdir''@!@sysconfdir@!' \ + -e 's!@''SHORT_MYSQL_INTRO''@!@SHORT_MYSQL_INTRO@!' \ + -e 's!@''SHARED_LIB_VERSION''@!@SHARED_LIB_VERSION@!' \ + -e 's!@''MYSQL_BASE_VERSION''@!@MYSQL_BASE_VERSION@!' \ + -e 's!@''MYSQL_NO_DASH_VERSION''@!@MYSQL_NO_DASH_VERSION@!' \ + -e 's!@''MYSQL_TCP_PORT''@!@MYSQL_TCP_PORT@!' \ + -e 's!@''PERL_DBI_VERSION''@!@PERL_DBI_VERSION@!' \ + -e 's!@''PERL_DBD_VERSION''@!@PERL_DBD_VERSION@!' \ + -e 's!@''PERL_DATA_DUMPER''@!@PERL_DATA_DUMPER@!' \ + $< > $@-t + @CHMOD@ +x $@-t + @MV@ $@-t $@ + +# Don't update the files from bitkeeper +%::SCCS/s.% diff --git a/storage/maria/ft_maria.c b/storage/maria/ft_maria.c new file mode 100644 index 00000000000..06e7c4bd59c --- /dev/null +++ b/storage/maria/ft_maria.c @@ -0,0 +1,48 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* + This function is for interface functions between fulltext and maria +*/ + +#include "ma_ftdefs.h" + +FT_INFO *maria_ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, CHARSET_INFO *cs, + byte *record) +{ + FT_INFO *res; + if (flags & FT_BOOL) + res= maria_ft_init_boolean_search((MARIA_HA *) info, keynr, query, + query_len, cs); + else + res= maria_ft_init_nlq_search((MARIA_HA *) info, keynr, query, query_len, + flags, record); + return res; +} + +const struct _ft_vft _ma_ft_vft_nlq = { + maria_ft_nlq_read_next, maria_ft_nlq_find_relevance, + maria_ft_nlq_close_search, maria_ft_nlq_get_relevance, + maria_ft_nlq_reinit_search +}; +const struct _ft_vft _ma_ft_vft_boolean = { + maria_ft_boolean_read_next, maria_ft_boolean_find_relevance, + maria_ft_boolean_close_search, maria_ft_boolean_get_relevance, + maria_ft_boolean_reinit_search +}; + diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc new file mode 100644 index 00000000000..e05f97a384d --- /dev/null +++ b/storage/maria/ha_maria.cc @@ -0,0 +1,2275 @@ +/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#define MYSQL_SERVER 1 +#include "mysql_priv.h" +#include +#include +#include +#include +#include "ha_maria.h" +#include "trnman_public.h" + +#include "maria_def.h" +#include "ma_rt_index.h" +#include "ma_blockrec.h" +#include "ma_commit.h" + +#define MARIA_CANNOT_ROLLBACK HA_NO_TRANSACTIONS +#ifdef MARIA_CANNOT_ROLLBACK +#define trans_register_ha(A, B, C) do { /* nothing */ } while(0) +#endif + +ulong maria_recover_options= HA_RECOVER_NONE; +static handlerton *maria_hton; + +/* bits in maria_recover_options */ +const char *maria_recover_names[]= +{ + "DEFAULT", "BACKUP", "FORCE", "QUICK", NullS +}; +TYPELIB maria_recover_typelib= +{ + array_elements(maria_recover_names) - 1, "", + maria_recover_names, NULL +}; + +const char *maria_stats_method_names[]= +{ + "nulls_unequal", "nulls_equal", + "nulls_ignored", NullS +}; +TYPELIB maria_stats_method_typelib= +{ + array_elements(maria_stats_method_names) - 1, "", + maria_stats_method_names, NULL +}; + + +/***************************************************************************** +** MARIA tables +*****************************************************************************/ + +static handler *maria_create_handler(handlerton *hton, + TABLE_SHARE * table, + MEM_ROOT *mem_root) +{ + return new (mem_root) ha_maria(hton, table); +} + + +// collect errors printed by maria_check routines + +static void _ma_check_print_msg(HA_CHECK *param, const char *msg_type, + const char *fmt, va_list args) +{ + THD *thd= (THD *) param->thd; + Protocol *protocol= thd->protocol; + uint length, msg_length; + char msgbuf[MARIA_MAX_MSG_BUF]; + char name[NAME_LEN * 2 + 2]; + + msg_length= my_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args); + msgbuf[sizeof(msgbuf) - 1]= 0; // healthy paranoia + + DBUG_PRINT(msg_type, ("message: %s", msgbuf)); + + if (!thd->vio_ok()) + { + sql_print_error(msgbuf); + return; + } + + if (param->testflag & + (T_CREATE_MISSING_KEYS | T_SAFE_REPAIR | T_AUTO_REPAIR)) + { + my_message(ER_NOT_KEYFILE, msgbuf, MYF(MY_WME)); + return; + } + length= (uint) (strxmov(name, param->db_name, ".", param->table_name, + NullS) - name); + protocol->prepare_for_resend(); + protocol->store(name, length, system_charset_info); + protocol->store(param->op_name, system_charset_info); + protocol->store(msg_type, system_charset_info); + protocol->store(msgbuf, msg_length, system_charset_info); + if (protocol->write()) + sql_print_error("Failed on my_net_write, writing to stderr instead: %s\n", + msgbuf); + return; +} + + +/* + Convert TABLE object to Maria key and column definition + + SYNOPSIS + table2maria() + table_arg in TABLE object. + keydef_out out Maria key definition. + recinfo_out out Maria column definition. + records_out out Number of fields. + + DESCRIPTION + This function will allocate and initialize Maria key and column + definition for further use in ma_create or for a check for underlying + table conformance in merge engine. + + RETURN VALUE + 0 OK + # error code +*/ + +int table2maria(TABLE *table_arg, MARIA_KEYDEF **keydef_out, + MARIA_COLUMNDEF **recinfo_out, uint *records_out) +{ + uint i, j, recpos, minpos, fieldpos, temp_length, length; + enum ha_base_keytype type= HA_KEYTYPE_BINARY; + byte *record; + KEY *pos; + MARIA_KEYDEF *keydef; + MARIA_COLUMNDEF *recinfo, *recinfo_pos; + HA_KEYSEG *keyseg; + TABLE_SHARE *share= table_arg->s; + uint options= share->db_options_in_use; + DBUG_ENTER("table2maria"); + + if (!(my_multi_malloc(MYF(MY_WME), + recinfo_out, (share->fields * 2 + 2) * sizeof(MARIA_COLUMNDEF), + keydef_out, share->keys * sizeof(MARIA_KEYDEF), + &keyseg, + (share->key_parts + share->keys) * sizeof(HA_KEYSEG), + NullS))) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); /* purecov: inspected */ + keydef= *keydef_out; + recinfo= *recinfo_out; + pos= table_arg->key_info; + for (i= 0; i < share->keys; i++, pos++) + { + keydef[i].flag= (pos->flags & (HA_NOSAME | HA_FULLTEXT | HA_SPATIAL)); + keydef[i].key_alg= pos->algorithm == HA_KEY_ALG_UNDEF ? + (pos->flags & HA_SPATIAL ? HA_KEY_ALG_RTREE : HA_KEY_ALG_BTREE) : + pos->algorithm; + keydef[i].block_length= pos->block_size; + keydef[i].seg= keyseg; + keydef[i].keysegs= pos->key_parts; + for (j= 0; j < pos->key_parts; j++) + { + Field *field= pos->key_part[j].field; + type= field->key_type(); + keydef[i].seg[j].flag= pos->key_part[j].key_part_flag; + + if (options & HA_OPTION_PACK_KEYS || + (pos->flags & (HA_PACK_KEY | HA_BINARY_PACK_KEY | + HA_SPACE_PACK_USED))) + { + if (pos->key_part[j].length > 8 && + (type == HA_KEYTYPE_TEXT || + type == HA_KEYTYPE_NUM || + (type == HA_KEYTYPE_BINARY && !field->zero_pack()))) + { + /* No blobs here */ + if (j == 0) + keydef[i].flag|= HA_PACK_KEY; + if (!(field->flags & ZEROFILL_FLAG) && + (field->type() == MYSQL_TYPE_STRING || + field->type() == MYSQL_TYPE_VAR_STRING || + ((int) (pos->key_part[j].length - field->decimals())) >= 4)) + keydef[i].seg[j].flag|= HA_SPACE_PACK; + } + else if (j == 0 && (!(pos->flags & HA_NOSAME) || pos->key_length > 16)) + keydef[i].flag|= HA_BINARY_PACK_KEY; + } + keydef[i].seg[j].type= (int) type; + keydef[i].seg[j].start= pos->key_part[j].offset; + keydef[i].seg[j].length= pos->key_part[j].length; + keydef[i].seg[j].bit_start= keydef[i].seg[j].bit_end= + keydef[i].seg[j].bit_length= 0; + keydef[i].seg[j].bit_pos= 0; + keydef[i].seg[j].language= field->charset()->number; + + if (field->null_ptr) + { + keydef[i].seg[j].null_bit= field->null_bit; + keydef[i].seg[j].null_pos= (uint) (field->null_ptr- + (uchar*) table_arg->record[0]); + } + else + { + keydef[i].seg[j].null_bit= 0; + keydef[i].seg[j].null_pos= 0; + } + if (field->type() == MYSQL_TYPE_BLOB || + field->type() == MYSQL_TYPE_GEOMETRY) + { + keydef[i].seg[j].flag|= HA_BLOB_PART; + /* save number of bytes used to pack length */ + keydef[i].seg[j].bit_start= (uint) (field->pack_length() - + share->blob_ptr_size); + } + else if (field->type() == MYSQL_TYPE_BIT) + { + keydef[i].seg[j].bit_length= ((Field_bit *) field)->bit_len; + keydef[i].seg[j].bit_start= ((Field_bit *) field)->bit_ofs; + keydef[i].seg[j].bit_pos= (uint) (((Field_bit *) field)->bit_ptr - + (uchar*) table_arg->record[0]); + } + } + keyseg+= pos->key_parts; + } + if (table_arg->found_next_number_field) + keydef[share->next_number_index].flag|= HA_AUTO_KEY; + record= table_arg->record[0]; + recpos= 0; + recinfo_pos= recinfo; + while (recpos < (uint) share->reclength) + { + Field **field, *found= 0; + minpos= share->reclength; + length= 0; + + for (field= table_arg->field; *field; field++) + { + if ((fieldpos= (*field)->offset(record)) >= recpos && + fieldpos <= minpos) + { + /* skip null fields */ + if (!(temp_length= (*field)->pack_length_in_rec())) + continue; /* Skip null-fields */ + if (! found || fieldpos < minpos || + (fieldpos == minpos && temp_length < length)) + { + minpos= fieldpos; + found= *field; + length= temp_length; + } + } + } + DBUG_PRINT("loop", ("found: 0x%lx recpos: %d minpos: %d length: %d", + (long) found, recpos, minpos, length)); + if (recpos != minpos) + { // Reserved space (Null bits?) + bzero((char*) recinfo_pos, sizeof(*recinfo_pos)); + recinfo_pos->type= FIELD_NORMAL; + recinfo_pos++->length= (uint16) (minpos - recpos); + } + if (!found) + break; + + if (found->flags & BLOB_FLAG) + recinfo_pos->type= FIELD_BLOB; + else if (found->type() == MYSQL_TYPE_VARCHAR) + recinfo_pos->type= FIELD_VARCHAR; + else if (!(options & HA_OPTION_PACK_RECORD) || + (found->zero_pack() && (found->flags & PRI_KEY_FLAG))) + recinfo_pos->type= FIELD_NORMAL; + else if (found->zero_pack()) + recinfo_pos->type= FIELD_SKIP_ZERO; + else + recinfo_pos->type= ((length <= 3 || + (found->flags & ZEROFILL_FLAG)) ? + FIELD_NORMAL : + found->type() == MYSQL_TYPE_STRING || + found->type() == MYSQL_TYPE_VAR_STRING ? + FIELD_SKIP_ENDSPACE : + FIELD_SKIP_PRESPACE); + if (found->null_ptr) + { + recinfo_pos->null_bit= found->null_bit; + recinfo_pos->null_pos= (uint) (found->null_ptr - + (uchar*) table_arg->record[0]); + } + else + { + recinfo_pos->null_bit= 0; + recinfo_pos->null_pos= 0; + } + (recinfo_pos++)->length= (uint16) length; + recpos= minpos + length; + DBUG_PRINT("loop", ("length: %d type: %d", + recinfo_pos[-1].length,recinfo_pos[-1].type)); + } + *records_out= (uint) (recinfo_pos - recinfo); + DBUG_RETURN(0); +} + + +/* + Check for underlying table conformance + + SYNOPSIS + maria_check_definition() + t1_keyinfo in First table key definition + t1_recinfo in First table record definition + t1_keys in Number of keys in first table + t1_recs in Number of records in first table + t2_keyinfo in Second table key definition + t2_recinfo in Second table record definition + t2_keys in Number of keys in second table + t2_recs in Number of records in second table + strict in Strict check switch + + DESCRIPTION + This function compares two Maria definitions. By intention it was done + to compare merge table definition against underlying table definition. + It may also be used to compare dot-frm and MAI definitions of Maria + table as well to compare different Maria table definitions. + + For merge table it is not required that number of keys in merge table + must exactly match number of keys in underlying table. When calling this + function for underlying table conformance check, 'strict' flag must be + set to false, and converted merge definition must be passed as t1_*. + + Otherwise 'strict' flag must be set to 1 and it is not required to pass + converted dot-frm definition as t1_*. + + RETURN VALUE + 0 - Equal definitions. + 1 - Different definitions. + + NOTES + This is currently not used. In MyISAM the corresponding function + (myisam_check_definition()) is used only by MERGE tables + (in ha_myisammrg.cc). +*/ +int maria_check_definition(MARIA_KEYDEF *t1_keyinfo, + MARIA_COLUMNDEF *t1_recinfo, + uint t1_keys, uint t1_recs, + MARIA_KEYDEF *t2_keyinfo, + MARIA_COLUMNDEF *t2_recinfo, + uint t2_keys, uint t2_recs, bool strict) +{ + uint i, j; + DBUG_ENTER("maria_check_definition"); + if ((strict ? t1_keys != t2_keys : t1_keys > t2_keys)) + { + DBUG_PRINT("error", ("Number of keys differs: t1_keys=%u, t2_keys=%u", + t1_keys, t2_keys)); + DBUG_RETURN(1); + } + if (t1_recs != t2_recs) + { + DBUG_PRINT("error", ("Number of recs differs: t1_recs=%u, t2_recs=%u", + t1_recs, t2_recs)); + DBUG_RETURN(1); + } + for (i= 0; i < t1_keys; i++) + { + HA_KEYSEG *t1_keysegs= t1_keyinfo[i].seg; + HA_KEYSEG *t2_keysegs= t2_keyinfo[i].seg; + if (t1_keyinfo[i].keysegs != t2_keyinfo[i].keysegs || + t1_keyinfo[i].key_alg != t2_keyinfo[i].key_alg) + { + DBUG_PRINT("error", ("Key %d has different definition", i)); + DBUG_PRINT("error", ("t1_keysegs=%d, t1_key_alg=%d", + t1_keyinfo[i].keysegs, t1_keyinfo[i].key_alg)); + DBUG_PRINT("error", ("t2_keysegs=%d, t2_key_alg=%d", + t2_keyinfo[i].keysegs, t2_keyinfo[i].key_alg)); + DBUG_RETURN(1); + } + for (j= t1_keyinfo[i].keysegs; j--;) + { + if (t1_keysegs[j].type != t2_keysegs[j].type || + t1_keysegs[j].language != t2_keysegs[j].language || + t1_keysegs[j].null_bit != t2_keysegs[j].null_bit || + t1_keysegs[j].length != t2_keysegs[j].length) + { + DBUG_PRINT("error", ("Key segment %d (key %d) has different " + "definition", j, i)); + DBUG_PRINT("error", ("t1_type=%d, t1_language=%d, t1_null_bit=%d, " + "t1_length=%d", + t1_keysegs[j].type, t1_keysegs[j].language, + t1_keysegs[j].null_bit, t1_keysegs[j].length)); + DBUG_PRINT("error", ("t2_type=%d, t2_language=%d, t2_null_bit=%d, " + "t2_length=%d", + t2_keysegs[j].type, t2_keysegs[j].language, + t2_keysegs[j].null_bit, t2_keysegs[j].length)); + + DBUG_RETURN(1); + } + } + } + for (i= 0; i < t1_recs; i++) + { + MARIA_COLUMNDEF *t1_rec= &t1_recinfo[i]; + MARIA_COLUMNDEF *t2_rec= &t2_recinfo[i]; + if (t1_rec->type != t2_rec->type || + t1_rec->length != t2_rec->length || + t1_rec->null_bit != t2_rec->null_bit) + { + DBUG_PRINT("error", ("Field %d has different definition", i)); + DBUG_PRINT("error", ("t1_type=%d, t1_length=%d, t1_null_bit=%d", + t1_rec->type, t1_rec->length, t1_rec->null_bit)); + DBUG_PRINT("error", ("t2_type=%d, t2_length=%d, t2_null_bit=%d", + t2_rec->type, t2_rec->length, t2_rec->null_bit)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +extern "C" { + +volatile int *_ma_killed_ptr(HA_CHECK *param) +{ + /* In theory Unsafe conversion, but should be ok for now */ + return (int*) &(((THD *) (param->thd))->killed); +} + + +void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...) +{ + param->error_printed |= 1; + param->out_flag |= O_DATA_LOST; + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "error", fmt, args); + va_end(args); +} + + +void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "info", fmt, args); + va_end(args); +} + + +void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...) +{ + param->warning_printed= 1; + param->out_flag |= O_DATA_LOST; + va_list args; + va_start(args, fmt); + _ma_check_print_msg(param, "warning", fmt, args); + va_end(args); +} + +} + + +ha_maria::ha_maria(handlerton *hton, TABLE_SHARE *table_arg): +handler(hton, table_arg), file(0), +int_table_flags(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER | + HA_DUPLICATE_POS | HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | + HA_FILE_BASED | HA_CAN_GEOMETRY | MARIA_CANNOT_ROLLBACK | + HA_CAN_INSERT_DELAYED | HA_CAN_BIT_FIELD | HA_CAN_RTREEKEYS | + HA_HAS_RECORDS | HA_STATS_RECORDS_IS_EXACT), +can_enable_indexes(1) +{} + + +handler *ha_maria::clone(MEM_ROOT *mem_root) +{ + ha_maria *new_handler= static_cast (handler::clone(mem_root)); + if (new_handler) + new_handler->file->state= file->state; + return new_handler; +} + + +static const char *ha_maria_exts[]= +{ + MARIA_NAME_IEXT, + MARIA_NAME_DEXT, + NullS +}; + + +const char **ha_maria::bas_ext() const +{ + return ha_maria_exts; +} + + +const char *ha_maria::index_type(uint key_number) +{ + return ((table->key_info[key_number].flags & HA_FULLTEXT) ? + "FULLTEXT" : + (table->key_info[key_number].flags & HA_SPATIAL) ? + "SPATIAL" : + (table->key_info[key_number].algorithm == HA_KEY_ALG_RTREE) ? + "RTREE" : "BTREE"); +} + + +double ha_maria::scan_time() +{ + if (file->s->data_file_type == BLOCK_RECORD) + return ulonglong2double(stats.data_file_length - file->s->block_size) / max(file->s->block_size / 2, IO_SIZE) + 2; + return handler::scan_time(); +} + +/* + We need to be able to store at least two keys on an index page as the + splitting algorithms depends on this. (With only one key on a page + we also can't use any compression, which may make the index file much + larger) + We use HA_MAX_KEY_BUFF as this is a stack restriction imposed by the + handler interface. + + We also need to reserve place for a record pointer (8) and 3 bytes + per key segment to store the length of the segment + possible null bytes. + These extra bytes are required here so that maria_create() will surely + accept any keys created which the returned key data storage length. +*/ + +uint ha_maria::max_supported_key_length() const +{ + uint tmp= (maria_max_key_length() - 8 - HA_MAX_KEY_SEG*3); + return min(HA_MAX_KEY_BUFF, tmp); +} + + +#ifdef HAVE_REPLICATION +int ha_maria::net_read_dump(NET * net) +{ + int data_fd= file->dfile.file; + int error= 0; + + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (;;) + { + ulong packet_len= my_net_read(net); + if (!packet_len) + break; // end of file + if (packet_len == packet_error) + { + sql_print_error("ha_maria::net_read_dump - read error "); + error= -1; + goto err; + } + if (my_write(data_fd, (byte *) net->read_pos, (uint) packet_len, + MYF(MY_WME | MY_FNABP))) + { + error= errno; + goto err; + } + } +err: + return error; +} + + +int ha_maria::dump(THD * thd, int fd) +{ + MARIA_SHARE *share= file->s; + NET *net= &thd->net; + uint block_size= share->block_size; + my_off_t bytes_to_read= share->state.state.data_file_length; + int data_fd= file->dfile.file; + byte *buf= (byte *) my_malloc(block_size, MYF(MY_WME)); + if (!buf) + return ENOMEM; + + int error= 0; + my_seek(data_fd, 0L, MY_SEEK_SET, MYF(MY_WME)); + for (; bytes_to_read > 0;) + { + uint bytes= my_read(data_fd, buf, block_size, MYF(MY_WME)); + if (bytes == MY_FILE_ERROR) + { + error= errno; + goto err; + } + + if (fd >= 0) + { + if (my_write(fd, buf, bytes, MYF(MY_WME | MY_FNABP))) + { + error= errno ? errno : EPIPE; + goto err; + } + } + else + { + if (my_net_write(net, (char*) buf, bytes)) + { + error= errno ? errno : EPIPE; + goto err; + } + } + bytes_to_read -= bytes; + } + + if (fd < 0) + { + if (my_net_write(net, "", 0)) + error= errno ? errno : EPIPE; + net_flush(net); + } + +err: + my_free((gptr) buf, MYF(0)); + return error; +} +#endif /* HAVE_REPLICATION */ + + +bool ha_maria::check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE *table, + uint count, + bool called_by_privileged_thread) +{ + /* + To be able to open and lock for reading system tables like 'mysql.proc', + when we already have some tables opened and locked, and avoid deadlocks + we have to disallow write-locking of these tables with any other tables. + */ + if (table->s->system_table && + table->reginfo.lock_type >= TL_WRITE_ALLOW_WRITE && count != 1) + { + my_error(ER_WRONG_LOCK_OF_SYSTEM_TABLE, MYF(0), table->s->db.str, + table->s->table_name.str); + return FALSE; + } + + /* + Deny locking of the log tables, which is incompatible with + concurrent insert. Unless called from a logger THD (general_log_thd + or slow_log_thd) or by a privileged thread. + */ + if (!called_by_privileged_thread) + return check_if_log_table_locking_is_allowed(sql_command, type, table); + + return TRUE; +} + + + /* Name is here without an extension */ + +int ha_maria::open(const char *name, int mode, uint test_if_locked) +{ + uint i; + +#ifdef NOT_USED + /* + If the user wants to have memory mapped data files, add an + open_flag. Do not memory map temporary tables because they are + expected to be inserted and thus extended a lot. Memory mapping is + efficient for files that keep their size, but very inefficient for + growing files. Using an open_flag instead of calling ma_extra(... + HA_EXTRA_MMAP ...) after maxs_open() has the advantage that the + mapping is not repeated for every open, but just done on the initial + open, when the MyISAM share is created. Everytime the server + requires to open a new instance of a table it calls this method. We + will always supply HA_OPEN_MMAP for a permanent table. However, the + Maria storage engine will ignore this flag if this is a secondary + open of a table that is in use by other threads already (if the + Maria share exists already). + */ + if (!(test_if_locked & HA_OPEN_TMP_TABLE) && opt_maria_use_mmap) + test_if_locked|= HA_OPEN_MMAP; +#endif + + if (!(file= maria_open(name, mode, test_if_locked | HA_OPEN_FROM_SQL_LAYER))) + return (my_errno ? my_errno : -1); + + if (test_if_locked & (HA_OPEN_IGNORE_IF_LOCKED | HA_OPEN_TMP_TABLE)) + VOID(maria_extra(file, HA_EXTRA_NO_WAIT_LOCK, 0)); + + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (!(test_if_locked & HA_OPEN_WAIT_IF_LOCKED)) + VOID(maria_extra(file, HA_EXTRA_WAIT_LOCK, 0)); + save_transactional= file->s->base.transactional; + if ((data_file_type= file->s->data_file_type) != STATIC_RECORD) + int_table_flags |= HA_REC_NOT_IN_SEQ; + if (file->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + int_table_flags |= HA_HAS_CHECKSUM; + + for (i= 0; i < table->s->keys; i++) + { + struct st_plugin_int *parser= table->key_info[i].parser; + if (table->key_info[i].flags & HA_USES_PARSER) + file->s->keyinfo[i].parser= + (struct st_mysql_ftparser *) parser->plugin->info; + table->key_info[i].block_size= file->s->keyinfo[i].block_length; + } + return (0); +} + + +int ha_maria::close(void) +{ + MARIA_HA *tmp= file; + file= 0; + return maria_close(tmp); +} + + +int ha_maria::write_row(byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status); + + /* If we have a timestamp column, update it to the current time */ + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) + table->timestamp_field->set_time(); + + /* + If we have an auto_increment column and we are writing a changed row + or a new row, then update the auto_increment value in the record. + */ + if (table->next_number_field && buf == table->record[0]) + { + int error; + if ((error= update_auto_increment())) + return error; + } + return maria_write(file, buf); +} + + +int ha_maria::check(THD * thd, HA_CHECK_OPT * check_opt) +{ + if (!file) + return HA_ADMIN_INTERNAL_ERROR; + int error; + HA_CHECK param; + MARIA_SHARE *share= file->s; + const char *old_proc_info= thd->proc_info; + + thd->proc_info= "Checking table"; + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "check"; + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.testflag= check_opt->flags | T_CHECK | T_SILENT; + param.stats_method= (enum_handler_stats_method) thd->variables. + maria_stats_method; + + if (!(table->db_stat & HA_READ_ONLY)) + param.testflag |= T_STATISTICS; + param.using_global_keycache= 1; + + if (!maria_is_crashed(file) && + (((param.testflag & T_CHECK_ONLY_CHANGED) && + !(share->state.changed & (STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR)) && + share->state.open_count == 0) || + ((param.testflag & T_FAST) && (share->state.open_count == + (uint) (share->global_changed ? 1 : + 0))))) + return HA_ADMIN_ALREADY_DONE; + + error= maria_chk_status(¶m, file); // Not fatal + error= maria_chk_size(¶m, file); + if (!error) + error |= maria_chk_del(¶m, file, param.testflag); + if (!error) + error= maria_chk_key(¶m, file); + if (!error) + { + if ((!(param.testflag & T_QUICK) && + ((share->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) || + (param.testflag & (T_EXTEND | T_MEDIUM)))) || maria_is_crashed(file)) + { + uint old_testflag= param.testflag; + param.testflag |= T_MEDIUM; + if (!(error= init_io_cache(¶m.read_cache, file->dfile.file, + my_default_record_cache_size, READ_CACHE, + share->pack.header_length, 1, MYF(MY_WME)))) + { + error= maria_chk_data_link(¶m, file, param.testflag & T_EXTEND); + end_io_cache(&(param.read_cache)); + } + param.testflag= old_testflag; + } + } + if (!error) + { + if ((share->state.changed & (STATE_CHANGED | + STATE_CRASHED_ON_REPAIR | + STATE_CRASHED | STATE_NOT_ANALYZED)) || + (param.testflag & T_STATISTICS) || maria_is_crashed(file)) + { + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + pthread_mutex_lock(&share->intern_lock); + share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + if (!(table->db_stat & HA_READ_ONLY)) + error= maria_update_state_info(¶m, file, UPDATE_TIME | UPDATE_OPEN_COUNT | + UPDATE_STAT); + pthread_mutex_unlock(&share->intern_lock); + info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_CONST); + } + } + else if (!maria_is_crashed(file) && !thd->killed) + { + maria_mark_crashed(file); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + + thd->proc_info= old_proc_info; + return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK; +} + + +/* + Analyze the key distribution in the table + As the table may be only locked for read, we have to take into account that + two threads may do an analyze at the same time! +*/ + +int ha_maria::analyze(THD *thd, HA_CHECK_OPT * check_opt) +{ + int error= 0; + HA_CHECK param; + MARIA_SHARE *share= file->s; + + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "analyze"; + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | + T_DONT_CHECK_CHECKSUM); + param.using_global_keycache= 1; + param.stats_method= (enum_handler_stats_method) thd->variables. + maria_stats_method; + + if (!(share->state.changed & STATE_NOT_ANALYZED)) + return HA_ADMIN_ALREADY_DONE; + + error= maria_chk_key(¶m, file); + if (!error) + { + pthread_mutex_lock(&share->intern_lock); + error= maria_update_state_info(¶m, file, UPDATE_STAT); + pthread_mutex_unlock(&share->intern_lock); + } + else if (!maria_is_crashed(file) && !thd->killed) + maria_mark_crashed(file); + return error ? HA_ADMIN_CORRUPT : HA_ADMIN_OK; +} + + +int ha_maria::restore(THD * thd, HA_CHECK_OPT *check_opt) +{ + HA_CHECK_OPT tmp_check_opt; + char *backup_dir= thd->lex->backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char table_name[FN_REFLEN]; + int error; + const char *errmsg; + DBUG_ENTER("restore"); + + VOID(tablename_to_filename(table->s->table_name.str, table_name, + sizeof(table_name))); + + if (fn_format_relative_to_data_home(src_path, table_name, backup_dir, + MARIA_NAME_DEXT)) + DBUG_RETURN(HA_ADMIN_INVALID); + + strxmov(dst_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, MYF(MY_WME))) + { + error= HA_ADMIN_FAILED; + errmsg= "Failed in my_copy (Error %d)"; + goto err; + } + + tmp_check_opt.init(); + tmp_check_opt.flags |= T_VERY_SILENT | T_CALC_CHECKSUM | T_QUICK; + DBUG_RETURN(repair(thd, &tmp_check_opt)); + +err: + { + HA_CHECK param; + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "restore"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg, my_errno); + DBUG_RETURN(error); + } +} + + +int ha_maria::backup(THD * thd, HA_CHECK_OPT *check_opt) +{ + char *backup_dir= thd->lex->backup_dir; + char src_path[FN_REFLEN], dst_path[FN_REFLEN]; + char table_name[FN_REFLEN]; + int error; + const char *errmsg; + DBUG_ENTER("ha_maria::backup"); + + VOID(tablename_to_filename(table->s->table_name.str, table_name, + sizeof(table_name))); + + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, + reg_ext)) + { + errmsg= "Failed in fn_format() for .frm file (errno: %d)"; + error= HA_ADMIN_INVALID; + goto err; + } + + strxmov(src_path, table->s->normalized_path.str, reg_ext, NullS); + if (my_copy(src_path, dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) + { + error= HA_ADMIN_FAILED; + errmsg= "Failed copying .frm file (errno: %d)"; + goto err; + } + + /* Change extension */ + if (fn_format_relative_to_data_home(dst_path, table_name, backup_dir, + MARIA_NAME_DEXT)) + { + errmsg= "Failed in fn_format() for .MYD file (errno: %d)"; + error= HA_ADMIN_INVALID; + goto err; + } + + strxmov(src_path, table->s->normalized_path.str, MARIA_NAME_DEXT, NullS); + if (my_copy(src_path, dst_path, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_DONT_OVERWRITE_FILE))) + { + errmsg= "Failed copying .MYD file (errno: %d)"; + error= HA_ADMIN_FAILED; + goto err; + } + DBUG_RETURN(HA_ADMIN_OK); + +err: + { + HA_CHECK param; + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "backup"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg, my_errno); + DBUG_RETURN(error); + } +} + + +int ha_maria::repair(THD * thd, HA_CHECK_OPT *check_opt) +{ + int error; + HA_CHECK param; + ha_rows start_records; + + if (!file) + return HA_ADMIN_INTERNAL_ERROR; + + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "repair"; + param.testflag= ((check_opt->flags & ~(T_EXTEND)) | + T_SILENT | T_FORCE_CREATE | T_CALC_CHECKSUM | + (check_opt->flags & T_EXTEND ? T_REP : T_REP_BY_SORT)); + param.sort_buffer_length= check_opt->sort_buffer_size; + start_records= file->state->records; + while ((error= repair(thd, param, 0)) && param.retry_repair) + { + param.retry_repair= 0; + if (test_all_bits(param.testflag, + (uint) (T_RETRY_WITHOUT_QUICK | T_QUICK))) + { + param.testflag &= ~T_RETRY_WITHOUT_QUICK; + sql_print_information("Retrying repair of: '%s' without quick", + table->s->path.str); + continue; + } + param.testflag &= ~T_QUICK; + if ((param.testflag & T_REP_BY_SORT)) + { + param.testflag= (param.testflag & ~T_REP_BY_SORT) | T_REP; + sql_print_information("Retrying repair of: '%s' with keycache", + table->s->path.str); + continue; + } + break; + } + if (!error && start_records != file->state->records && + !(check_opt->flags & T_VERY_SILENT)) + { + char llbuff[22], llbuff2[22]; + sql_print_information("Found %s of %s rows when repairing '%s'", + llstr(file->state->records, llbuff), + llstr(start_records, llbuff2), + table->s->path.str); + } + return error; +} + +int ha_maria::optimize(THD * thd, HA_CHECK_OPT *check_opt) +{ + int error; + if (!file) + return HA_ADMIN_INTERNAL_ERROR; + HA_CHECK param; + + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "optimize"; + param.testflag= (check_opt->flags | T_SILENT | T_FORCE_CREATE | + T_REP_BY_SORT | T_STATISTICS | T_SORT_INDEX); + param.sort_buffer_length= check_opt->sort_buffer_size; + if ((error= repair(thd, param, 1)) && param.retry_repair) + { + sql_print_warning("Warning: Optimize table got errno %d, retrying", + my_errno); + param.testflag &= ~T_REP_BY_SORT; + error= repair(thd, param, 1); + } + return error; +} + + +int ha_maria::repair(THD *thd, HA_CHECK ¶m, bool do_optimize) +{ + int error= 0; + uint local_testflag= param.testflag; + bool optimize_done= !do_optimize, statistics_done= 0; + const char *old_proc_info= thd->proc_info; + char fixed_name[FN_REFLEN]; + MARIA_SHARE *share= file->s; + ha_rows rows= file->state->records; + DBUG_ENTER("ha_maria::repair"); + + param.db_name= table->s->db.str; + param.table_name= table->alias; + param.tmpfile_createflag= O_RDWR | O_TRUNC; + param.using_global_keycache= 1; + param.thd= thd; + param.tmpdir= &mysql_tmpdir_list; + param.out_flag= 0; + strmov(fixed_name, file->s->open_file_name); + +#ifndef TO_BE_FIXED + /* QQ: Until we have repair for block format, lie that it succeded */ + if (file->s->data_file_type == BLOCK_RECORD) + { + if (do_optimize) + DBUG_RETURN(analyze(thd, (HA_CHECK_OPT*) 0)); + DBUG_RETURN(HA_ADMIN_OK); + } +#endif + + // Don't lock tables if we have used LOCK TABLE + if (!thd->locked_tables && + maria_lock_database(file, table->s->tmp_table ? F_EXTRA_LCK : F_WRLCK)) + { + _ma_check_print_error(¶m, ER(ER_CANT_LOCK), my_errno); + DBUG_RETURN(HA_ADMIN_FAILED); + } + + if (!do_optimize || + ((file->state->del || share->state.split != file->state->records) && + (!(param.testflag & T_QUICK) || + (share->state.changed & (STATE_NOT_OPTIMIZED_KEYS | + STATE_NOT_OPTIMIZED_ROWS))))) + { + ulonglong key_map= ((local_testflag & T_CREATE_MISSING_KEYS) ? + maria_get_mask_all_keys_active(share->base.keys) : + share->state.key_map); + uint testflag= param.testflag; + if (maria_test_if_sort_rep(file, file->state->records, key_map, 0) && + (local_testflag & T_REP_BY_SORT)) + { + local_testflag |= T_STATISTICS; + param.testflag |= T_STATISTICS; // We get this for free + statistics_done= 1; + if (thd->variables.maria_repair_threads > 1) + { + char buf[40]; + /* TODO: respect maria_repair_threads variable */ + my_snprintf(buf, 40, "Repair with %d threads", my_count_bits(key_map)); + thd->proc_info= buf; + error= maria_repair_parallel(¶m, file, fixed_name, + param.testflag & T_QUICK); + thd->proc_info= "Repair done"; // to reset proc_info, as + // it was pointing to local buffer + } + else + { + thd->proc_info= "Repair by sorting"; + error= maria_repair_by_sort(¶m, file, fixed_name, + param.testflag & T_QUICK); + } + } + else + { + thd->proc_info= "Repair with keycache"; + param.testflag &= ~T_REP_BY_SORT; + error= maria_repair(¶m, file, fixed_name, param.testflag & T_QUICK); + } + param.testflag= testflag; + optimize_done= 1; + } + if (!error) + { + if ((local_testflag & T_SORT_INDEX) && + (share->state.changed & STATE_NOT_SORTED_PAGES)) + { + optimize_done= 1; + thd->proc_info= "Sorting index"; + error= maria_sort_index(¶m, file, fixed_name); + } + if (!statistics_done && (local_testflag & T_STATISTICS)) + { + if (share->state.changed & STATE_NOT_ANALYZED) + { + optimize_done= 1; + thd->proc_info= "Analyzing"; + error= maria_chk_key(¶m, file); + } + else + local_testflag &= ~T_STATISTICS; // Don't update statistics + } + } + thd->proc_info= "Saving state"; + if (!error) + { + if ((share->state.changed & STATE_CHANGED) || maria_is_crashed(file)) + { + share->state.changed &= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + /* + the following 'if', thought conceptually wrong, + is a useful optimization nevertheless. + */ + if (file->state != &file->s->state.state) + file->s->state.state= *file->state; + if (file->s->base.auto_key) + _ma_update_auto_increment_key(¶m, file, 1); + if (optimize_done) + error= maria_update_state_info(¶m, file, + UPDATE_TIME | UPDATE_OPEN_COUNT | + (local_testflag & + T_STATISTICS ? UPDATE_STAT : 0)); + info(HA_STATUS_NO_LOCK | HA_STATUS_TIME | HA_STATUS_VARIABLE | + HA_STATUS_CONST); + if (rows != file->state->records && !(param.testflag & T_VERY_SILENT)) + { + char llbuff[22], llbuff2[22]; + _ma_check_print_warning(¶m, "Number of rows changed from %s to %s", + llstr(rows, llbuff), + llstr(file->state->records, llbuff2)); + } + if (!error) + error= _ma_repair_write_log_record(¶m, file); + } + else + { + maria_mark_crashed_on_repair(file); + file->update |= HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + maria_update_state_info(¶m, file, 0); + } + thd->proc_info= old_proc_info; + if (!thd->locked_tables) + maria_lock_database(file, F_UNLCK); + DBUG_RETURN(error ? HA_ADMIN_FAILED : + !optimize_done ? HA_ADMIN_ALREADY_DONE : HA_ADMIN_OK); +} + + +/* + Assign table indexes to a specific key cache. +*/ + +int ha_maria::assign_to_keycache(THD * thd, HA_CHECK_OPT *check_opt) +{ + PAGECACHE *new_pagecache= check_opt->pagecache; + const char *errmsg= 0; + int error= HA_ADMIN_OK; + ulonglong map= ~(ulonglong) 0; + TABLE_LIST *table_list= table->pos_in_table_list; + DBUG_ENTER("ha_maria::assign_to_keycache"); + + /* Check validity of the index references */ + if (table_list->use_index) + { + /* We only come here when the user did specify an index map */ + key_map kmap; + if (get_key_map_from_key_list(&kmap, table, table_list->use_index)) + { + errmsg= thd->net.last_error; + error= HA_ADMIN_FAILED; + goto err; + } + map= kmap.to_ulonglong(); + } + + if ((error= maria_assign_to_pagecache(file, map, new_pagecache))) + { + char buf[STRING_BUFFER_USUAL_SIZE]; + my_snprintf(buf, sizeof(buf), + "Failed to flush to index file (errno: %d)", error); + errmsg= buf; + error= HA_ADMIN_CORRUPT; + } + +err: + if (error != HA_ADMIN_OK) + { + /* Send error to user */ + HA_CHECK param; + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "assign_to_keycache"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg); + } + DBUG_RETURN(error); +} + + +/* + Preload pages of the index file for a table into the key cache. +*/ + +int ha_maria::preload_keys(THD * thd, HA_CHECK_OPT *check_opt) +{ + int error; + const char *errmsg; + ulonglong map= ~(ulonglong) 0; + TABLE_LIST *table_list= table->pos_in_table_list; + my_bool ignore_leaves= table_list->ignore_leaves; + + DBUG_ENTER("ha_maria::preload_keys"); + + /* Check validity of the index references */ + if (table_list->use_index) + { + key_map kmap; + get_key_map_from_key_list(&kmap, table, table_list->use_index); + if (kmap.is_set_all()) + { + errmsg= thd->net.last_error; + error= HA_ADMIN_FAILED; + goto err; + } + if (!kmap.is_clear_all()) + map= kmap.to_ulonglong(); + } + + maria_extra(file, HA_EXTRA_PRELOAD_BUFFER_SIZE, + (void*) &thd->variables.preload_buff_size); + + if ((error= maria_preload(file, map, ignore_leaves))) + { + switch (error) { + case HA_ERR_NON_UNIQUE_BLOCK_SIZE: + errmsg= "Indexes use different block sizes"; + break; + case HA_ERR_OUT_OF_MEM: + errmsg= "Failed to allocate buffer"; + break; + default: + char buf[ERRMSGSIZE + 20]; + my_snprintf(buf, ERRMSGSIZE, + "Failed to read from index file (errno: %d)", my_errno); + errmsg= buf; + } + error= HA_ADMIN_FAILED; + goto err; + } + + DBUG_RETURN(HA_ADMIN_OK); + +err: + { + HA_CHECK param; + maria_chk_init(¶m); + param.thd= thd; + param.op_name= "preload_keys"; + param.db_name= table->s->db.str; + param.table_name= table->s->table_name.str; + param.testflag= 0; + _ma_check_print_error(¶m, errmsg); + DBUG_RETURN(error); + } +} + + +/* + Disable indexes, making it persistent if requested. + + SYNOPSIS + disable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ disable all non-unique keys + HA_KEY_SWITCH_ALL disable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE dis. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE dis. all keys and make persistent + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_ALL_SAVE is not implemented. + + RETURN + 0 ok + HA_ERR_WRONG_COMMAND mode not implemented. +*/ + +int ha_maria::disable_indexes(uint mode) +{ + int error; + + if (mode == HA_KEY_SWITCH_ALL) + { + /* call a storage engine function to switch the key map */ + error= maria_disable_indexes(file); + } + else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) + { + maria_extra(file, HA_EXTRA_NO_KEYS, 0); + info(HA_STATUS_CONST); // Read new key info + error= 0; + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Enable indexes, making it persistent if requested. + + SYNOPSIS + enable_indexes() + mode mode of operation: + HA_KEY_SWITCH_NONUNIQ enable all non-unique keys + HA_KEY_SWITCH_ALL enable all keys + HA_KEY_SWITCH_NONUNIQ_SAVE en. non-uni. and make persistent + HA_KEY_SWITCH_ALL_SAVE en. all keys and make persistent + + DESCRIPTION + Enable indexes, which might have been disabled by disable_index() before. + The modes without _SAVE work only if both data and indexes are empty, + since the MARIA repair would enable them persistently. + To be sure in these cases, call handler::delete_all_rows() before. + + IMPLEMENTATION + HA_KEY_SWITCH_NONUNIQ is not implemented. + HA_KEY_SWITCH_ALL_SAVE is not implemented. + + RETURN + 0 ok + !=0 Error, among others: + HA_ERR_CRASHED data or index is non-empty. Delete all rows and retry. + HA_ERR_WRONG_COMMAND mode not implemented. +*/ + +int ha_maria::enable_indexes(uint mode) +{ + int error; + + if (maria_is_all_keys_active(file->s->state.key_map, file->s->base.keys)) + { + /* All indexes are enabled already. */ + return 0; + } + + if (mode == HA_KEY_SWITCH_ALL) + { + error= maria_enable_indexes(file); + /* + Do not try to repair on error, + as this could make the enabled state persistent, + but mode==HA_KEY_SWITCH_ALL forbids it. + */ + } + else if (mode == HA_KEY_SWITCH_NONUNIQ_SAVE) + { + THD *thd= current_thd; + HA_CHECK param; + const char *save_proc_info= thd->proc_info; + thd->proc_info= "Creating index"; + maria_chk_init(¶m); + param.op_name= "recreating_index"; + param.testflag= (T_SILENT | T_REP_BY_SORT | T_QUICK | + T_CREATE_MISSING_KEYS); + param.myf_rw &= ~MY_WAIT_IF_FULL; + param.sort_buffer_length= thd->variables.maria_sort_buff_size; + param.stats_method= (enum_handler_stats_method) thd->variables. + maria_stats_method; + param.tmpdir= &mysql_tmpdir_list; + if ((error= (repair(thd, param, 0) != HA_ADMIN_OK)) && param.retry_repair) + { + sql_print_warning("Warning: Enabling keys got errno %d, retrying", + my_errno); + /* Repairing by sort failed. Now try standard repair method. */ + param.testflag &= ~(T_REP_BY_SORT | T_QUICK); + error= (repair(thd, param, 0) != HA_ADMIN_OK); + /* + If the standard repair succeeded, clear all error messages which + might have been set by the first repair. They can still be seen + with SHOW WARNINGS then. + */ + if (!error) + thd->clear_error(); + } + info(HA_STATUS_CONST); + thd->proc_info= save_proc_info; + } + else + { + /* mode not implemented */ + error= HA_ERR_WRONG_COMMAND; + } + return error; +} + + +/* + Test if indexes are disabled. + + + SYNOPSIS + indexes_are_disabled() + no parameters + + + RETURN + 0 indexes are not disabled + 1 all indexes are disabled + [2 non-unique indexes are disabled - NOT YET IMPLEMENTED] +*/ + +int ha_maria::indexes_are_disabled(void) +{ + return maria_indexes_are_disabled(file); +} + + +/* + prepare for a many-rows insert operation + e.g. - disable indexes (if they can be recreated fast) or + activate special bulk-insert optimizations + + SYNOPSIS + start_bulk_insert(rows) + rows Rows to be inserted + 0 if we don't know + + NOTICE + Do not forget to call end_bulk_insert() later! +*/ + +void ha_maria::start_bulk_insert(ha_rows rows) +{ + DBUG_ENTER("ha_maria::start_bulk_insert"); + THD *thd= current_thd; + ulong size= min(thd->variables.read_buff_size, + table->s->avg_row_length * rows); + DBUG_PRINT("info", ("start_bulk_insert: rows %lu size %lu", + (ulong) rows, size)); + + /* don't enable row cache if too few rows */ + if (!rows || (rows > MARIA_MIN_ROWS_TO_USE_WRITE_CACHE)) + maria_extra(file, HA_EXTRA_WRITE_CACHE, (void*) &size); + + can_enable_indexes= maria_is_all_keys_active(file->s->state.key_map, + file->s->base.keys); + /* TODO: Remove when we have repair() working */ + can_enable_indexes= 0; + + if (!(specialflag & SPECIAL_SAFE_MODE)) + { + /* + Only disable old index if the table was empty and we are inserting + a lot of rows. + We should not do this for only a few rows as this is slower and + we don't want to update the key statistics based of only a few rows. + */ + if (file->state->records == 0 && can_enable_indexes && + (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES)) + maria_disable_non_unique_index(file, rows); + else + if (!file->bulk_insert && + (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT)) + { + maria_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows); + } + } + DBUG_VOID_RETURN; +} + + +/* + end special bulk-insert optimizations, + which have been activated by start_bulk_insert(). + + SYNOPSIS + end_bulk_insert() + no arguments + + RETURN + 0 OK + != 0 Error +*/ + +int ha_maria::end_bulk_insert() +{ + int err; + DBUG_ENTER("ha_maria::end_bulk_insert"); + maria_end_bulk_insert(file); + err= maria_extra(file, HA_EXTRA_NO_CACHE, 0); + DBUG_RETURN(err ? err : can_enable_indexes ? + enable_indexes(HA_KEY_SWITCH_NONUNIQ_SAVE) : 0); +} + + +bool ha_maria::check_and_repair(THD *thd) +{ + int error= 0; + int marked_crashed; + char *old_query; + uint old_query_length; + HA_CHECK_OPT check_opt; + DBUG_ENTER("ha_maria::check_and_repair"); + + check_opt.init(); + check_opt.flags= T_MEDIUM | T_AUTO_REPAIR; + // Don't use quick if deleted rows + if (!file->state->del && (maria_recover_options & HA_RECOVER_QUICK)) + check_opt.flags |= T_QUICK; + sql_print_warning("Checking table: '%s'", table->s->path.str); + + old_query= thd->query; + old_query_length= thd->query_length; + pthread_mutex_lock(&LOCK_thread_count); + thd->query= table->s->table_name.str; + thd->query_length= table->s->table_name.length; + pthread_mutex_unlock(&LOCK_thread_count); + + if ((marked_crashed= maria_is_crashed(file)) || check(thd, &check_opt)) + { + sql_print_warning("Recovering table: '%s'", table->s->path.str); + check_opt.flags= + ((maria_recover_options & HA_RECOVER_BACKUP ? T_BACKUP_DATA : 0) | + (marked_crashed ? 0 : T_QUICK) | + (maria_recover_options & HA_RECOVER_FORCE ? 0 : T_SAFE_REPAIR) | + T_AUTO_REPAIR); + if (repair(thd, &check_opt)) + error= 1; + } + pthread_mutex_lock(&LOCK_thread_count); + thd->query= old_query; + thd->query_length= old_query_length; + pthread_mutex_unlock(&LOCK_thread_count); + DBUG_RETURN(error); +} + + +bool ha_maria::is_crashed() const +{ + return (file->s->state.changed & STATE_CRASHED || + (my_disable_locking && file->s->state.open_count)); +} + + +int ha_maria::update_row(const byte * old_data, byte * new_data) +{ + statistic_increment(table->in_use->status_var.ha_update_count, &LOCK_status); + if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) + table->timestamp_field->set_time(); + return maria_update(file, old_data, new_data); +} + + +int ha_maria::delete_row(const byte * buf) +{ + statistic_increment(table->in_use->status_var.ha_delete_count, &LOCK_status); + return maria_delete(file, buf); +} + + +int ha_maria::index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, active_index, key, key_len, find_flag); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_read_idx(byte * buf, uint index, const byte * key, + uint key_len, enum ha_rkey_function find_flag) +{ + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, index, key, key_len, find_flag); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_read_last(byte * buf, const byte * key, uint key_len) +{ + DBUG_ENTER("ha_maria::index_read_last"); + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_key_count, + &LOCK_status); + int error= maria_rkey(file, buf, active_index, key, key_len, + HA_READ_PREFIX_LAST); + table->status= error ? STATUS_NOT_FOUND : 0; + DBUG_RETURN(error); +} + + +int ha_maria::index_next(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error= maria_rnext(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_prev(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_prev_count, + &LOCK_status); + int error= maria_rprev(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_first(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_first_count, + &LOCK_status); + int error= maria_rfirst(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_last(byte * buf) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_last_count, + &LOCK_status); + int error= maria_rlast(file, buf, active_index); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::index_next_same(byte * buf, + const byte *key __attribute__ ((unused)), + uint length __attribute__ ((unused))) +{ + DBUG_ASSERT(inited == INDEX); + statistic_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); + int error= maria_rnext_same(file, buf); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::rnd_init(bool scan) +{ + if (scan) + return maria_scan_init(file); + return maria_reset(file); // Free buffers +} + + +int ha_maria::rnd_end() +{ + /* Safe to call even if we don't have started a scan */ + maria_scan_end(file); + return 0; +} + + +int ha_maria::rnd_next(byte *buf) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, + &LOCK_status); + int error= maria_scan(file, buf); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +int ha_maria::restart_rnd_next(byte *buf, byte *pos) +{ + return rnd_pos(buf, pos); +} + + +int ha_maria::rnd_pos(byte * buf, byte *pos) +{ + statistic_increment(table->in_use->status_var.ha_read_rnd_count, + &LOCK_status); + int error= maria_rrnd(file, buf, my_get_ptr(pos, ref_length)); + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +void ha_maria::position(const byte * record) +{ + my_off_t row_position= maria_position(file); + my_store_ptr(ref, ref_length, row_position); +} + + +int ha_maria::info(uint flag) +{ + MARIA_INFO maria_info; + char name_buff[FN_REFLEN]; + + (void) maria_status(file, &maria_info, flag); + if (flag & HA_STATUS_VARIABLE) + { + stats.records= maria_info.records; + stats.deleted= maria_info.deleted; + stats.data_file_length= maria_info.data_file_length; + stats.index_file_length= maria_info.index_file_length; + stats.delete_length= maria_info.delete_length; + stats.check_time= maria_info.check_time; + stats.mean_rec_length= maria_info.mean_reclength; + } + if (flag & HA_STATUS_CONST) + { + TABLE_SHARE *share= table->s; + stats.max_data_file_length= maria_info.max_data_file_length; + stats.max_index_file_length= maria_info.max_index_file_length; + stats.create_time= maria_info.create_time; + ref_length= maria_info.reflength; + share->db_options_in_use= maria_info.options; + stats.block_size= maria_block_size; + + /* Update share */ + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_lock(&share->mutex); + share->keys_in_use.set_prefix(share->keys); + share->keys_in_use.intersect_extended(maria_info.key_map); + share->keys_for_keyread.intersect(share->keys_in_use); + share->db_record_offset= maria_info.record_offset; + if (share->key_parts) + memcpy((char*) table->key_info[0].rec_per_key, + (char*) maria_info.rec_per_key, + sizeof(table->key_info[0].rec_per_key) * share->key_parts); + if (share->tmp_table == NO_TMP_TABLE) + pthread_mutex_unlock(&share->mutex); + + /* + Set data_file_name and index_file_name to point at the symlink value + if table is symlinked (Ie; Real name is not same as generated name) + */ + data_file_name= index_file_name= 0; + fn_format(name_buff, file->s->open_file_name, "", MARIA_NAME_DEXT, + MY_APPEND_EXT | MY_UNPACK_FILENAME); + if (strcmp(name_buff, maria_info.data_file_name)) + data_file_name=maria_info.data_file_name; + fn_format(name_buff, file->s->open_file_name, "", MARIA_NAME_IEXT, + MY_APPEND_EXT | MY_UNPACK_FILENAME); + if (strcmp(name_buff, maria_info.index_file_name)) + index_file_name=maria_info.index_file_name; + } + if (flag & HA_STATUS_ERRKEY) + { + errkey= maria_info.errkey; + my_store_ptr(dup_ref, ref_length, maria_info.dup_key_pos); + } + /* Faster to always update, than to do it based on flag */ + stats.update_time= maria_info.update_time; + stats.auto_increment_value= maria_info.auto_increment; + + return 0; +} + + +int ha_maria::extra(enum ha_extra_function operation) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD) + return 0; + return maria_extra(file, operation, 0); +} + +int ha_maria::reset(void) +{ + return maria_reset(file); +} + +/* To be used with WRITE_CACHE and EXTRA_CACHE */ + +int ha_maria::extra_opt(enum ha_extra_function operation, ulong cache_size) +{ + if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE) + return 0; + return maria_extra(file, operation, (void*) &cache_size); +} + + +int ha_maria::delete_all_rows() +{ + return maria_delete_all_rows(file); +} + + +int ha_maria::delete_table(const char *name) +{ + return maria_delete_table(name); +} + +#define THD_TRN (*(TRN **)thd_ha_data(thd, maria_hton)) + +int ha_maria::external_lock(THD *thd, int lock_type) +{ + TRN *trn= THD_TRN; + DBUG_ENTER("ha_maria::external_lock"); + if (!save_transactional) + goto skip_transaction; + if (!trn && lock_type != F_UNLCK) /* no transaction yet - open it now */ + { + trn= trnman_new_trn(& thd->mysys_var->mutex, + & thd->mysys_var->suspend, + thd->thread_stack + STACK_DIRECTION * + (my_thread_stack_size - STACK_MIN_SIZE)); + if (!trn) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + DBUG_PRINT("info", ("THD_TRN set to 0x%lx", (ulong)trn)); + THD_TRN= trn; + if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + trans_register_ha(thd, TRUE, maria_hton); + } + if (lock_type != F_UNLCK) + { + this->file->trn= trn; + if (!trnman_increment_locked_tables(trn)) + { + trans_register_ha(thd, FALSE, maria_hton); + trnman_new_statement(trn); + } + if (!thd->transaction.on) + { + /* + No need to log REDOs/UNDOs. If this is an internal temporary table + which will be renamed to a permanent table (like in ALTER TABLE), + the rename happens after unlocking so will be durable (and the table + will get its create_rename_lsn). + Note: if we wanted to enable users to have an old backup and apply + tons of archived logs to roll-forward, we could then not disable + REDOs/UNDOs in this case. + */ + file->s->base.transactional= FALSE; + } + } + else + { + this->file->trn= 0; /* TODO: remove it also in commit and rollback */ + if (trn && trnman_has_locked_tables(trn)) + { + if (!trnman_decrement_locked_tables(trn)) + { + /* autocommit ? rollback a transaction */ +#ifdef MARIA_CANNOT_ROLLBACK + if (ma_commit(trn)) + DBUG_RETURN(1); + THD_TRN= 0; +#else + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + { + trnman_rollback_trn(trn); + DBUG_PRINT("info", ("THD_TRN set to 0x0")); + THD_TRN= 0; + } +#endif + } + } + file->s->base.transactional= save_transactional; + } +skip_transaction: + DBUG_RETURN(maria_lock_database(file, !table->s->tmp_table ? + lock_type : ((lock_type == F_UNLCK) ? + F_UNLCK : F_EXTRA_LCK))); +} + +int ha_maria::start_stmt(THD *thd, thr_lock_type lock_type) +{ + TRN *trn= THD_TRN; + if (save_transactional) + { + DBUG_ASSERT(trn); // this may be called only after external_lock() + DBUG_ASSERT(trnman_has_locked_tables(trn)); + DBUG_ASSERT(lock_type != F_UNLCK); + /* + As external_lock() was already called, don't increment locked_tables. + Note that we call the function below possibly several times when + statement starts (once per table). This is ok as long as that function + does cheap operations. Otherwise, we will need to do it only on first + call to start_stmt(). + */ + trnman_new_statement(trn); + } + return 0; +} + +THR_LOCK_DATA **ha_maria::store_lock(THD *thd, + THR_LOCK_DATA **to, + enum thr_lock_type lock_type) +{ + if (lock_type != TL_IGNORE && file->lock.type == TL_UNLOCK) + file->lock.type= lock_type; + *to++= &file->lock; + return to; +} + + +void ha_maria::update_create_info(HA_CREATE_INFO *create_info) +{ + ha_maria::info(HA_STATUS_AUTO | HA_STATUS_CONST); + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) + { + create_info->auto_increment_value= stats.auto_increment_value; + } + create_info->data_file_name= data_file_name; + create_info->index_file_name= index_file_name; +} + + +enum row_type ha_maria::get_row_type() const +{ + switch (file->s->data_file_type) { + case STATIC_RECORD: return ROW_TYPE_FIXED; + case DYNAMIC_RECORD: return ROW_TYPE_DYNAMIC; + case BLOCK_RECORD: return ROW_TYPE_PAGES; + case COMPRESSED_RECORD: return ROW_TYPE_COMPRESSED; + default: return ROW_TYPE_NOT_USED; + } +} + + +static enum data_file_type maria_row_type(HA_CREATE_INFO *info) +{ + switch (info->row_type) { + case ROW_TYPE_FIXED: return STATIC_RECORD; + case ROW_TYPE_DYNAMIC: return DYNAMIC_RECORD; + default: return BLOCK_RECORD; + } +} + + +int ha_maria::create(const char *name, register TABLE *table_arg, + HA_CREATE_INFO *ha_create_info) +{ + int error; + uint create_flags= 0, records, i; + char buff[FN_REFLEN]; + MARIA_KEYDEF *keydef; + MARIA_COLUMNDEF *recinfo; + MARIA_CREATE_INFO create_info; + TABLE_SHARE *share= table_arg->s; + uint options= share->db_options_in_use; + enum data_file_type row_type; + DBUG_ENTER("ha_maria::create"); + + for (i= 0; i < share->keys; i++) + { + if (table_arg->key_info[i].flags & HA_USES_PARSER) + { + create_flags|= HA_CREATE_RELIES_ON_SQL_LAYER; + break; + } + } + row_type= maria_row_type(ha_create_info); + if ((error= table2maria(table_arg, &keydef, &recinfo, &records))) + DBUG_RETURN(error); /* purecov: inspected */ + bzero((char*) &create_info, sizeof(create_info)); + create_info.max_rows= share->max_rows; + create_info.reloc_rows= share->min_rows; + create_info.with_auto_increment= share->next_number_key_offset == 0; + create_info.auto_increment= (ha_create_info->auto_increment_value ? + ha_create_info->auto_increment_value -1 : + (ulonglong) 0); + create_info.data_file_length= ((ulonglong) share->max_rows * + share->avg_row_length); + create_info.data_file_name= ha_create_info->data_file_name; + create_info.index_file_name= ha_create_info->index_file_name; + create_info.transactional= row_type == BLOCK_RECORD; + + if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE) + create_flags|= HA_CREATE_TMP_TABLE; + if (options & HA_OPTION_PACK_RECORD) + create_flags|= HA_PACK_RECORD; + if (options & HA_OPTION_CHECKSUM) + create_flags|= HA_CREATE_CHECKSUM; + if (options & HA_OPTION_DELAY_KEY_WRITE) + create_flags|= HA_CREATE_DELAY_KEY_WRITE; + + /* TODO: Check that the following fn_format is really needed */ + error= + maria_create(fn_format(buff, name, "", "", + MY_UNPACK_FILENAME | MY_APPEND_EXT), + row_type, share->keys, keydef, + records, recinfo, + 0, (MARIA_UNIQUEDEF *) 0, + &create_info, create_flags); + + my_free((gptr) recinfo, MYF(0)); + DBUG_RETURN(error); +} + + +int ha_maria::rename_table(const char *from, const char *to) +{ + return maria_rename(from, to); +} + + +void ha_maria::get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values) +{ + ulonglong nr; + int error; + byte key[HA_MAX_KEY_LENGTH]; + + if (!table->s->next_number_key_offset) + { // Autoincrement at key-start + ha_maria::info(HA_STATUS_AUTO); + *first_value= stats.auto_increment_value; + /* Maria has only table-level lock for now, so reserves to +inf */ + *nb_reserved_values= ULONGLONG_MAX; + return; + } + + /* it's safe to call the following if bulk_insert isn't on */ + maria_flush_bulk_insert(file, table->s->next_number_index); + + (void) extra(HA_EXTRA_KEYREAD); + key_copy(key, table->record[0], + table->key_info + table->s->next_number_index, + table->s->next_number_key_offset); + error= maria_rkey(file, table->record[1], (int) table->s->next_number_index, + key, table->s->next_number_key_offset, HA_READ_PREFIX_LAST); + if (error) + nr= 1; + else + { + /* Get data from record[1] */ + nr= ((ulonglong) table->next_number_field-> + val_int_offset(table->s->rec_buff_length) + 1); + } + extra(HA_EXTRA_NO_KEYREAD); + *first_value= nr; + /* + MySQL needs to call us for next row: assume we are inserting ("a",null) + here, we return 3, and next this statement will want to insert ("b",null): + there is no reason why ("b",3+1) would be the good row to insert: maybe it + already exists, maybe 3+1 is too large... + */ + *nb_reserved_values= 1; +} + + +/* + Find out how many rows there is in the given range + + SYNOPSIS + records_in_range() + inx Index to use + min_key Start of range. Null pointer if from first key + max_key End of range. Null pointer if to last key + + NOTES + min_key.flag can have one of the following values: + HA_READ_KEY_EXACT Include the key in the range + HA_READ_AFTER_KEY Don't include key in range + + max_key.flag can have one of the following values: + HA_READ_BEFORE_KEY Don't include key in range + HA_READ_AFTER_KEY Include all 'end_key' values in the range + + RETURN + HA_POS_ERROR Something is wrong with the index tree. + 0 There is no matching keys in the given range + number > 0 There is approximately 'number' matching rows in + the range. +*/ + +ha_rows ha_maria::records_in_range(uint inx, key_range *min_key, + key_range *max_key) +{ + return (ha_rows) maria_records_in_range(file, (int) inx, min_key, max_key); +} + + +int ha_maria::ft_read(byte * buf) +{ + int error; + + if (!ft_handler) + return -1; + + thread_safe_increment(table->in_use->status_var.ha_read_next_count, + &LOCK_status); // why ? + + error= ft_handler->please->read_next(ft_handler, (char*) buf); + + table->status= error ? STATUS_NOT_FOUND : 0; + return error; +} + + +uint ha_maria::checksum() const +{ + return (uint) file->state->checksum; +} + + +bool ha_maria::check_if_incompatible_data(HA_CREATE_INFO *info, + uint table_changes) +{ + uint options= table->s->db_options_in_use; + + if (info->auto_increment_value != stats.auto_increment_value || + info->data_file_name != data_file_name || + info->index_file_name != index_file_name || + maria_row_type(info) != data_file_type || + table_changes == IS_EQUAL_NO || + table_changes & IS_EQUAL_PACK_LENGTH) // Not implemented yet + return COMPATIBLE_DATA_NO; + + if ((options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE)) != + (info->table_options & (HA_OPTION_PACK_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_DELAY_KEY_WRITE))) + return COMPATIBLE_DATA_NO; + return COMPATIBLE_DATA_YES; +} + + +static int maria_hton_panic(handlerton *hton, ha_panic_function flag) +{ + return maria_panic(flag); +} + + +static int maria_commit(handlerton *hton __attribute__ ((unused)), + THD *thd, bool all) +{ + TRN *trn= THD_TRN; + DBUG_ENTER("maria_commit"); + trnman_reset_locked_tables(trn); + /* statement or transaction ? */ + if ((thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && !all) + DBUG_RETURN(0); // end of statement + DBUG_PRINT("info", ("THD_TRN set to 0x0")); + THD_TRN= 0; + DBUG_RETURN(ma_commit(trn)); // end of transaction +} + + +static int maria_rollback(handlerton *hton __attribute__ ((unused)), + THD *thd, bool all) +{ + TRN *trn= THD_TRN; + DBUG_ENTER("maria_rollback"); + trnman_reset_locked_tables(trn); + /* statement or transaction ? */ + if ((thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && !all) + { + trnman_rollback_statement(trn); + DBUG_RETURN(0); // end of statement + } + DBUG_PRINT("info", ("THD_TRN set to 0x0")); + THD_TRN= 0; + DBUG_RETURN(trnman_rollback_trn(trn) ? + HA_ERR_OUT_OF_MEM : 0); // end of transaction +} + + +static int ha_maria_init(void *p) +{ + int res; + maria_hton= (handlerton *)p; + maria_hton->state= SHOW_OPTION_YES; + maria_hton->db_type= DB_TYPE_MARIA; + maria_hton->create= maria_create_handler; + maria_hton->panic= maria_hton_panic; + maria_hton->commit= maria_commit; + maria_hton->rollback= maria_rollback; + /* TODO: decide if we support Maria being used for log tables */ + maria_hton->flags= HTON_CAN_RECREATE | HTON_SUPPORT_LOG_TABLES; + bzero(maria_log_pagecache, sizeof(*maria_log_pagecache)); + maria_data_root= mysql_real_data_home; + res= maria_init() || ma_control_file_create_or_open() || + (init_pagecache(maria_log_pagecache, + TRANSLOG_PAGECACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE) == 0) || + translog_init(maria_data_root, TRANSLOG_FILE_SIZE, + MYSQL_VERSION_ID, server_id, maria_log_pagecache, + TRANSLOG_DEFAULT_FLAGS) || + trnman_init(); + maria_multi_threaded= TRUE; + return res; +} + + +struct st_mysql_storage_engine maria_storage_engine= +{ MYSQL_HANDLERTON_INTERFACE_VERSION }; + +mysql_declare_plugin(maria) +{ + MYSQL_STORAGE_ENGINE_PLUGIN, + &maria_storage_engine, + "Maria", + "MySQL AB", + "Traditional transactional MySQL tables", + PLUGIN_LICENSE_GPL, + ha_maria_init, /* Plugin Init */ + NULL, /* Plugin Deinit */ + 0x0100, /* 1.0 */ + NULL, /* status variables */ + NULL, /* system variables */ + NULL /* config options */ +} +mysql_declare_plugin_end; diff --git a/storage/maria/ha_maria.h b/storage/maria/ha_maria.h new file mode 100644 index 00000000000..a2f6b190657 --- /dev/null +++ b/storage/maria/ha_maria.h @@ -0,0 +1,153 @@ +/* Copyright (C) 2006,2004 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +/* class for the the maria handler */ + +#include + +#define HA_RECOVER_NONE 0 /* No automatic recover */ +#define HA_RECOVER_DEFAULT 1 /* Automatic recover active */ +#define HA_RECOVER_BACKUP 2 /* Make a backupfile on recover */ +#define HA_RECOVER_FORCE 4 /* Recover even if we loose rows */ +#define HA_RECOVER_QUICK 8 /* Don't check rows in data file */ + +extern ulong maria_sort_buffer_size; +extern TYPELIB maria_recover_typelib; +extern ulong maria_recover_options; + +class ha_maria :public handler +{ + MARIA_HA *file; + ulonglong int_table_flags; + char *data_file_name, *index_file_name; + enum data_file_type data_file_type; + bool can_enable_indexes; + /** + @brief for temporarily disabling table's transactionality + (if THD::transaction::on is false), remember the original value here + */ + bool save_transactional; + int repair(THD * thd, HA_CHECK ¶m, bool optimize); + +public: + ha_maria(handlerton *hton, TABLE_SHARE * table_arg); + ~ha_maria() {} + handler *clone(MEM_ROOT *mem_root); + const char *table_type() const + { return "MARIA"; } + const char *index_type(uint key_number); + const char **bas_ext() const; + ulonglong table_flags() const + { return int_table_flags; } + ulong index_flags(uint inx, uint part, bool all_parts) const + { + return ((table_share->key_info[inx].algorithm == HA_KEY_ALG_FULLTEXT) ? + 0 : HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | + HA_READ_ORDER | HA_KEYREAD_ONLY); + } + uint max_supported_keys() const + { return MARIA_MAX_KEY; } + uint max_supported_key_length() const; + uint max_supported_key_part_length() const + { return max_supported_key_length(); } + enum row_type get_row_type() const; + uint checksum() const; + virtual double scan_time(); + + virtual bool check_if_locking_is_allowed(uint sql_command, + ulong type, TABLE * table, + uint count, + bool called_by_logger_thread); + int open(const char *name, int mode, uint test_if_locked); + int close(void); + int write_row(byte * buf); + int update_row(const byte * old_data, byte * new_data); + int delete_row(const byte * buf); + int index_read(byte * buf, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_idx(byte * buf, uint idx, const byte * key, + uint key_len, enum ha_rkey_function find_flag); + int index_read_last(byte * buf, const byte * key, uint key_len); + int index_next(byte * buf); + int index_prev(byte * buf); + int index_first(byte * buf); + int index_last(byte * buf); + int index_next_same(byte * buf, const byte * key, uint keylen); + int ft_init() + { + if (!ft_handler) + return 1; + ft_handler->please->reinit_search(ft_handler); + return 0; + } + FT_INFO *ft_init_ext(uint flags, uint inx, String * key) + { + return maria_ft_init_search(flags, file, inx, + (byte *) key->ptr(), key->length(), + key->charset(), table->record[0]); + } + int ft_read(byte * buf); + int rnd_init(bool scan); + int rnd_end(void); + int rnd_next(byte * buf); + int rnd_pos(byte * buf, byte * pos); + int restart_rnd_next(byte * buf, byte * pos); + void position(const byte * record); + int info(uint); + int extra(enum ha_extra_function operation); + int extra_opt(enum ha_extra_function operation, ulong cache_size); + int reset(void); + int external_lock(THD * thd, int lock_type); + int start_stmt(THD *thd, thr_lock_type lock_type); + int delete_all_rows(void); + int disable_indexes(uint mode); + int enable_indexes(uint mode); + int indexes_are_disabled(void); + void start_bulk_insert(ha_rows rows); + int end_bulk_insert(); + ha_rows records_in_range(uint inx, key_range * min_key, key_range * max_key); + void update_create_info(HA_CREATE_INFO * create_info); + int create(const char *name, TABLE * form, HA_CREATE_INFO * create_info); + THR_LOCK_DATA **store_lock(THD * thd, THR_LOCK_DATA ** to, + enum thr_lock_type lock_type); + virtual void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *first_value, + ulonglong *nb_reserved_values); + int rename_table(const char *from, const char *to); + int delete_table(const char *name); + int check(THD * thd, HA_CHECK_OPT * check_opt); + int analyze(THD * thd, HA_CHECK_OPT * check_opt); + int repair(THD * thd, HA_CHECK_OPT * check_opt); + bool check_and_repair(THD * thd); + bool is_crashed() const; + bool auto_repair() const + { return maria_recover_options != 0; } + int optimize(THD * thd, HA_CHECK_OPT * check_opt); + int restore(THD * thd, HA_CHECK_OPT * check_opt); + int backup(THD * thd, HA_CHECK_OPT * check_opt); + int assign_to_keycache(THD * thd, HA_CHECK_OPT * check_opt); + int preload_keys(THD * thd, HA_CHECK_OPT * check_opt); + bool check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes); +#ifdef HAVE_REPLICATION + int dump(THD * thd, int fd); + int net_read_dump(NET * net); +#endif +}; diff --git a/storage/maria/lockman.c b/storage/maria/lockman.c new file mode 100644 index 00000000000..8316d70bb29 --- /dev/null +++ b/storage/maria/lockman.c @@ -0,0 +1,786 @@ +/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */ +/* QQ: TODO instant duration locks */ +/* QQ: #warning automatically place S instead of LS if possible */ + +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Generic Lock Manager + + Lock manager handles locks on "resources", a resource must be uniquely + identified by a 64-bit number. Lock manager itself does not imply + anything about the nature of a resource - it can be a row, a table, a + database, or just anything. + + Locks belong to "lock owners". A Lock owner is uniquely identified by a + 16-bit number. A function loid2lo must be provided by the application + that takes such a number as an argument and returns a LOCK_OWNER + structure. + + Lock levels are completely defined by three tables. Lock compatibility + matrix specifies which locks can be held at the same time on a resource. + Lock combining matrix specifies what lock level has the same behaviour as + a pair of two locks of given levels. getlock_result matrix simplifies + intention locking and lock escalation for an application, basically it + defines which locks are intention locks and which locks are "loose" + locks. It is only used to provide better diagnostics for the + application, lock manager itself does not differentiate between normal, + intention, and loose locks. + + Internally lock manager is based on a lock-free hash, see lf_hash.c for + details. All locks are stored in a hash, with a resource id as a search + key, so all locks for the same resource will be considered collisions and + will be put in a one (lock-free) linked list. The main lock-handling + logic is in the inner loop that searches for a lock in such a linked + list - lockfind(). + + This works as follows. Locks generally are added to the end of the list + (with one exception, see below). When scanning the list it is always + possible to determine what locks are granted (active) and what locks are + waiting - first lock is obviously active, the second is active if it's + compatible with the first, and so on, a lock is active if it's compatible + with all previous locks and all locks before it are also active. + To calculate the "compatible with all previous locks" all locks are + accumulated in prev_lock variable using lock_combining_matrix. + + Lock upgrades: when a thread that has a lock on a given resource, + requests a new lock on the same resource and the old lock is not enough + to satisfy new lock requirements (which is defined by + lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock is + placed in the list. Depending on other locks it is immediately active or + it will wait for other locks. Here's an exception to "locks are added + to the end" rule - upgraded locks are added after the last active lock + but before all waiting locks. Old lock (the one we upgraded from) is + not removed from the list, indeed it may be needed if the new lock was + in a savepoint that gets rolled back. So old lock is marked as "ignored" + (IGNORE_ME flag). New lock gets an UPGRADED flag. + + Loose locks add an important exception to the above. Loose locks do not + always commute with other locks. In the list IX-LS both locks are active, + while in the LS-IX list only the first lock is active. This creates a + problem in lock upgrades. If the list was IX-LS and the owner of the + first lock wants to place LS lock (which can be immediately granted), the + IX lock is upgraded to LSIX and the list becomes IX-LS-LSIX, which, + according to the lock compatibility matrix means that the last lock is + waiting - of course it all happened because IX and LS were swapped and + they don't commute. To work around this there's ACTIVE flag which is set + in every lock that never waited (was placed active), and this flag + overrides "compatible with all previous locks" rule. + + When a lock is placed to the end of the list it's either compatible with + all locks and all locks are active - new lock becomes active at once, or + it conflicts with some of the locks, in this case in the 'blocker' + variable a conflicting lock is returned and the calling thread waits on a + pthread condition in the LOCK_OWNER structure of the owner of the + conflicting lock. Or a new lock is compatible with all locks, but some + existing locks are not compatible with each other (example: request IS, + when the list is S-IX) - that is not all locks are active. In this case a + first waiting lock is returned in the 'blocker' variable, lockman_getlock() + notices that a "blocker" does not conflict with the requested lock, and + "dereferences" it, to find the lock that it's waiting on. The calling + thread than begins to wait on the same lock. + + To better support table-row relations where one needs to lock the table + with an intention lock before locking the row, extended diagnostics is + provided. When an intention lock (presumably on a table) is granted, + lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row, + perhaps the thread already has a normal lock on this table), + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual), + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check + whether it's possible to lock the row, but no need to lock it - perhaps + the thread has a loose lock on this table). This is defined by + getlock_result[] table. +*/ + +#include +#include +#include +#include +#include "lockman.h" + +/* + Lock compatibility matrix. + + It's asymmetric. Read it as "Somebody has the lock , can I set the lock ?" + + ') Though you can take LS lock while somebody has S lock, it makes no + sense - it's simpler to take S lock too. + + 1 - compatible + 0 - incompatible + -1 - "impossible", so that we can assert the impossibility. +*/ +static int lock_compatibility_matrix[10][10]= +{ /* N S X IS IX SIX LS LX SLX LSIX */ + { -1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ + { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ + { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */ +}; + +/* + Lock combining matrix. + + It's symmetric. Read it as "what lock level L is identical to the + set of two locks A and B" + + One should never get N from it, we assert the impossibility +*/ +static enum lock_type lock_combining_matrix[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { N, S, X, IS, IX, SIX, S, SLX, SLX, SIX}, /* N */ + { S, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ + { X, X, X, X, X, X, X, X, X, X}, /* X */ + { IS, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ + { IX, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ + { SIX, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ + { LS, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ + { LX, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ + { SLX, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ + { LSIX, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ +}; + +#define REPEAT_ONCE_MORE 0 +#define OK_TO_PLACE_THE_LOCK 1 +#define OK_TO_PLACE_THE_REQUEST 2 +#define ALREADY_HAVE_THE_LOCK 4 +#define ALREADY_HAVE_THE_REQUEST 8 +#define PLACE_NEW_DISABLE_OLD 16 +#define REQUEST_NEW_DISABLE_OLD 32 +#define RESOURCE_WAS_UNLOCKED 64 + +#define NEED_TO_WAIT (OK_TO_PLACE_THE_REQUEST | ALREADY_HAVE_THE_REQUEST |\ + REQUEST_NEW_DISABLE_OLD) +#define ALREADY_HAVE (ALREADY_HAVE_THE_LOCK | ALREADY_HAVE_THE_REQUEST) +#define LOCK_UPGRADE (PLACE_NEW_DISABLE_OLD | REQUEST_NEW_DISABLE_OLD) + + +/* + the return codes for lockman_getlock + + It's asymmetric. Read it as "I have the lock , + what value should be returned for ?" + + 0 means impossible combination (assert!) + + Defines below help to preserve the table structure. + I/L/A values are self explanatory + x means the combination is possible (assert should not crash) + but it cannot happen in row locks, only in table locks (S,X), + or lock escalations (LS,LX) +*/ +#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE +#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +#define A GOT_THE_LOCK +#define x GOT_THE_LOCK +static enum lockman_getlock_result getlock_result[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ + { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ + { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */ + { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */ + { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */ + { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */ + { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */ + { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */ + { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */ + { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */ +}; +#undef I +#undef L +#undef A +#undef x + +LF_REQUIRE_PINS(4); + +typedef struct lockman_lock { + uint64 resource; + struct lockman_lock *lonext; + intptr volatile link; + uint32 hashnr; + /* QQ: TODO - remove hashnr from LOCK */ + uint16 loid; + uchar lock; /* sizeof(uchar) <= sizeof(enum) */ + uchar flags; +} LOCK; + +#define IGNORE_ME 1 +#define UPGRADED 2 +#define ACTIVE 4 + +typedef struct { + intptr volatile *prev; + LOCK *curr, *next; + LOCK *blocker, *upgrade_from; +} CURSOR; + +#define PTR(V) (LOCK *)((V) & (~(intptr)1)) +#define DELETED(V) ((V) & 1) + +/* + NOTE + cursor is positioned in either case + pins[0..3] are used, they are NOT removed on return +*/ +static int lockfind(LOCK * volatile *head, LOCK *node, + CURSOR *cursor, LF_PINS *pins) +{ + uint32 hashnr, cur_hashnr; + uint64 resource, cur_resource; + intptr link; + my_bool cur_active, compatible, upgrading, prev_active; + enum lock_type lock, prev_lock, cur_lock; + uint16 loid, cur_loid; + int cur_flags, flags; + + hashnr= node->hashnr; + resource= node->resource; + lock= node->lock; + loid= node->loid; + flags= node->flags; + +retry: + cursor->prev= (intptr *)head; + prev_lock= N; + cur_active= TRUE; + compatible= TRUE; + upgrading= FALSE; + cursor->blocker= cursor->upgrade_from= 0; + _lf_unpin(pins, 3); + do { + cursor->curr= PTR(*cursor->prev); + _lf_pin(pins, 1, cursor->curr); + } while(*cursor->prev != (intptr)cursor->curr && LF_BACKOFF); + for (;;) + { + if (!cursor->curr) + break; + do { + link= cursor->curr->link; + cursor->next= PTR(link); + _lf_pin(pins, 0, cursor->next); + } while(link != cursor->curr->link && LF_BACKOFF); + cur_hashnr= cursor->curr->hashnr; + cur_resource= cursor->curr->resource; + cur_lock= cursor->curr->lock; + cur_loid= cursor->curr->loid; + cur_flags= cursor->curr->flags; + if (*cursor->prev != (intptr)cursor->curr) + { + (void)LF_BACKOFF; + goto retry; + } + if (!DELETED(link)) + { + if (cur_hashnr > hashnr || + (cur_hashnr == hashnr && cur_resource >= resource)) + { + if (cur_hashnr > hashnr || cur_resource > resource) + break; + /* ok, we have a lock for this resource */ + DBUG_ASSERT(lock_compatibility_matrix[prev_lock][cur_lock] >= 0); + DBUG_ASSERT(lock_compatibility_matrix[cur_lock][lock] >= 0); + if ((cur_flags & IGNORE_ME) && ! (flags & IGNORE_ME)) + { + DBUG_ASSERT(cur_active); + if (cur_loid == loid) + cursor->upgrade_from= cursor->curr; + } + else + { + prev_active= cur_active; + if (cur_flags & ACTIVE) + DBUG_ASSERT(prev_active == TRUE); + else + cur_active&= lock_compatibility_matrix[prev_lock][cur_lock]; + if (upgrading && !cur_active /*&& !(cur_flags & UPGRADED)*/) + break; + if (prev_active && !cur_active) + { + cursor->blocker= cursor->curr; + _lf_pin(pins, 3, cursor->curr); + } + if (cur_loid == loid) + { + /* we already have a lock on this resource */ + DBUG_ASSERT(lock_combining_matrix[cur_lock][lock] != N); + DBUG_ASSERT(!upgrading || (flags & IGNORE_ME)); + if (lock_combining_matrix[cur_lock][lock] == cur_lock) + { + /* new lock is compatible */ + if (cur_active) + { + cursor->blocker= cursor->curr; /* loose-locks! */ + _lf_unpin(pins, 3); /* loose-locks! */ + return ALREADY_HAVE_THE_LOCK; + } + else + return ALREADY_HAVE_THE_REQUEST; + } + /* not compatible, upgrading */ + upgrading= TRUE; + cursor->upgrade_from= cursor->curr; + } + else + { + if (!lock_compatibility_matrix[cur_lock][lock]) + { + compatible= FALSE; + cursor->blocker= cursor->curr; + _lf_pin(pins, 3, cursor->curr); + } + } + prev_lock= lock_combining_matrix[prev_lock][cur_lock]; + DBUG_ASSERT(prev_lock != N); + } + } + cursor->prev= &(cursor->curr->link); + _lf_pin(pins, 2, cursor->curr); + } + else + { + if (my_atomic_casptr((void **)cursor->prev, + (void **)&cursor->curr, cursor->next)) + _lf_alloc_free(pins, cursor->curr); + else + { + (void)LF_BACKOFF; + goto retry; + } + } + cursor->curr= cursor->next; + _lf_pin(pins, 1, cursor->curr); + } + /* + either the end of lock list - no more locks for this resource, + or upgrading and the end of active lock list + */ + if (upgrading) + { + if (compatible /*&& prev_active*/) + return PLACE_NEW_DISABLE_OLD; + else + return REQUEST_NEW_DISABLE_OLD; + } + if (cur_active && compatible) + { + /* + either no locks for this resource or all are compatible. + ok to place the lock in any case. + */ + return prev_lock == N ? RESOURCE_WAS_UNLOCKED + : OK_TO_PLACE_THE_LOCK; + } + /* we have a lock conflict. ok to place a lock request. And wait */ + return OK_TO_PLACE_THE_REQUEST; +} + +/* + NOTE + it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays +*/ +static int lockinsert(LOCK * volatile *head, LOCK *node, LF_PINS *pins, + LOCK **blocker) +{ + CURSOR cursor; + int res; + + do + { + res= lockfind(head, node, &cursor, pins); + DBUG_ASSERT(res != ALREADY_HAVE_THE_REQUEST); + if (!(res & ALREADY_HAVE)) + { + if (res & LOCK_UPGRADE) + { + node->flags|= UPGRADED; + node->lock= lock_combining_matrix[cursor.upgrade_from->lock][node->lock]; + } + if (!(res & NEED_TO_WAIT)) + node->flags|= ACTIVE; + node->link= (intptr)cursor.curr; + DBUG_ASSERT(node->link != (intptr)node); + DBUG_ASSERT(cursor.prev != &node->link); + if (!my_atomic_casptr((void **)cursor.prev, (void **)&cursor.curr, node)) + { + res= REPEAT_ONCE_MORE; + node->flags&= ~ACTIVE; + } + if (res & LOCK_UPGRADE) + cursor.upgrade_from->flags|= IGNORE_ME; + /* + QQ: is this OK ? if a reader has already read upgrade_from, + it may find it conflicting with node :( + - see the last test from test_lockman_simple() + */ + } + + } while (res == REPEAT_ONCE_MORE); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + /* + note that blocker is not necessarily pinned here (when it's == curr). + this is ok as in such a case it's either a dummy node for + initialize_bucket() and dummy nodes don't need pinning, + or it's a lock of the same transaction for lockman_getlock, + and it cannot be removed by another thread + */ + *blocker= cursor.blocker; + return res; +} + +/* + NOTE + it uses pins[0..3], on return pins 0..2 are removed, pin 3 (blocker) stays +*/ +static int lockpeek(LOCK * volatile *head, LOCK *node, LF_PINS *pins, + LOCK **blocker) +{ + CURSOR cursor; + int res; + + res= lockfind(head, node, &cursor, pins); + + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + if (blocker) + *blocker= cursor.blocker; + return res; +} + +/* + NOTE + it uses pins[0..3], on return all pins are removed. + + One _must_ have the lock (or request) to call this +*/ +static int lockdelete(LOCK * volatile *head, LOCK *node, LF_PINS *pins) +{ + CURSOR cursor; + int res; + + do + { + res= lockfind(head, node, &cursor, pins); + DBUG_ASSERT(res & ALREADY_HAVE); + + if (cursor.upgrade_from) + cursor.upgrade_from->flags&= ~IGNORE_ME; + + /* + XXX this does not work with savepoints, as old lock is left ignored. + It cannot be unignored, as would basically mean moving the lock back + in the lock chain (from upgraded). And the latter is not allowed - + because it breaks list scanning. So old ignored lock must be deleted, + new - same - lock must be installed right after the lock we're deleting, + then we can delete. Good news is - this is only required when rolling + back a savepoint. + */ + if (my_atomic_casptr((void **)&(cursor.curr->link), + (void **)&cursor.next, 1+(char *)cursor.next)) + { + if (my_atomic_casptr((void **)cursor.prev, + (void **)&cursor.curr, cursor.next)) + _lf_alloc_free(pins, cursor.curr); + else + lockfind(head, node, &cursor, pins); + } + else + { + res= REPEAT_ONCE_MORE; + if (cursor.upgrade_from) + cursor.upgrade_from->flags|= IGNORE_ME; + } + } while (res == REPEAT_ONCE_MORE); + _lf_unpin(pins, 0); + _lf_unpin(pins, 1); + _lf_unpin(pins, 2); + _lf_unpin(pins, 3); + return res; +} + +void lockman_init(LOCKMAN *lm, loid_to_lo_func *func, uint timeout) +{ + lf_alloc_init(&lm->alloc, sizeof(LOCK), offsetof(LOCK, lonext)); + lf_dynarray_init(&lm->array, sizeof(LOCK **)); + lm->size= 1; + lm->count= 0; + lm->loid_to_lo= func; + lm->lock_timeout= timeout; +} + +void lockman_destroy(LOCKMAN *lm) +{ + LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + lf_alloc_direct_free(&lm->alloc, el); + else + my_free((void *)el, MYF(0)); + el= (LOCK *)next; + } + lf_alloc_destroy(&lm->alloc); + lf_dynarray_destroy(&lm->array); +} + +/* TODO: optimize it */ +#define MAX_LOAD 1 + +static void initialize_bucket(LOCKMAN *lm, LOCK * volatile *node, + uint bucket, LF_PINS *pins) +{ + int res; + uint parent= my_clear_highest_bit(bucket); + LOCK *dummy= (LOCK *)my_malloc(sizeof(LOCK), MYF(MY_WME)); + LOCK **tmp= 0, *cur; + LOCK * volatile *el= _lf_dynarray_lvalue(&lm->array, parent); + + if (*el == NULL && bucket) + initialize_bucket(lm, el, parent, pins); + dummy->hashnr= my_reverse_bits(bucket); + dummy->loid= 0; + dummy->lock= X; /* doesn't matter, in fact */ + dummy->resource= 0; + dummy->flags= 0; + res= lockinsert(el, dummy, pins, &cur); + DBUG_ASSERT(res & (ALREADY_HAVE_THE_LOCK | RESOURCE_WAS_UNLOCKED)); + if (res & ALREADY_HAVE_THE_LOCK) + { + my_free((void *)dummy, MYF(0)); + dummy= cur; + } + my_atomic_casptr((void **)node, (void **)&tmp, dummy); +} + +static inline uint calc_hash(uint64 resource) +{ + const uchar *pos= (uchar *)&resource; + ulong nr1= 1, nr2= 4, i; + for (i= 0; i < sizeof(resource) ; i++, pos++) + { + nr1^= (ulong) ((((uint) nr1 & 63)+nr2) * ((uint)*pos)) + (nr1 << 8); + nr2+= 3; + } + return nr1 & INT_MAX32; +} + +/* + RETURN + see enum lockman_getlock_result + NOTE + uses pins[0..3], they're removed on return +*/ +enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, + uint64 resource, + enum lock_type lock) +{ + int res; + uint csize, bucket, hashnr; + LOCK *node, * volatile *el, *blocker; + LF_PINS *pins= lo->pins; + enum lock_type old_lock; + + DBUG_ASSERT(lo->loid); + lf_rwlock_by_pins(pins); + node= (LOCK *)_lf_alloc_new(pins); + node->flags= 0; + node->lock= lock; + node->loid= lo->loid; + node->resource= resource; + hashnr= calc_hash(resource); + bucket= hashnr % lm->size; + el= _lf_dynarray_lvalue(&lm->array, bucket); + if (*el == NULL) + initialize_bucket(lm, el, bucket, pins); + node->hashnr= my_reverse_bits(hashnr) | 1; + res= lockinsert(el, node, pins, &blocker); + if (res & ALREADY_HAVE) + { + int r; + old_lock= blocker->lock; + _lf_alloc_free(pins, node); + lf_rwunlock_by_pins(pins); + r= getlock_result[old_lock][lock]; + DBUG_ASSERT(r); + return r; + } + /* a new value was added to the hash */ + csize= lm->size; + if ((my_atomic_add32(&lm->count, 1)+1.0) / csize > MAX_LOAD) + my_atomic_cas32(&lm->size, &csize, csize*2); + node->lonext= lo->all_locks; + lo->all_locks= node; + for ( ; res & NEED_TO_WAIT; res= lockpeek(el, node, pins, &blocker)) + { + LOCK_OWNER *wait_for_lo; + ulonglong deadline; + struct timespec timeout; + + _lf_assert_pin(pins, 3); /* blocker must be pinned here */ + wait_for_lo= lm->loid_to_lo(blocker->loid); + + /* + now, this is tricky. blocker is not necessarily a LOCK + we're waiting for. If it's compatible with what we want, + then we're waiting for a lock that blocker is waiting for + (see two places where blocker is set in lockfind) + In the latter case, let's "dereference" it + */ + if (lock_compatibility_matrix[blocker->lock][lock]) + { + blocker= wait_for_lo->all_locks; + _lf_pin(pins, 3, blocker); + if (blocker != wait_for_lo->all_locks) + continue; + wait_for_lo= wait_for_lo->waiting_for; + } + + /* + note that the blocker transaction may have ended by now, + its LOCK_OWNER and short id were reused, so 'wait_for_lo' may point + to an unrelated - albeit valid - LOCK_OWNER + */ + if (!wait_for_lo) + continue; + + lo->waiting_for= wait_for_lo; + lf_rwunlock_by_pins(pins); + + /* + We lock a mutex - it may belong to a wrong LOCK_OWNER, but it must + belong to _some_ LOCK_OWNER. It means, we can never free() a LOCK_OWNER, + if there're other active LOCK_OWNERs. + */ + /* QQ: race condition here */ + pthread_mutex_lock(wait_for_lo->mutex); + if (DELETED(blocker->link)) + { + /* + blocker transaction was ended, or a savepoint that owned + the lock was rolled back. Either way - the lock was removed + */ + pthread_mutex_unlock(wait_for_lo->mutex); + lf_rwlock_by_pins(pins); + continue; + } + + /* yuck. waiting */ + deadline= my_getsystime() + lm->lock_timeout * 10000; + timeout.tv_sec= deadline/10000000; + timeout.tv_nsec= (deadline % 10000000) * 100; + do + { + pthread_cond_timedwait(wait_for_lo->cond, wait_for_lo->mutex, &timeout); + } while (!DELETED(blocker->link) && my_getsystime() < deadline); + pthread_mutex_unlock(wait_for_lo->mutex); + lf_rwlock_by_pins(pins); + if (!DELETED(blocker->link)) + { + /* + timeout. + note that we _don't_ release the lock request here. + Instead we're relying on the caller to abort the transaction, + and release all locks at once - see lockman_release_locks() + */ + _lf_unpin(pins, 3); + lf_rwunlock_by_pins(pins); + return DIDNT_GET_THE_LOCK; + } + } + lo->waiting_for= 0; + _lf_assert_unpin(pins, 3); /* unpin should not be needed */ + lf_rwunlock_by_pins(pins); + return getlock_result[lock][lock]; +} + +/* + RETURN + 0 - deleted + 1 - didn't (not found) + NOTE + see lockdelete() for pin usage notes +*/ +int lockman_release_locks(LOCKMAN *lm, LOCK_OWNER *lo) +{ + LOCK * volatile *el, *node, *next; + uint bucket; + LF_PINS *pins= lo->pins; + + pthread_mutex_lock(lo->mutex); + lf_rwlock_by_pins(pins); + for (node= lo->all_locks; node; node= next) + { + next= node->lonext; + bucket= calc_hash(node->resource) % lm->size; + el= _lf_dynarray_lvalue(&lm->array, bucket); + if (*el == NULL) + initialize_bucket(lm, el, bucket, pins); + lockdelete(el, node, pins); + my_atomic_add32(&lm->count, -1); + } + lf_rwunlock_by_pins(pins); + lo->all_locks= 0; + /* now signal all waiters */ + pthread_cond_broadcast(lo->cond); + pthread_mutex_unlock(lo->mutex); + return 0; +} + +#ifdef MY_LF_EXTRA_DEBUG +static const char *lock2str[]= +{ "N", "S", "X", "IS", "IX", "SIX", "LS", "LX", "SLX", "LSIX" }; +/* + NOTE + the function below is NOT thread-safe !!! +*/ +void print_lockhash(LOCKMAN *lm) +{ + LOCK *el= *(LOCK **)_lf_dynarray_lvalue(&lm->array, 0); + printf("hash: size %u count %u\n", lm->size, lm->count); + while (el) + { + intptr next= el->link; + if (el->hashnr & 1) + { + printf("0x%08lx { resource %lu, loid %u, lock %s", + (long) el->hashnr, (ulong) el->resource, el->loid, + lock2str[el->lock]); + if (el->flags & IGNORE_ME) printf(" IGNORE_ME"); + if (el->flags & UPGRADED) printf(" UPGRADED"); + if (el->flags & ACTIVE) printf(" ACTIVE"); + if (DELETED(next)) printf(" ***DELETED***"); + printf("}\n"); + } + else + { + /*printf("0x%08x { dummy }\n", el->hashnr);*/ + DBUG_ASSERT(el->resource == 0 && el->loid == 0 && el->lock == X); + } + el= PTR(next); + } +} +#endif diff --git a/storage/maria/lockman.h b/storage/maria/lockman.h new file mode 100644 index 00000000000..279a5537f76 --- /dev/null +++ b/storage/maria/lockman.h @@ -0,0 +1,76 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _lockman_h +#define _lockman_h + +/* + Lock levels: + ^^^^^^^^^^^ + + N - "no lock", not a lock, used sometimes internally to simplify the code + S - Shared + X - eXclusive + IS - Intention Shared + IX - Intention eXclusive + SIX - Shared + Intention eXclusive + LS - Loose Shared + LX - Loose eXclusive + SLX - Shared + Loose eXclusive + LSIX - Loose Shared + Intention eXclusive +*/ +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST }; + +struct lockman_lock; + +typedef struct st_lock_owner LOCK_OWNER; +struct st_lock_owner { + LF_PINS *pins; /* must be allocated from lockman's pinbox */ + struct lockman_lock *all_locks; /* a LIFO */ + LOCK_OWNER *waiting_for; + pthread_cond_t *cond; /* transactions waiting for this, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid; +}; + +typedef LOCK_OWNER *loid_to_lo_func(uint16); +typedef struct { + LF_DYNARRAY array; /* hash itself */ + LF_ALLOCATOR alloc; /* allocator for elements */ + int32 volatile size; /* size of array */ + int32 volatile count; /* number of elements in the hash */ + uint lock_timeout; + loid_to_lo_func *loid_to_lo; +} LOCKMAN; +#define DIDNT_GET_THE_LOCK 0 +enum lockman_getlock_result { + NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT, + GOT_THE_LOCK, + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +}; + +void lockman_init(LOCKMAN *, loid_to_lo_func *, uint); +void lockman_destroy(LOCKMAN *); +enum lockman_getlock_result lockman_getlock(LOCKMAN *lm, LOCK_OWNER *lo, + uint64 resource, + enum lock_type lock); +int lockman_release_locks(LOCKMAN *, LOCK_OWNER *); + +#ifdef EXTRA_DEBUG +void print_lockhash(LOCKMAN *lm); +#endif + +#endif diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c new file mode 100644 index 00000000000..e1308bce487 --- /dev/null +++ b/storage/maria/ma_bitmap.c @@ -0,0 +1,2003 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Bitmap handling (for records in block) + + The data file starts with a bitmap page, followed by as many data + pages as the bitmap can cover. After this there is a new bitmap page + and more data pages etc. + + The bitmap code assumes there is always an active bitmap page and thus + that there is at least one bitmap page in the file + + Structure of bitmap page: + + Fixed size records (to be implemented later): + + 2 bits are used to indicate: + + 0 Empty + 1 0-75 % full (at least room for 2 records) + 2 75-100 % full (at least room for one record) + 3 100 % full (no more room for records) + + Assuming 8K pages, this will allow us to map: + 8192 (bytes per page) * 4 (pages mapped per byte) * 8192 (page size)= 256M + + (For Maria this will be 7*4 * 8192 = 224K smaller because of LSN) + + Note that for fixed size rows, we can't add more columns without doing + a full reorganization of the table. The user can always force a dynamic + size row format by specifying ROW_FORMAT=dynamic. + + + Dynamic size records: + + 3 bits are used to indicate + + 0 Empty page + 1 0-30 % full (at least room for 3 records) + 2 30-60 % full (at least room for 2 records) + 3 60-90 % full (at least room for one record) + 4 100 % full (no more room for records) + 5 Tail page, 0-40 % full + 6 Tail page, 40-80 % full + 7 Full tail page or full blob page + + Assuming 8K pages, this will allow us to map: + 8192 (bytes per page) * 8 bits/byte / 3 bits/page * 8192 (page size)= 170.7M + + Note that values 1-3 may be adjust for each individual table based on + 'min record length'. Tail pages are for overflow data which can be of + any size and thus doesn't have to be adjusted for different tables. + If we add more columns to the table, some of the originally calculated + 'cut off' points may not be optimal, but they shouldn't be 'drasticly + wrong'. + + When allocating data from the bitmap, we are trying to do it in a + 'best fit' manner. Blobs and varchar blocks are given out in large + continuous extents to allow fast access to these. Before allowing a + row to 'flow over' to other blocks, we will compact the page and use + all space on it. If there is many rows in the page, we will ensure + there is *LEFT_TO_GROW_ON_SPLIT* bytes left on the page to allow other + rows to grow. + + The bitmap format allows us to extend the row file in big chunks, if needed. + + When calculating the size for a packed row, we will calculate the following + things separately: + - Row header + null_bits + empty_bits fixed size segments etc. + - Size of all char/varchar fields + - Size of each blob field + + The bitmap handler will get all the above information and return + either one page or a set of pages to put the different parts. + + Bitmaps are read on demand in response to insert/delete/update operations. + The following bitmap pointers will be cached and stored on disk on close: + - Current insert_bitmap; When inserting new data we will first try to + fill this one. + - First bitmap which is not completely full. This is updated when we + free data with an update or delete. + + While flushing out bitmaps, we will cache the status of the bitmap in memory + to avoid having to read a bitmap for insert of new data that will not + be of any use + - Total empty space + - Largest number of continuous pages + + Bitmap ONLY goes to disk in the following scenarios + - The file is closed (and we flush all changes to disk) + - On checkpoint + (Ie: When we do a checkpoint, we have to ensure that all bitmaps are + put on disk even if they are not in the page cache). + - When explicitely requested (for example on backup or after recvoery, + to simplify things) +*/ + +#include "maria_def.h" +#include "ma_blockrec.h" + +/* Number of pages to store blob parts */ +#define BLOB_SEGMENT_MIN_SIZE 128 + +#define FULL_HEAD_PAGE 4 +#define FULL_TAIL_PAGE 7 + +uchar maria_bitmap_marker[2]= {(uchar) 'b',(uchar) 'm'}; + +static my_bool _ma_read_bitmap_page(MARIA_SHARE *share, + MARIA_FILE_BITMAP *bitmap, + ulonglong page); + + +/* Write bitmap page to key cache */ + +static inline my_bool write_changed_bitmap(MARIA_SHARE *share, + MARIA_FILE_BITMAP *bitmap) +{ + DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); + return (pagecache_write(share->pagecache, + &bitmap->file, bitmap->page, 0, + (byte*) bitmap->map, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)); +} + +/* + Initialize bitmap variables in share + + SYNOPSIS + _ma_bitmap_init() + share Share handler + file data file handler + + NOTES + This is called the first time a file is opened. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file) +{ + uint aligned_bit_blocks; + uint max_page_size; + MARIA_FILE_BITMAP *bitmap= &share->bitmap; + uint size= share->block_size; +#ifndef DBUG_OFF + /* We want to have a copy of the bitmap to be able to print differences */ + size*= 2; +#endif + + if (!(bitmap->map= (uchar*) my_malloc(size, MYF(MY_WME)))) + return 1; + + bitmap->file.file= file; + bitmap->changed= 0; + bitmap->block_size= share->block_size; + /* Size needs to be alligned on 6 */ + aligned_bit_blocks= share->block_size / 6; + bitmap->total_size= aligned_bit_blocks * 6; + /* + In each 6 bytes, we have 6*8/3 = 16 pages covered + The +1 is to add the bitmap page, as this doesn't have to be covered + */ + bitmap->pages_covered= aligned_bit_blocks * 16 + 1; + + /* Update size for bits */ + /* TODO; Make this dependent of the row size */ + max_page_size= share->block_size - PAGE_OVERHEAD_SIZE; + bitmap->sizes[0]= max_page_size; /* Empty page */ + bitmap->sizes[1]= max_page_size - max_page_size * 30 / 100; + bitmap->sizes[2]= max_page_size - max_page_size * 60 / 100; + bitmap->sizes[3]= max_page_size - max_page_size * 90 / 100; + bitmap->sizes[4]= 0; /* Full page */ + bitmap->sizes[5]= max_page_size - max_page_size * 40 / 100; + bitmap->sizes[6]= max_page_size - max_page_size * 80 / 100; + bitmap->sizes[7]= 0; + + pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW); + + /* + We can't read a page yet, as in some case we don't have an active + page cache yet. + Pretend we have a dummy, full and not changed bitmap page in memory. + */ + + bitmap->page= ~(ulonglong) 0; + bitmap->used_size= bitmap->total_size; + bfill(bitmap->map, share->block_size, 255); + if (share->state.first_bitmap_with_space == ~(ulonglong) 0) + { + /* Start scanning for free space from start of file */ + share->state.first_bitmap_with_space = 0; + } + return 0; +} + + +/* + Free data allocated by _ma_bitmap_init + + SYNOPSIS + _ma_bitmap_end() + share Share handler +*/ + +my_bool _ma_bitmap_end(MARIA_SHARE *share) +{ + my_bool res= _ma_flush_bitmap(share); + pthread_mutex_destroy(&share->bitmap.bitmap_lock); + my_free((byte*) share->bitmap.map, MYF(MY_ALLOW_ZERO_PTR)); + share->bitmap.map= 0; + return res; +} + + +/* + Flush bitmap to disk + + SYNOPSIS + _ma_flush_bitmap() + share Share handler + + NOTES + In the future, _ma_flush_bitmap() will be called to flush changes don't + by this thread (ie, checking the changed flag is ok). The reason we + check it again in the mutex is that if someone else did a flush at the + same time, we don't have to do the write. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_flush_bitmap(MARIA_SHARE *share) +{ + my_bool res= 0; + if (share->bitmap.changed) + { + pthread_mutex_lock(&share->bitmap.bitmap_lock); + if (share->bitmap.changed) + { + res= write_changed_bitmap(share, &share->bitmap); + share->bitmap.changed= 0; + } + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + } + return res; +} + + +/* + Intialize bitmap in memory to a zero bitmap + + SYNOPSIS + _ma_bitmap_delete_all() + share Share handler + + NOTES + This is called on ma_delete_all (truncate data file). +*/ + +void _ma_bitmap_delete_all(MARIA_SHARE *share) +{ + MARIA_FILE_BITMAP *bitmap= &share->bitmap; + if (bitmap->map) /* Not in create */ + { + bzero(bitmap->map, share->block_size); + memcpy(bitmap->map + share->block_size - 2, maria_bitmap_marker, 2); + bitmap->changed= 0; + bitmap->page= 0; + bitmap->used_size= bitmap->total_size; + } +} + + +/* + Return bitmap pattern for the smallest head block that can hold 'size' + + SYNOPSIS + size_to_head_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0-3 For a description of the bitmap sizes, see the header +*/ + +static uint size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size <= bitmap->sizes[3]) + return 3; + if (size <= bitmap->sizes[2]) + return 2; + if (size <= bitmap->sizes[1]) + return 1; + DBUG_ASSERT(size <= bitmap->sizes[0]); + return 0; +} + + +/* + Return bitmap pattern for head block where there is size bytes free + + SYNOPSIS + _ma_free_size_to_head_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0-4 (Possible bitmap patterns for head block) +*/ + +uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size < bitmap->sizes[3]) + return 4; + if (size < bitmap->sizes[2]) + return 3; + if (size < bitmap->sizes[1]) + return 2; + return (size < bitmap->sizes[0]) ? 1 : 0; +} + + +/* + Return bitmap pattern for the smallest tail block that can hold 'size' + + SYNOPSIS + size_to_tail_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0, 5 or 6 For a description of the bitmap sizes, see the header +*/ + +static uint size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size <= bitmap->sizes[6]) + return 6; + if (size <= bitmap->sizes[5]) + return 5; + DBUG_ASSERT(size <= bitmap->sizes[0]); + return 0; +} + + +/* + Return bitmap pattern for tail block where there is size bytes free + + SYNOPSIS + free_size_to_tail_pattern() + bitmap Bitmap + size Requested size + + RETURN + 0, 5, 6, 7 For a description of the bitmap sizes, see the header +*/ + +static uint free_size_to_tail_pattern(MARIA_FILE_BITMAP *bitmap, uint size) +{ + if (size >= bitmap->sizes[0]) + return 0; /* Revert to empty page */ + if (size < bitmap->sizes[6]) + return 7; + if (size < bitmap->sizes[5]) + return 6; + return 5; +} + + +/* + Return size guranteed to be available on a page + + SYNOPSIS + pattern_to_head_size() + bitmap Bitmap + pattern Pattern (0-7) + + RETURN + 0 - block_size +*/ + +static inline uint pattern_to_size(MARIA_FILE_BITMAP *bitmap, uint pattern) +{ + DBUG_ASSERT(pattern <= 7); + return bitmap->sizes[pattern]; +} + + +/* + Print bitmap for debugging + + SYNOPSIS + _ma_print_bitmap() + bitmap Bitmap to print + + IMPLEMENTATION + Prints all changed bits since last call to _ma_print_bitmap(). + This is done by having a copy of the last bitmap in + bitmap->map+bitmap->block_size. +*/ + +#ifndef DBUG_OFF + +const char *bits_to_txt[]= +{ + "empty", "00-30% full", "30-60% full", "60-90% full", "full", + "tail 00-40 % full", "tail 40-80 % full", "tail/blob full" +}; + +static void _ma_print_bitmap(MARIA_FILE_BITMAP *bitmap) +{ + uchar *pos, *end, *org_pos; + ulong page; + + end= bitmap->map + bitmap->used_size; + DBUG_LOCK_FILE; + fprintf(DBUG_FILE,"\nBitmap page changes at page %lu\n", + (ulong) bitmap->page); + + page= (ulong) bitmap->page+1; + for (pos= bitmap->map, org_pos= bitmap->map + bitmap->block_size ; + pos < end ; + pos+= 6, org_pos+= 6) + { + ulonglong bits= uint6korr(pos); /* 6 bytes = 6*8/3= 16 patterns */ + ulonglong org_bits= uint6korr(org_pos); + uint i; + + /* + Test if there is any changes in the next 16 bitmaps (to not have to + loop through all bits if we know they are the same) + */ + if (bits != org_bits) + { + for (i= 0; i < 16 ; i++, bits>>= 3, org_bits>>= 3) + { + if ((bits & 7) != (org_bits & 7)) + fprintf(DBUG_FILE, "Page: %8lu %s -> %s\n", page+i, + bits_to_txt[org_bits & 7], bits_to_txt[bits & 7]); + } + } + page+= 16; + } + fputc('\n', DBUG_FILE); + DBUG_UNLOCK_FILE; + memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); +} + +#endif /* DBUG_OFF */ + + +/*************************************************************************** + Reading & writing bitmap pages +***************************************************************************/ + +/* + Read a given bitmap page + + SYNOPSIS + read_bitmap_page() + info Maria handler + bitmap Bitmap handler + page Page to read + + TODO + Update 'bitmap->used_size' to real size of used bitmap + + RETURN + 0 ok + 1 error (Error writing old bitmap or reading bitmap page) +*/ + +static my_bool _ma_read_bitmap_page(MARIA_SHARE *share, + MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + my_off_t position= page * bitmap->block_size; + my_bool res; + DBUG_ENTER("_ma_read_bitmap_page"); + DBUG_ASSERT(page % bitmap->pages_covered == 0); + + bitmap->page= page; + if (position >= share->state.state.data_file_length) + { + share->state.state.data_file_length= position + bitmap->block_size; + bzero(bitmap->map, bitmap->block_size); + memcpy(bitmap->map + share->block_size - 2, maria_bitmap_marker, 2); + bitmap->used_size= 0; +#ifndef DBUG_OFF + memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); +#endif + DBUG_RETURN(0); + } + bitmap->used_size= bitmap->total_size; + DBUG_ASSERT(share->pagecache->block_size == bitmap->block_size); + res= pagecache_read(share->pagecache, + (PAGECACHE_FILE*)&bitmap->file, page, 0, + (byte*) bitmap->map, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0; +#ifndef DBUG_OFF + if (!res) + memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size); +#endif + DBUG_RETURN(res); +} + + +/* + Change to another bitmap page + + SYNOPSIS + _ma_change_bitmap_page() + info Maria handler + bitmap Bitmap handler + page Bitmap page to read + + NOTES + If old bitmap was changed, write it out before reading new one + We return empty bitmap if page is outside of file size + + RETURN + 0 ok + 1 error (Error writing old bitmap or reading bitmap page) +*/ + +static my_bool _ma_change_bitmap_page(MARIA_HA *info, + MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + DBUG_ENTER("_ma_change_bitmap_page"); + + if (bitmap->changed) + { + if (write_changed_bitmap(info->s, bitmap)) + DBUG_RETURN(1); + bitmap->changed= 0; + } + DBUG_RETURN(_ma_read_bitmap_page(info->s, bitmap, page)); +} + + +/* + Read next suitable bitmap + + SYNOPSIS + move_to_next_bitmap() + bitmap Bitmap handle + + NOTES + The found bitmap may be full, so calling function may need to call this + repeatedly until it finds enough space. + + TODO + Add cache of bitmaps to not read something that is not usable + + RETURN + 0 ok + 1 error (either couldn't save old bitmap or read new one +*/ + +static my_bool move_to_next_bitmap(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap) +{ + ulonglong page= bitmap->page; + MARIA_STATE_INFO *state= &info->s->state; + DBUG_ENTER("move_to_next_bitmap"); + + if (state->first_bitmap_with_space != ~(ulonglong) 0 && + state->first_bitmap_with_space != page) + { + page= state->first_bitmap_with_space; + state->first_bitmap_with_space= ~(ulonglong) 0; + } + else + page+= bitmap->pages_covered; + DBUG_RETURN(_ma_change_bitmap_page(info, bitmap, page)); +} + + +/**************************************************************************** + Allocate data in bitmaps +****************************************************************************/ + +/* + Store data in 'block' and mark the place used in the bitmap + + SYNOPSIS + fill_block() + bitmap Bitmap handle + block Store data about what we found + best_data Pointer to best 6 byte aligned area in bitmap->map + best_pos Which bit in *best_data the area starts + 0 = first bit pattern, 1 second bit pattern etc + best_bits The original value of the bits at best_pos + fill_pattern Bitmap pattern to store in best_data[best_pos] + + NOTES + We mark all pages to be 'TAIL's, which means that + block->page_count is really a row position inside the page. +*/ + +static void fill_block(MARIA_FILE_BITMAP *bitmap, + MARIA_BITMAP_BLOCK *block, + uchar *best_data, uint best_pos, uint best_bits, + uint fill_pattern) +{ + uint page, offset, tmp; + uchar *data; + + /* For each 6 bytes we have 6*8/3= 16 patterns */ + page= (best_data - bitmap->map) / 6 * 16 + best_pos; + block->page= bitmap->page + 1 + page; + block->page_count= 1 + TAIL_BIT; + block->empty_space= pattern_to_size(bitmap, best_bits); + block->sub_blocks= 1; + block->org_bitmap_value= best_bits; + block->used= BLOCKUSED_TAIL; /* See _ma_bitmap_release_unused() */ + + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + best_pos*= 3; + data= best_data+ best_pos / 8; + offset= best_pos & 7; + tmp= uint2korr(data); + + /* we turn off the 3 bits and replace them with fill_pattern */ + tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); + int2store(data, tmp); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); +} + + +/* + Allocate data for head block + + SYNOPSIS + allocate_head() + bitmap bitmap + size Size of data region we need to store + block Store found information here + + IMPLEMENTATION + Find the best-fit page to put a region of 'size' + This is defined as the first page of the set of pages + with the smallest free space that can hold 'size'. + + RETURN + 0 ok (block is updated) + 1 error (no space in bitmap; block is not touched) +*/ + + +static my_bool allocate_head(MARIA_FILE_BITMAP *bitmap, uint size, + MARIA_BITMAP_BLOCK *block) +{ + uint min_bits= size_to_head_pattern(bitmap, size); + uchar *data= bitmap->map, *end= data + bitmap->used_size; + uchar *best_data= 0; + uint best_bits= (uint) -1, best_pos; + DBUG_ENTER("allocate_head"); + + LINT_INIT(best_pos); + DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size)); + + for (; data < end; data += 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uint i; + + /* + Skip common patterns + We can skip empty pages (if we already found a match) or + anything matching the following pattern as this will be either + a full page or a tail page + */ + if ((!bits && best_data) || + ((bits & LL(04444444444444444)) == LL(04444444444444444))) + continue; + for (i= 0; i < 16 ; i++, bits >>= 3) + { + uint pattern= bits & 7; + if (pattern <= min_bits) + { + /* There is enough space here */ + if (pattern == min_bits) + { + /* There is exactly enough space here, return this page */ + best_bits= min_bits; + best_data= data; + best_pos= i; + goto found; + } + if ((int) pattern > (int) best_bits) + { + /* + There is more than enough space here and it's better than what + we have found so far. Remember it, as we will choose it if we + don't find anything in this bitmap page. + */ + best_bits= pattern; + best_data= data; + best_pos= i; + } + } + } + } + if (!best_data) /* Found no place */ + { + if (bitmap->used_size == bitmap->total_size) + DBUG_RETURN(1); /* No space in bitmap */ + /* Allocate data at end of bitmap */ + bitmap->used_size+= 6; + best_data= data; + best_pos= best_bits= 0; + } + +found: + fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_HEAD_PAGE); + DBUG_RETURN(0); +} + + +/* + Allocate data for tail block + + SYNOPSIS + allocate_tail() + bitmap bitmap + size Size of block we need to find + block Store found information here + + RETURN + 0 ok (block is updated) + 1 error (no space in bitmap; block is not touched) +*/ + + +static my_bool allocate_tail(MARIA_FILE_BITMAP *bitmap, uint size, + MARIA_BITMAP_BLOCK *block) +{ + uint min_bits= size_to_tail_pattern(bitmap, size); + uchar *data= bitmap->map, *end= data + bitmap->used_size; + uchar *best_data= 0; + uint best_bits= (uint) -1, best_pos; + DBUG_ENTER("allocate_tail"); + DBUG_PRINT("enter", ("size: %u", size)); + + LINT_INIT(best_pos); + DBUG_ASSERT(size <= FULL_PAGE_SIZE(bitmap->block_size)); + + for (; data < end; data += 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uint i; + + /* + Skip common patterns + We can skip empty pages (if we already found a match) or + the following patterns: 1-4 (head pages, not suitable for tail) or + 7 (full tail page). See 'Dynamic size records' comment at start of file. + + At the moment we only skip full tail pages (ie, all bits are + set) as this is easy to detect with one simple test and is a + quite common case if we have blobs. + */ + + if ((!bits && best_data) || bits == LL(0xffffffffffff)) + continue; + for (i= 0; i < 16; i++, bits >>= 3) + { + uint pattern= bits & 7; + if (pattern <= min_bits && (!pattern || pattern >= 5)) + { + if (pattern == min_bits) + { + best_bits= min_bits; + best_data= data; + best_pos= i; + goto found; + } + if ((int) pattern > (int) best_bits) + { + best_bits= pattern; + best_data= data; + best_pos= i; + } + } + } + } + if (!best_data) + { + if (bitmap->used_size == bitmap->total_size) + DBUG_RETURN(1); + /* Allocate data at end of bitmap */ + bitmap->used_size+= 6; + best_pos= best_bits= 0; + } + +found: + fill_block(bitmap, block, best_data, best_pos, best_bits, FULL_TAIL_PAGE); + DBUG_RETURN(0); +} + + +/* + Allocate data for full blocks + + SYNOPSIS + allocate_full_pages() + bitmap bitmap + pages_needed Total size in pages (bitmap->total_size) we would like to have + block Store found information here + full_page 1 if we are not allowed to split extent + + IMPLEMENTATION + We will return the smallest area >= size. If there is no such + block, we will return the biggest area that satisfies + area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size) + + To speed up searches, we will only consider areas that has at least 16 free + pages starting on an even boundary. When finding such an area, we will + extend it with all previous and following free pages. This will ensure + we don't get holes between areas + + RETURN + # Blocks used + 0 error (no space in bitmap; block is not touched) +*/ + +static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap, + ulong pages_needed, + MARIA_BITMAP_BLOCK *block, my_bool full_page) +{ + uchar *data= bitmap->map, *data_end= data + bitmap->used_size; + uchar *page_end= data + bitmap->total_size; + uchar *best_data= 0; + uint min_size; + uint best_area_size, best_prefix_area_size, best_suffix_area_size; + uint page, size; + ulonglong best_prefix_bits; + DBUG_ENTER("allocate_full_pages"); + DBUG_PRINT("enter", ("pages_needed: %lu", pages_needed)); + + /* Following variables are only used if best_data is set */ + LINT_INIT(best_prefix_bits); + LINT_INIT(best_prefix_area_size); + LINT_INIT(best_suffix_area_size); + + min_size= pages_needed; + if (!full_page && min_size > BLOB_SEGMENT_MIN_SIZE) + min_size= BLOB_SEGMENT_MIN_SIZE; + best_area_size= ~(uint) 0; + + for (; data < page_end; data+= 6) + { + ulonglong bits= uint6korr(data); /* 6 bytes = 6*8/3= 16 patterns */ + uchar *data_start; + ulonglong prefix_bits= 0; + uint area_size, prefix_area_size, suffix_area_size; + + /* Find area with at least 16 free pages */ + if (bits) + continue; + data_start= data; + /* Find size of area */ + for (data+=6 ; data < data_end ; data+= 6) + { + if ((bits= uint6korr(data))) + break; + } + area_size= (data - data_start) / 6 * 16; + if (area_size >= best_area_size) + continue; + prefix_area_size= suffix_area_size= 0; + if (!bits) + { + /* + End of page; All the rest of the bits on page are part of area + This is needed because bitmap->used_size only covers the set bits + in the bitmap. + */ + area_size+= (page_end - data) / 6 * 16; + if (area_size >= best_area_size) + break; + data= page_end; + } + else + { + /* Add bits at end of page */ + for (; !(bits & 7); bits >>= 3) + suffix_area_size++; + area_size+= suffix_area_size; + } + if (data_start != bitmap->map) + { + /* Add bits before page */ + bits= prefix_bits= uint6korr(data_start - 6); + DBUG_ASSERT(bits != 0); + /* 111 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 */ + if (!(bits & LL(07000000000000000))) + { + data_start-= 6; + do + { + prefix_area_size++; + bits<<= 3; + } while (!(bits & LL(07000000000000000))); + area_size+= prefix_area_size; + /* Calculate offset to page from data_start */ + prefix_area_size= 16 - prefix_area_size; + } + } + if (area_size >= min_size && area_size <= best_area_size) + { + best_data= data_start; + best_area_size= area_size; + best_prefix_bits= prefix_bits; + best_prefix_area_size= prefix_area_size; + best_suffix_area_size= suffix_area_size; + + /* Prefer to put data in biggest possible area */ + if (area_size <= pages_needed) + min_size= area_size; + else + min_size= pages_needed; + } + } + if (!best_data) + DBUG_RETURN(0); /* No room on page */ + + /* + Now allocate min(pages_needed, area_size), starting from + best_start + best_prefix_area_size + */ + if (best_area_size > pages_needed) + best_area_size= pages_needed; + + /* For each 6 bytes we have 6*8/3= 16 patterns */ + page= ((best_data - bitmap->map) * 8) / 3 + best_prefix_area_size; + block->page= bitmap->page + 1 + page; + block->page_count= best_area_size; + block->empty_space= 0; + block->sub_blocks= 1; + block->org_bitmap_value= 0; + block->used= 0; + DBUG_PRINT("info", ("page: %lu page_count: %u", + (ulong) block->page, block->page_count)); + + if (best_prefix_area_size) + { + ulonglong tmp; + /* Convert offset back to bits */ + best_prefix_area_size= 16 - best_prefix_area_size; + if (best_area_size < best_prefix_area_size) + { + tmp= (LL(1) << best_area_size*3) - 1; + best_area_size= best_prefix_area_size; /* for easy end test */ + } + else + tmp= (LL(1) << best_prefix_area_size*3) - 1; + tmp<<= (16 - best_prefix_area_size) * 3; + DBUG_ASSERT((best_prefix_bits & tmp) == 0); + best_prefix_bits|= tmp; + int6store(best_data, best_prefix_bits); + if (!(best_area_size-= best_prefix_area_size)) + { + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(block->page_count); + } + best_data+= 6; + } + best_area_size*= 3; /* Bits to set */ + size= best_area_size/8; /* Bytes to set */ + bfill(best_data, size, 255); + best_data+= size; + if ((best_area_size-= size * 8)) + { + /* fill last byte */ + *best_data|= (uchar) ((1 << best_area_size) -1); + best_data++; + } + if (data_end < best_data) + bitmap->used_size= (uint) (best_data - bitmap->map); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(block->page_count); +} + + +/**************************************************************************** + Find right bitmaps where to store data +****************************************************************************/ + +/* + Find right bitmap and position for head block + + SYNOPSIS + find_head() + info Maria handler + length Size of data region we need store + position Position in bitmap_blocks where to store the + information for the head block. + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_head(MARIA_HA *info, uint length, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + /* + There is always place for the head block in bitmap_blocks as these are + preallocated at _ma_init_block_record(). + */ + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_head(bitmap, length, block)) + if (move_to_next_bitmap(info, bitmap)) + return 1; + return 0; +} + + +/* + Find right bitmap and position for tail + + SYNOPSIS + find_tail() + info Maria handler + length Size of data region we need store + position Position in bitmap_blocks where to store the + information for the head block. + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_tail(MARIA_HA *info, uint length, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + DBUG_ENTER("find_tail"); + + /* Needed, as there is no error checking in dynamic_element */ + if (allocate_dynamic(&info->bitmap_blocks, position)) + DBUG_RETURN(1); + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (allocate_tail(bitmap, length, block)) + if (move_to_next_bitmap(info, bitmap)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Find right bitmap and position for full blocks in one extent + + SYNOPSIS + find_mid() + info Maria handler. + pages How many pages to allocate. + position Position in bitmap_blocks where to store the + information for the head block. + NOTES + This is used to allocate the main extent after the 'head' block + (Ie, the middle part of the head-middle-tail entry) + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_mid(MARIA_HA *info, ulong pages, uint position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + block= dynamic_element(&info->bitmap_blocks, position, MARIA_BITMAP_BLOCK *); + + while (!allocate_full_pages(bitmap, pages, block, 1)) + { + if (move_to_next_bitmap(info, bitmap)) + return 1; + } + return 0; +} + + +/* + Find right bitmap and position for putting a blob + + SYNOPSIS + find_blob() + info Maria handler. + length Length of the blob + + NOTES + The extents are stored last in info->bitmap_blocks + + IMPLEMENTATION + Allocate all full pages for the block + optionally one tail + + RETURN + 0 ok + 1 error +*/ + +static my_bool find_blob(MARIA_HA *info, ulong length) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint full_page_size= FULL_PAGE_SIZE(info->s->block_size); + ulong pages; + uint rest_length, used; + uint first_block_pos; + MARIA_BITMAP_BLOCK *first_block= 0; + DBUG_ENTER("find_blob"); + DBUG_PRINT("enter", ("length: %lu", length)); + + pages= length / full_page_size; + rest_length= (uint) (length - pages * full_page_size); + if (rest_length >= MAX_TAIL_SIZE(info->s->block_size)) + { + pages++; + rest_length= 0; + } + + if (pages) + { + MARIA_BITMAP_BLOCK *block; + if (allocate_dynamic(&info->bitmap_blocks, + info->bitmap_blocks.elements + + pages / BLOB_SEGMENT_MIN_SIZE + 2)) + DBUG_RETURN(1); + first_block_pos= info->bitmap_blocks.elements; + block= dynamic_element(&info->bitmap_blocks, info->bitmap_blocks.elements, + MARIA_BITMAP_BLOCK*); + first_block= block; + do + { + used= allocate_full_pages(bitmap, + (pages >= 65535 ? 65535 : (uint) pages), block, + 0); + if (!used) + { + if (move_to_next_bitmap(info, bitmap)) + DBUG_RETURN(1); + } + else + { + pages-= used; + info->bitmap_blocks.elements++; + block++; + } + } while (pages != 0); + } + if (rest_length && find_tail(info, rest_length, + info->bitmap_blocks.elements++)) + DBUG_RETURN(1); + if (first_block) + first_block->sub_blocks= info->bitmap_blocks.elements - first_block_pos; + DBUG_RETURN(0); +} + + +/* + Find pages to put ALL blobs + + SYNOPSIS + allocate_blobs() + info Maria handler + row Information of what is in the row (from calc_record_size()) + + RETURN + 0 ok + 1 error +*/ + +static my_bool allocate_blobs(MARIA_HA *info, MARIA_ROW *row) +{ + ulong *length, *end; + uint elements; + /* + Reserve size for: + head block + one extent + tail block + */ + elements= info->bitmap_blocks.elements; + for (length= row->blob_lengths, end= length + info->s->base.blobs; + length < end; length++) + { + if (*length && find_blob(info, *length)) + return 1; + } + row->extents_count= (info->bitmap_blocks.elements - elements); + return 0; +} + + +/* + Store in the bitmap the new size for a head page + + SYNOPSIS + use_head() + info Maria handler + page Page number to update + (Note that caller guarantees this is in the active + bitmap) + size How much free space is left on the page + block_position In which info->bitmap_block we have the + information about the head block. + + NOTES + This is used on update where we are updating an existing head page +*/ + +static void use_head(MARIA_HA *info, ulonglong page, uint size, + uint block_position) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + MARIA_BITMAP_BLOCK *block; + uchar *data; + uint offset, tmp, offset_page; + + block= dynamic_element(&info->bitmap_blocks, block_position, + MARIA_BITMAP_BLOCK*); + block->page= page; + block->page_count= 1 + TAIL_BIT; + block->empty_space= size; + block->sub_blocks= 1; + block->used= BLOCKUSED_TAIL; + + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page= (uint) (page - bitmap->page - 1) * 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + tmp= uint2korr(data); + block->org_bitmap_value= (tmp >> offset) & 7; + tmp= (tmp & ~(7 << offset)) | (FULL_HEAD_PAGE << offset); + int2store(data, tmp); + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); +} + + +/* + Find out where to split the row (ie, what goes in head, middle, tail etc) + + SYNOPSIS + find_where_to_split_row() + share Maria share + row Information of what is in the row (from calc_record_size()) + extents_length Number of bytes needed to store all extents + split_size Free size on the page (The head length must be less + than this) + + RETURN + row_length for the head block. +*/ + +static uint find_where_to_split_row(MARIA_SHARE *share, MARIA_ROW *row, + uint extents_length, uint split_size) +{ + uint row_length= row->base_length; + uint *lengths, *lengths_end; + + DBUG_ASSERT(row_length < split_size); + /* + Store first in all_field_lengths the different parts that are written + to the row. This needs to be in same order as in + ma_block_rec.c::write_block_record() + */ + row->null_field_lengths[-3]= extents_length; + row->null_field_lengths[-2]= share->base.fixed_not_null_fields_length; + row->null_field_lengths[-1]= row->field_lengths_length; + for (lengths= row->null_field_lengths - EXTRA_LENGTH_FIELDS, + lengths_end= (lengths + share->base.pack_fields - share->base.blobs + + EXTRA_LENGTH_FIELDS); lengths < lengths_end; lengths++) + { + if (row_length + *lengths > split_size) + break; + row_length+= *lengths; + } + return row_length; +} + + +/* + Find where to write the middle parts of the row and the tail + + SYNOPSIS + write_rest_of_head() + info Maria handler + position Position in bitmap_blocks. Is 0 for rows that needs + full blocks (ie, has a head, middle part and optional tail) + rest_length How much left of the head block to write. + + RETURN + 0 ok + 1 error +*/ + +static my_bool write_rest_of_head(MARIA_HA *info, uint position, + ulong rest_length) +{ + MARIA_SHARE *share= info->s; + uint full_page_size= FULL_PAGE_SIZE(share->block_size); + MARIA_BITMAP_BLOCK *block; + DBUG_ENTER("write_rest_of_head"); + DBUG_PRINT("enter", ("position: %u rest_length: %lu", position, + rest_length)); + + if (position == 0) + { + /* Write out full pages */ + uint pages= rest_length / full_page_size; + + rest_length%= full_page_size; + if (rest_length >= MAX_TAIL_SIZE(share->block_size)) + { + /* Put tail on a full page */ + pages++; + rest_length= 0; + } + if (find_mid(info, pages, 1)) + DBUG_RETURN(1); + /* + Insert empty block after full pages, to allow write_block_record() to + split segment into used + free page + */ + block= dynamic_element(&info->bitmap_blocks, 2, MARIA_BITMAP_BLOCK*); + block->page_count= 0; + block->used= 0; + } + if (rest_length) + { + if (find_tail(info, rest_length, ELEMENTS_RESERVED_FOR_MAIN_PART - 1)) + DBUG_RETURN(1); + } + else + { + /* Empty tail block */ + block= dynamic_element(&info->bitmap_blocks, + ELEMENTS_RESERVED_FOR_MAIN_PART - 1, + MARIA_BITMAP_BLOCK *); + block->page_count= 0; + block->used= 0; + } + DBUG_RETURN(0); +} + + +/* + Find where to store one row + + SYNPOSIS + _ma_bitmap_find_place() + info Maria handler + row Information about row to write + blocks Store data about allocated places here + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_SHARE *share= info->s; + my_bool res= 1; + uint full_page_size, position, max_page_size; + uint head_length, row_length, rest_length, extents_length; + DBUG_ENTER("_ma_bitmap_find_place"); + + blocks->count= 0; + blocks->tail_page_skipped= blocks->page_skipped= 0; + row->extents_count= 0; + + /* + Reserve place for the following blocks: + - Head block + - Full page block + - Marker block to allow write_block_record() to split full page blocks + into full and free part + - Tail block + */ + + info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; + max_page_size= (share->block_size - PAGE_OVERHEAD_SIZE); + + pthread_mutex_lock(&share->bitmap.bitmap_lock); + + if (row->total_length <= max_page_size) + { + /* Row fits in one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + if (find_head(info, (uint) row->total_length, position)) + goto abort; + goto end; + } + + /* + First allocate all blobs (so that we can find out the needed size for + the main block. + */ + if (row->blob_length && allocate_blobs(info, row)) + goto abort; + + extents_length= row->extents_count * ROW_EXTENT_SIZE; + if ((head_length= (row->head_length + extents_length)) <= max_page_size) + { + /* Main row part fits into one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + if (find_head(info, head_length, position)) + goto abort; + goto end; + } + + /* Allocate enough space */ + head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; + + /* The first segment size is stored in 'row_length' */ + row_length= find_where_to_split_row(share, row, extents_length, + max_page_size); + + full_page_size= FULL_PAGE_SIZE(share->block_size); + position= 0; + if (head_length - row_length <= full_page_size) + position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ + if (find_head(info, row_length, position)) + goto abort; + rest_length= head_length - row_length; + if (write_rest_of_head(info, position, rest_length)) + goto abort; + +end: + blocks->block= dynamic_element(&info->bitmap_blocks, position, + MARIA_BITMAP_BLOCK*); + blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; + /* First block's page_count is for all blocks */ + blocks->count= info->bitmap_blocks.elements - position; + res= 0; + +abort: + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/* + Find where to put row on update (when head page is already defined) + + SYNPOSIS + _ma_bitmap_find_new_place() + info Maria handler + row Information about row to write + page On which page original row was stored + free_size Free size on head page + blocks Store data about allocated places here + + NOTES + This function is only called when the new row can't fit in the space of + the old row in the head page. + + This is essently same as _ma_bitmap_find_place() except that + we don't call find_head() to search in bitmaps where to put the page. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *row, + ulonglong page, uint free_size, + MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_SHARE *share= info->s; + my_bool res= 1; + uint full_page_size, position; + uint head_length, row_length, rest_length, extents_length; + DBUG_ENTER("_ma_bitmap_find_new_place"); + + blocks->count= 0; + blocks->tail_page_skipped= blocks->page_skipped= 0; + row->extents_count= 0; + info->bitmap_blocks.elements= ELEMENTS_RESERVED_FOR_MAIN_PART; + + pthread_mutex_lock(&share->bitmap.bitmap_lock); + if (share->bitmap.page != page / share->bitmap.pages_covered && + _ma_change_bitmap_page(info, &share->bitmap, + page / share->bitmap.pages_covered)) + goto abort; + + /* + First allocate all blobs (so that we can find out the needed size for + the main block. + */ + if (row->blob_length && allocate_blobs(info, row)) + goto abort; + + extents_length= row->extents_count * ROW_EXTENT_SIZE; + if ((head_length= (row->head_length + extents_length)) <= free_size) + { + /* Main row part fits into one page */ + position= ELEMENTS_RESERVED_FOR_MAIN_PART - 1; + use_head(info, page, head_length, position); + goto end; + } + + /* Allocate enough space */ + head_length+= ELEMENTS_RESERVED_FOR_MAIN_PART * ROW_EXTENT_SIZE; + + /* The first segment size is stored in 'row_length' */ + row_length= find_where_to_split_row(share, row, extents_length, free_size); + + full_page_size= FULL_PAGE_SIZE(share->block_size); + position= 0; + if (head_length - row_length <= full_page_size) + position= ELEMENTS_RESERVED_FOR_MAIN_PART -2; /* Only head and tail */ + use_head(info, page, row_length, position); + rest_length= head_length - row_length; + + if (write_rest_of_head(info, position, rest_length)) + goto abort; + +end: + blocks->block= dynamic_element(&info->bitmap_blocks, position, + MARIA_BITMAP_BLOCK*); + blocks->block->sub_blocks= ELEMENTS_RESERVED_FOR_MAIN_PART - position; + /* First block's page_count is for all blocks */ + blocks->count= info->bitmap_blocks.elements - position; + res= 0; + +abort: + pthread_mutex_unlock(&share->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/**************************************************************************** + Clear and reset bits +****************************************************************************/ + +/* + Set fill pattern for a page + + set_page_bits() + info Maria handler + bitmap Bitmap handler + page Adress to page + fill_pattern Pattern (not size) for page + + NOTES + Page may not be part of active bitmap + + RETURN + 0 ok + 1 error +*/ + +static my_bool set_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint fill_pattern) +{ + ulonglong bitmap_page; + uint offset_page, offset, tmp, org_tmp; + uchar *data; + DBUG_ENTER("set_page_bits"); + + bitmap_page= page - page % bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(1); + + /* Find page number from start of bitmap */ + offset_page= page - bitmap->page - 1; + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page*= 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + org_tmp= tmp= uint2korr(data); + tmp= (tmp & ~(7 << offset)) | (fill_pattern << offset); + if (tmp == org_tmp) + DBUG_RETURN(0); /* No changes */ + int2store(data, tmp); + + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + if (fill_pattern != 3 && fill_pattern != 7 && + bitmap_page < info->s->state.first_bitmap_with_space) + info->s->state.first_bitmap_with_space= bitmap_page; + DBUG_RETURN(0); +} + + +/* + Get bitmap pattern for a given page + + SYNOPSIS + get_page_bits() + info Maria handler + bitmap Bitmap handler + page Page number + + RETURN + 0-7 Bitmap pattern + ~0 Error (couldn't read page) +*/ + +static uint get_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page) +{ + ulonglong bitmap_page; + uint offset_page, offset, tmp; + uchar *data; + DBUG_ENTER("get_page_bits"); + + bitmap_page= page - page % bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(~ (uint) 0); + + /* Find page number from start of bitmap */ + offset_page= page - bitmap->page - 1; + /* + Mark place used by reading/writing 2 bytes at a time to handle + bitmaps in overlapping bytes + */ + offset_page*= 3; + offset= offset_page & 7; + data= bitmap->map + offset_page / 8; + tmp= uint2korr(data); + DBUG_RETURN((tmp >> offset) & 7); +} + + +/* + Mark all pages in a region as free + + SYNOPSIS + _ma_reset_full_page_bits() + info Maria handler + bitmap Bitmap handler + page Start page + page_count Number of pages + + NOTES + We assume that all pages in region is covered by same bitmap + One must have a lock on info->s->bitmap.bitmap_lock + + RETURN + 0 ok + 1 Error (when reading bitmap) +*/ + +my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint page_count) +{ + ulonglong bitmap_page; + uint offset, bit_start, bit_count, tmp; + uchar *data; + DBUG_ENTER("_ma_reset_full_page_bits"); + DBUG_PRINT("enter", ("page: %lu page_count: %u", (ulong) page, page_count)); + safe_mutex_assert_owner(&info->s->bitmap.bitmap_lock); + + bitmap_page= page - page % bitmap->pages_covered; + if (bitmap_page != bitmap->page && + _ma_change_bitmap_page(info, bitmap, bitmap_page)) + DBUG_RETURN(1); + + /* Find page number from start of bitmap */ + page= page - bitmap->page - 1; + + /* Clear bits from 'page * 3' -> '(page + page_count) * 3' */ + bit_start= page * 3; + bit_count= page_count * 3; + + data= bitmap->map + bit_start / 8; + offset= bit_start & 7; + + tmp= (255 << offset); /* Bits to keep */ + if (bit_count + offset < 8) + { + /* Only clear bits between 'offset' and 'offset+bit_count-1' */ + tmp^= (255 << (offset + bit_count)); + } + *data&= ~tmp; + + if ((int) (bit_count-= (8 - offset)) > 0) + { + uint fill; + data++; + /* + -1 is here to avoid one 'if' statement and to let the following code + handle the last byte + */ + if ((fill= (bit_count - 1) / 8)) + { + bzero(data, fill); + data+= fill; + } + bit_count-= fill * 8; /* Bits left to clear */ + tmp= (1 << bit_count) - 1; + *data&= ~tmp; + } + if (bitmap_page < info->s->state.first_bitmap_with_space) + info->s->state.first_bitmap_with_space= bitmap_page; + bitmap->changed= 1; + DBUG_EXECUTE("bitmap", _ma_print_bitmap(bitmap);); + DBUG_RETURN(0); +} + + +/* + Correct bitmap pages to reflect the true allocation + + SYNOPSIS + _ma_bitmap_release_unused() + info Maria handle + blocks Bitmap blocks + + IMPLEMENTATION + If block->used & BLOCKUSED_TAIL is set: + If block->used & BLOCKUSED_USED is set, then the bits for the + corresponding page is set according to block->empty_space + If block->used & BLOCKUSED_USED is not set, then the bits for + the corresponding page is set to org_bitmap_value; + + If block->used & BLOCKUSED_TAIL is not set: + if block->used is not set, the bits for the corresponding page are + cleared + + For the first block (head block) the logic is same as for a tail block + + Note that we may have 'filler blocks' that are used to split a block + in half; These can be recognized by that they have page_count == 0. + + RETURN + 0 ok + 1 error (Couldn't write or read bitmap page) +*/ + +my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks) +{ + MARIA_BITMAP_BLOCK *block= blocks->block, *end= block + blocks->count; + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint bits, current_bitmap_value; + DBUG_ENTER("_ma_bitmap_release_unused"); + + /* + We can skip FULL_HEAD_PAGE (4) as the page was marked as 'full' + when we allocated space in the page + */ + current_bitmap_value= FULL_HEAD_PAGE; + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + + /* First handle head block */ + if (block->used & BLOCKUSED_USED) + { + DBUG_PRINT("info", ("head empty_space: %u", block->empty_space)); + bits= _ma_free_size_to_head_pattern(bitmap, block->empty_space); + if (block->used & BLOCKUSED_USE_ORG_BITMAP) + current_bitmap_value= block->org_bitmap_value; + } + else + bits= block->org_bitmap_value; + if (bits != current_bitmap_value && + set_page_bits(info, bitmap, block->page, bits)) + goto err; + + + /* Handle all full pages and tail pages (for head page and blob) */ + for (block++; block < end; block++) + { + if (!block->page_count) + continue; /* Skip 'filler blocks' */ + + if (block->used & BLOCKUSED_TAIL) + { + if (block->used & BLOCKUSED_USED) + { + DBUG_PRINT("info", ("tail empty_space: %u", block->empty_space)); + bits= free_size_to_tail_pattern(bitmap, block->empty_space); + } + else + bits= block->org_bitmap_value; + + /* + The page has all bits set; The following test is an optimization + to not set the bits to the same value as before. + */ + if (bits != FULL_TAIL_PAGE && + set_page_bits(info, bitmap, block->page, bits)) + goto err; + } + if (!(block->used & BLOCKUSED_USED) && + _ma_reset_full_page_bits(info, bitmap, + block->page, block->page_count)) + goto err; + } + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(0); + +err: + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(1); +} + + +/* + Free full pages from bitmap and pagecache + + SYNOPSIS + _ma_bitmap_free_full_pages() + info Maria handle + extents Extents (as stored on disk) + count Number of extents + + IMPLEMENTATION + Mark all full pages (not tails) from extents as free, both in bitmap + and page cache. + + RETURN + 0 ok + 1 error (Couldn't write or read bitmap page) +*/ + +my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents, + uint count) +{ + DBUG_ENTER("_ma_bitmap_free_full_pages"); + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + for (; count--; extents += ROW_EXTENT_SIZE) + { + ulonglong page= uint5korr(extents); + uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE); + if (!(page_count & TAIL_BIT)) + { + if (pagecache_delete_pages(info->s->pagecache, &info->dfile, page, + page_count, PAGECACHE_LOCK_WRITE, 1)) + DBUG_RETURN(1); + if (_ma_reset_full_page_bits(info, &info->s->bitmap, page, page_count)) + { + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(1); + } + } + } + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(0); +} + + +/* + Mark in the bitmap how much free space there is on a page + + SYNOPSIS + _ma_bitmap_set() + info Mari handler + page Adress to page + head 1 if page is a head page, 0 if tail page + empty_space How much empty space there is on page + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong page, my_bool head, + uint empty_space) +{ + MARIA_FILE_BITMAP *bitmap= &info->s->bitmap; + uint bits; + my_bool res; + DBUG_ENTER("_ma_bitmap_set"); + + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + bits= (head ? + _ma_free_size_to_head_pattern(bitmap, empty_space) : + free_size_to_tail_pattern(bitmap, empty_space)); + res= set_page_bits(info, bitmap, page, bits); + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + DBUG_RETURN(res); +} + + +/* + Check that bitmap pattern is correct for a page + + NOTES + Used in maria_chk + + SYNOPSIS + _ma_check_bitmap_data() + info Maria handler + page_type What kind of page this is + page Adress to page + empty_space Empty space on page + bitmap_pattern Store here the pattern that was in the bitmap for the + page. This is always updated. + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_check_bitmap_data(MARIA_HA *info, + enum en_page_type page_type, ulonglong page, + uint empty_space, uint *bitmap_pattern) +{ + uint bits; + switch (page_type) { + case UNALLOCATED_PAGE: + case MAX_PAGE_TYPE: + bits= 0; + break; + case HEAD_PAGE: + bits= _ma_free_size_to_head_pattern(&info->s->bitmap, empty_space); + break; + case TAIL_PAGE: + bits= free_size_to_tail_pattern(&info->s->bitmap, empty_space); + break; + case BLOB_PAGE: + bits= FULL_TAIL_PAGE; + break; + } + return (*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) != + bits; +} + + +/* + Check if the page type matches the one that we have in the bitmap + + SYNOPSIS + _ma_check_if_right_bitmap_type() + info Maria handler + page_type What kind of page this is + page Adress to page + bitmap_pattern Store here the pattern that was in the bitmap for the + page. This is always updated. + + NOTES + Used in maria_chk + + RETURN + 0 ok + 1 error +*/ + +my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, + enum en_page_type page_type, + ulonglong page, + uint *bitmap_pattern) +{ + if ((*bitmap_pattern= get_page_bits(info, &info->s->bitmap, page)) > 7) + return 1; /* Couldn't read page */ + switch (page_type) { + case HEAD_PAGE: + return *bitmap_pattern < 1 || *bitmap_pattern > 4; + case TAIL_PAGE: + return *bitmap_pattern < 5; + case BLOB_PAGE: + return *bitmap_pattern != 7; + default: + break; + } + DBUG_ASSERT(0); + return 1; +} diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c new file mode 100644 index 00000000000..d2512f1e025 --- /dev/null +++ b/storage/maria/ma_blockrec.c @@ -0,0 +1,3995 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Storage of records in block + + Some clarifications about the abbrev used: + + NULL fields -> Fields that may have contain a NULL value. + Not null fields -> Fields that may not contain a NULL value. + Critical fields -> Fields that can't be null and can't be dropped without + causing a table reorganization. + + + Maria will have a LSN at start of each page (excluding the bitmap pages) + + The different page types that are in a data file are: + + Bitmap pages Map of free pages in the next extent (8192 page size + gives us 256M of mapped pages / bitmap) + Head page Start of rows are stored on this page. + A rowid always points to a head page + Blob page This page is totally filled with data from one blob or by + a set of long VARCHAR/CHAR fields + Tail page This contains the last part from different rows, blobs + or varchar fields. + + The data file starts with a bitmap page, followed by as many data + pages as the bitmap can cover. After this there is a new bitmap page + and more data pages etc. + + For information about the bitmap page, see ma_bitmap.c + + Structure of data and tail page: + + The page has a row directory at end of page to allow us to do deletes + without having to reorganize the page. It also allows us to later store + some more bytes after each row to allow them to grow without having to move + around other rows. + + Page header: + + LSN 7 bytes Log position for last page change + PAGE_TYPE 1 byte 1 for head / 2 for tail / 3 for blob + NO 1 byte Number of row/tail entries on page + empty space 2 bytes Empty space on page + + The most significant bit in PAGE_TYPE is set to 1 if the data on the page + can be compacted to get more space. (PAGE_CAN_BE_COMPACTED) + + Row data + + Row directory of NO entries, that consist of the following for each row + (in reverse order; i.e., first record is stored last): + + Position 2 bytes Position of row on page + Length 2 bytes Length of entry + + For Position and Length, the 1 most significant bit of the position and + the 1 most significant bit of the length could be used for some states of + the row (in other words, we should try to keep these reserved) + + eof flag 1 byte Reserved for full page read testing. (Ie, did the + previous write get the whole block on disk. + + ---------------- + + Structure of blob pages: + + LSN 7 bytes Log position for last page change + PAGE_TYPE 1 byte 3 + + data + + ----------------- + + Row data structure: + + Flag 1 byte Marker of which header field exists + TRANSID 6 bytes TRANSID of changing transaction + (optional, added on insert and first + update/delete) + VER_PTR 7 bytes Pointer to older version in log + (undo record) + (optional, added after first + update/delete) + DELETE_TRANSID 6 bytes (optional). TRANSID of original row. + Added on delete. + Nulls_extended 1 byte To allow us to add new DEFAULT NULL + fields (optional, added after first + change of row after alter table) + Number of ROW_EXTENT's 1-3 byte Length encoded, optional + This is the number of extents the + row is split into + First row_extent 7 byte Pointer to first row extent (optional) + + Total length of length array 1-3 byte Only used if we have + char/varchar/blob fields. + Row checksum 1 byte Only if table created with checksums + Null_bits .. One bit for each NULL field (a field that may + have the value NULL) + Empty_bits .. One bit for each field that may be 'empty'. + (Both for null and not null fields). + This bit is 1 if the value for the field is + 0 or empty string. + + field_offsets 2 byte/offset + For each 32'th field, there is one offset + that points to where the field information + starts in the block. This is to provide + fast access to later field in the row + when we only need to return a small + set of fields. + TODO: Implement this. + + Things marked above as 'optional' will only be present if the + corresponding bit is set in 'Flag' field. Flag gives us a way to + get more space on a page when doing page compaction as we don't need + to store TRANSID that have committed before the smallest running + transaction we have in memory. + + Data in the following order: + (Field order is precalculated when table is created) + + Critical fixed length, not null, fields. (Note, these can't be dropped) + Fixed length, null fields + + Length array, 1-4 byte per field for all CHAR/VARCHAR/BLOB fields. + Number of bytes used in length array per entry is depending on max length + for field. + + ROW_EXTENT's + CHAR data (space stripped) + VARCHAR data + BLOB data + + Fields marked in null_bits or empty_bits are not stored in data part or + length array. + + If row doesn't fit into the given block, then the first EXTENT will be + stored last on the row. This is done so that we don't break any field + data in the middle. + + We first try to store the full row into one block. If that's not possible + we move out each big blob into their own extents. If this is not enough we + move out a concatenation of all varchars to their own extent. + + Each blob and the concatenated char/varchar fields are stored the following + way: + - Store the parts in as many full-contiguous pages as possible. + - The last part, that doesn't fill a full page, is stored in tail page. + + When doing an insert of a new row, we don't have to have + VER_PTR in the row. This will make rows that are not changed stored + efficiently. On update and delete we would add TRANSID (if it was an old + committed row) and VER_PTR to + the row. On row page compaction we can easily detect rows where + TRANSID was committed before the longest running transaction + started and we can then delete TRANSID and VER_PTR from the row to + gain more space. + + If a row is deleted in Maria, we change TRANSID to the deleting + transaction's id, change VER_PTR to point to the undo record for the delete, + and add DELETE_TRANSID (the id of the transaction which last + inserted/updated the row before its deletion). DELETE_TRANSID allows an old + transaction to avoid reading the log to know if it can see the last version + before delete (in other words it reduces the probability of having to follow + VER_PTR). TODO: depending on a compilation option, evaluate the performance + impact of not storing DELETE_TRANSID (which would make the row smaller). + + Description of the different parts: + + Flag is coded as: + + Description bit + TRANS_ID_exists 0 + VER_PTR_exists 1 + Row is deleted 2 (Means that DELETE_TRANSID exists) + Nulls_extended_exists 3 + Row is split 7 This means that 'Number_of_row_extents' exists + + Nulls_extended is the number of new DEFAULT NULL fields in the row + compared to the number of DEFAULT NULL fields when the first version + of the table was created. If Nulls_extended doesn't exist in the row, + we know it's 0 as this must be one of the original rows from when the + table was created first time. This coding allows us to add 255*8 = + 2048 new fields without requiring a full alter table. + + Empty_bits is used to allow us to store 0, 0.0, empty string, empty + varstring and empty blob efficiently. (This is very good for data + warehousing where NULL's are often regarded as evil). Having this + bitmap also allows us to drop information of a field during a future + delete if field was deleted with ALTER TABLE DROP COLUMN. To be able + to handle DROP COLUMN, we must store in the index header the fields + that has been dropped. When unpacking a row we will ignore dropped + fields. When storing a row, we will mark a dropped field either with a + null in the null bit map or in the empty_bits and not store any data + for it. + TODO: Add code for handling dropped fields. + + + A ROW EXTENT is range of pages. One ROW_EXTENT is coded as: + + START_PAGE 5 bytes + PAGE_COUNT 2 bytes. High bit is used to indicate tail page/ + end of blob + With 8K pages, we can cover 256M in one extent. This coding gives us a + maximum file size of 2^40*8192 = 8192 tera + + As an example of ROW_EXTENT handling, assume a row with one integer + field (value 5), two big VARCHAR fields (size 250 and 8192*3), and 2 + big BLOB fields that we have updated. + + The record format for storing this into an empty file would be: + + Page 1: + + 00 00 00 00 00 00 00 LSN + 01 Only one row in page + xx xx Empty space on page + + 10 Flag: row split, VER_PTR exists + 01 00 00 00 00 00 TRANSID 1 + 00 00 00 00 00 01 00 VER_PTR to first block in LOG file 1 + 5 Number of row extents + 02 00 00 00 00 03 00 VARCHAR's are stored in full pages 2,3,4 + 0 No null fields + 0 No empty fields + 05 00 00 00 00 00 80 Tail page for VARCHAR, rowid 0 + 06 00 00 00 00 80 00 First blob, stored at page 6-133 + 05 00 00 00 00 01 80 Tail of first blob (896 bytes) at page 5 + 86 00 00 00 00 80 00 Second blob, stored at page 134-262 + 05 00 00 00 00 02 80 Tail of second blob (896 bytes) at page 5 + 05 00 5 integer + FA Length of first varchar field (size 250) + 00 60 Length of second varchar field (size 8192*3) + 00 60 10 First medium BLOB, 1M + 01 00 10 00 Second BLOB, 1M + xx xx xx xx xx xx Varchars are stored here until end of page + + ..... until end of page + + 09 00 F4 1F 00 (Start position 9, length 8180, end byte) +*/ + +#define SANITY_CHECKS + +#include "maria_def.h" +#include "ma_blockrec.h" +#include +#include "trnman.h" + +/* + Struct for having a cursor over a set of extent. + This is used to loop over all extents for a row when reading + the row data. It's also used to store the tail positions for + a read row to be used by a later update/delete command. +*/ + +typedef struct st_maria_extent_cursor +{ + /* + Pointer to packed byte array of extents for the row. + Format is described above in the header + */ + byte *extent; + /* Where data starts on page; Only for debugging */ + byte *data_start; + /* Position to all tails in the row. Updated when reading a row */ + MARIA_RECORD_POS *tail_positions; + /* Current page */ + my_off_t page; + /* How many pages in the page region */ + uint page_count; + /* Total number of extents (i.e., entries in the 'extent' slot) */ + uint extent_count; + /* <> 0 if current extent is a tail page; Set while using cursor */ + uint tail; + /* + <> 1 if we are working on the first extent (i.e., the one that is store in + the row header, not an extent that is stored as part of the row data). + */ + my_bool first_extent; +} MARIA_EXTENT_CURSOR; + + +static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails); +static my_bool delete_head_or_tail(MARIA_HA *info, + ulonglong page, uint record_number, + my_bool head); +static void _ma_print_directory(byte *buff, uint block_size); +static void compact_page(byte *buff, uint block_size, uint rownr, + my_bool extend_block); +static uchar *store_page_range(uchar *to, MARIA_BITMAP_BLOCK *block, + uint block_size, ulong length); +static size_t fill_insert_undo_parts(MARIA_HA *info, const byte *record, + LEX_STRING *log_parts, + uint *log_parts_count); +static size_t fill_update_undo_parts(MARIA_HA *info, const byte *oldrec, + const byte *newrec, + LEX_STRING *log_parts, + uint *log_parts_count); + +/**************************************************************************** + Initialization +****************************************************************************/ + +/* + Initialize data needed for block structures +*/ + + +/* Size of the different header elements for a row */ + +static uchar header_sizes[]= +{ + TRANSID_SIZE, + VERPTR_SIZE, + TRANSID_SIZE, /* Delete transid */ + 1 /* Null extends */ +}; + +/* + Calculate array of all used headers + + Used to speed up: + + size= 1; + if (flag & 1) + size+= TRANSID_SIZE; + if (flag & 2) + size+= VERPTR_SIZE; + if (flag & 4) + size+= TRANSID_SIZE + if (flag & 8) + size+= 1; + + NOTES + This is called only once at startup of Maria +*/ + +static uchar total_header_size[1 << array_elements(header_sizes)]; +#define PRECALC_HEADER_BITMASK (array_elements(total_header_size) -1) + +void _ma_init_block_record_data(void) +{ + uint i; + bzero(total_header_size, sizeof(total_header_size)); + total_header_size[0]= FLAG_SIZE; /* Flag byte */ + for (i= 1; i < array_elements(total_header_size); i++) + { + uint size= FLAG_SIZE, j, bit; + for (j= 0; (bit= (1 << j)) <= i; j++) + { + if (i & bit) + size+= header_sizes[j]; + } + total_header_size[i]= size; + } +} + + +my_bool _ma_once_init_block_record(MARIA_SHARE *share, File data_file) +{ + + share->base.max_data_file_length= + (((ulonglong) 1 << ((share->base.rec_reflength-1)*8))-1) * + share->block_size; +#if SIZEOF_OFF_T == 4 + set_if_smaller(share->base.max_data_file_length, INT_MAX32); +#endif + return _ma_bitmap_init(share, data_file); +} + + +my_bool _ma_once_end_block_record(MARIA_SHARE *share) +{ + int res= _ma_bitmap_end(share); + if (share->bitmap.file.file >= 0) + { + if (flush_pagecache_blocks(share->pagecache, &share->bitmap.file, + share->temporary ? FLUSH_IGNORE_CHANGED : + FLUSH_RELEASE)) + res= 1; + /* + File must be synced as it is going out of the maria_open_list and so + becoming unknown to Checkpoint. + */ + if (my_sync(share->bitmap.file.file, MYF(MY_WME)) || + my_close(share->bitmap.file.file, MYF(MY_WME))) + res= 1; + /* + Trivial assignment to guard against multiple invocations + (May happen if file are closed but we want to keep the maria object + around a bit longer) + */ + share->bitmap.file.file= -1; + } + if (share->id != 0) + translog_deassign_id_from_share(share); + return res; +} + + +/* Init info->cur_row structure */ + +my_bool _ma_init_block_record(MARIA_HA *info) +{ + MARIA_ROW *row= &info->cur_row, *new_row= &info->new_row; + DBUG_ENTER("_ma_init_block_record"); + + if (!my_multi_malloc(MY_WME, + &row->empty_bits, info->s->base.pack_bytes, + &row->field_lengths, + info->s->base.max_field_lengths + 2, + &row->blob_lengths, sizeof(ulong) * info->s->base.blobs, + &row->null_field_lengths, (sizeof(uint) * + (info->s->base.fields - + info->s->base.blobs + + EXTRA_LENGTH_FIELDS)), + &row->tail_positions, (sizeof(MARIA_RECORD_POS) * + (info->s->base.blobs + 2)), + &new_row->empty_bits, info->s->base.pack_bytes, + &new_row->field_lengths, + info->s->base.max_field_lengths + 2, + &new_row->blob_lengths, + sizeof(ulong) * info->s->base.blobs, + &new_row->null_field_lengths, (sizeof(uint) * + (info->s->base.fields - + info->s->base.blobs + + EXTRA_LENGTH_FIELDS)), + &info->log_row_parts, + sizeof(*info->log_row_parts) * + (TRANSLOG_INTERNAL_PARTS + 2 + + info->s->base.fields + 2), + &info->update_field_data, + (info->s->base.fields * 4 + + info->s->base.max_field_lengths + 1 + 4), + NullS, 0)) + DBUG_RETURN(1); + /* Skip over bytes used to store length of field length for logging */ + row->field_lengths+= 2; + new_row->field_lengths+= 2; + if (my_init_dynamic_array(&info->bitmap_blocks, + sizeof(MARIA_BITMAP_BLOCK), + ELEMENTS_RESERVED_FOR_MAIN_PART, 16)) + goto err; + /* The following should be big enough for all purposes */ + if (my_init_dynamic_array(&info->pinned_pages, + sizeof(MARIA_PINNED_PAGE), + max(info->s->base.blobs + 2, + MARIA_MAX_TREE_LEVELS*2), 16)) + goto err; + row->base_length= new_row->base_length= info->s->base_length; + + /* + We need to reserve 'EXTRA_LENGTH_FIELDS' number of parts in + null_field_lengths to allow splitting of rows in 'find_where_to_split_row' + */ + + row->null_field_lengths+= EXTRA_LENGTH_FIELDS; + new_row->null_field_lengths+= EXTRA_LENGTH_FIELDS; + + DBUG_RETURN(0); + +err: + _ma_end_block_record(info); + DBUG_RETURN(1); +} + + +void _ma_end_block_record(MARIA_HA *info) +{ + DBUG_ENTER("_ma_end_block_record"); + my_free((gptr) info->cur_row.empty_bits, MYF(MY_ALLOW_ZERO_PTR)); + delete_dynamic(&info->bitmap_blocks); + delete_dynamic(&info->pinned_pages); + my_free((gptr) info->cur_row.extents, MYF(MY_ALLOW_ZERO_PTR)); + /* + The data file is closed, when needed, in ma_once_end_block_record(). + The following protects us from doing an extra, not allowed, close + in maria_close() + */ + info->dfile.file= -1; + DBUG_VOID_RETURN; +} + + +/**************************************************************************** + Helper functions +****************************************************************************/ + +/* + Return the next used byte on the page after a directory entry. + + SYNOPSIS + start_of_next_entry() + dir Directory entry to be used + + RETURN + # Position in page where next entry starts. + Everything between the '*dir' and this are free to be used. +*/ + +static inline uint start_of_next_entry(byte *dir) +{ + byte *prev; + /* + Find previous used entry. (There is always a previous entry as + the directory never starts with a deleted entry) + */ + for (prev= dir - DIR_ENTRY_SIZE ; + prev[0] == 0 && prev[1] == 0 ; + prev-= DIR_ENTRY_SIZE) + {} + return (uint) uint2korr(prev); +} + + +/* + Check that a region is all zero + + SYNOPSIS + check_if_zero() + pos Start of memory to check + length length of memory region + + NOTES + Used mainly to detect rows with wrong extent information +*/ + +static my_bool check_if_zero(byte *pos, uint length) +{ + byte *end; + for (end= pos+ length; pos != end ; pos++) + if (pos[0] != 0) + return 1; + return 0; +} + + +/* + Unpin all pinned pages + + SYNOPSIS + _ma_unpin_all_pages() + info Maria handler + undo_lsn LSN for undo pages. 0 if we shouldn't write undo (error) + + NOTE + We unpin pages in the reverse order as they where pinned; This may not + be strictly necessary but may simplify things in the future. + + info->s->rec_lsn contains the lsn for the first REDO + + RETURN + 0 ok + 1 error (fatal disk error) + +*/ + +void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn) +{ + MARIA_PINNED_PAGE *page_link= ((MARIA_PINNED_PAGE*) + dynamic_array_ptr(&info->pinned_pages, 0)); + MARIA_PINNED_PAGE *pinned_page= page_link + info->pinned_pages.elements; + DBUG_ENTER("_ma_unpin_all_pages"); + DBUG_PRINT("info", ("undo_lsn: %lu", (ulong) undo_lsn)); + + /* True if not disk error */ + DBUG_ASSERT(undo_lsn != 0 || !info->s->base.transactional); + + if (!info->s->base.transactional) + { + /* + If this is a transactional table but with transactionality temporarily + disabled (like in ALTER TABLE) we need to give a sensible LSN to pages + and not 0. If this is not a transactional table it will reduce to 0. + */ + undo_lsn= info->s->state.create_rename_lsn; + } + + while (pinned_page-- != page_link) + pagecache_unlock_by_link(info->s->pagecache, pinned_page->link, + pinned_page->unlock, PAGECACHE_UNPIN, + info->trn->rec_lsn, undo_lsn); + + info->trn->rec_lsn= 0; + info->pinned_pages.elements= 0; + DBUG_VOID_RETURN; +} + + +/* + Find free position in directory + + SYNOPSIS + find_free_position() + buff Page + block_size Size of page + res_rownr Store index to free position here + res_length Store length of found segment here + empty_space Store length of empty space on disk here. This is + all empty space, including the found block. + + NOTES + If there is a free directory entry (entry with position == 0), + then use it and change it to be the size of the empty block + after the previous entry. This guarantees that all row entries + are stored on disk in inverse directory order, which makes life easier for + 'compact_page()' and to know if there is free space after any block. + + If there is no free entry (entry with position == 0), then we create + a new one. If there is not space for the directory entry (because + the last block overlapps with the directory), we compact the page. + + We will update the offset and the length of the found dir entry to + match the position and empty space found. + + buff[EMPTY_SPACE_OFFSET] is NOT updated but left up to the caller + + RETURN + 0 Error (directory full or last block goes over directory) + # Pointer to directory entry on page +*/ + +static byte *find_free_position(byte *buff, uint block_size, uint *res_rownr, + uint *res_length, uint *empty_space) +{ + uint max_entry= (uint) ((uchar*) buff)[DIR_COUNT_OFFSET]; + uint entry, length, first_pos; + byte *dir, *end; + DBUG_ENTER("find_free_position"); + DBUG_PRINT("info", ("max_entry: %u", max_entry)); + + dir= (buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE); + end= buff + block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + + *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + /* Search after first empty position */ + first_pos= PAGE_HEADER_SIZE; + for (entry= 0 ; dir <= end ; end-= DIR_ENTRY_SIZE, entry++) + { + uint tmp= uint2korr(end); + if (!tmp) /* Found not used entry */ + { + length= start_of_next_entry(end) - first_pos; + int2store(end, first_pos); /* Update dir entry */ + int2store(end + 2, length); + *res_rownr= entry; + *res_length= length; + DBUG_RETURN(end); + } + first_pos= tmp + uint2korr(end + 2); + } + /* No empty places in dir; create a new one */ + dir= end; + /* Check if there is place for the directory entry */ + if (max_entry == MAX_ROWS_PER_PAGE) + DBUG_RETURN(0); + /* Check if there is place for the directory entry */ + if ((uint) (dir - buff) < first_pos) + { + /* Create place for directory */ + compact_page(buff, block_size, max_entry-1, 0); + first_pos= (uint2korr(end + DIR_ENTRY_SIZE) + + uint2korr(end + DIR_ENTRY_SIZE+ 2)); + *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + } + buff[DIR_COUNT_OFFSET]= (byte) (uchar) max_entry+1; + length= (uint) (dir - buff - first_pos); + DBUG_ASSERT(length <= *empty_space - DIR_ENTRY_SIZE); + int2store(dir, first_pos); + int2store(dir+2, length); /* Current max length */ + *res_rownr= max_entry; + *res_length= length; + + /* Reduce directory entry size from free space size */ + (*empty_space)-= DIR_ENTRY_SIZE; + DBUG_RETURN(dir); +} + + +/**************************************************************************** + Updating records +****************************************************************************/ + +/* + Calculate length of all the different field parts + + SYNOPSIS + calc_record_size() + info Maria handler + record Row to store + row Store statistics about row here + + NOTES + The statistics is used to find out how much space a row will need + and also where we can split a row when we need to split it into several + extents. +*/ + +static void calc_record_size(MARIA_HA *info, const byte *record, + MARIA_ROW *row) +{ + MARIA_SHARE *share= info->s; + byte *field_length_data; + MARIA_COLUMNDEF *column, *end_column; + uint *null_field_lengths= row->null_field_lengths; + ulong *blob_lengths= row->blob_lengths; + + row->normal_length= row->char_length= row->varchar_length= + row->blob_length= row->extents_count= 0; + + /* Create empty bitmap and calculate length of each varlength/char field */ + bzero(row->empty_bits, share->base.pack_bytes); + field_length_data= row->field_lengths; + for (column= share->columndef + share->base.fixed_not_null_fields, + end_column= share->columndef + share->base.fields; + column < end_column; column++, null_field_lengths++) + { + if ((record[column->null_pos] & column->null_bit)) + { + if (column->type != FIELD_BLOB) + *null_field_lengths= 0; + else + *blob_lengths++= 0; + continue; + } + switch ((enum en_fieldtype) column->type) { + case FIELD_CHECK: + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_ZERO: + DBUG_ASSERT(column->empty_bit == 0); + /* fall through */ + case FIELD_SKIP_PRESPACE: /* Not packed */ + row->normal_length+= column->length; + *null_field_lengths= column->length; + break; + case FIELD_SKIP_ZERO: /* Fixed length field */ + if (memcmp(record+ column->offset, maria_zero_string, + column->length) == 0) + { + row->empty_bits[column->empty_pos] |= column->empty_bit; + *null_field_lengths= 0; + } + else + { + row->normal_length+= column->length; + *null_field_lengths= column->length; + } + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + const char *pos, *end; + for (pos= record + column->offset, end= pos + column->length; + end > pos && end[-1] == ' '; end--) + ; + if (pos == end) /* If empty string */ + { + row->empty_bits[column->empty_pos]|= column->empty_bit; + *null_field_lengths= 0; + } + else + { + uint length= (end - pos); + if (column->length <= 255) + *field_length_data++= (byte) (uchar) length; + else + { + int2store(field_length_data, length); + field_length_data+= 2; + } + row->char_length+= length; + *null_field_lengths= length; + } + break; + } + case FIELD_VARCHAR: + { + uint length, field_length_data_length; + const byte *field_pos= record + column->offset; + /* 256 is correct as this includes the length byte */ + + field_length_data[0]= field_pos[0]; + if (column->length <= 256) + { + length= (uint) (uchar) *field_pos; + field_length_data_length= 1; + } + else + { + length= uint2korr(field_pos); + field_length_data[1]= field_pos[1]; + field_length_data_length= 2; + } + *null_field_lengths= length; + if (!length) + { + row->empty_bits[column->empty_pos]|= column->empty_bit; + break; + } + row->varchar_length+= length; + *null_field_lengths= length; + field_length_data+= field_length_data_length; + break; + } + case FIELD_BLOB: + { + const byte *field_pos= record + column->offset; + uint size_length= column->length - portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length, field_pos); + + *blob_lengths++= blob_length; + if (!blob_length) + row->empty_bits[column->empty_pos]|= column->empty_bit; + else + { + row->blob_length+= blob_length; + memcpy(field_length_data, field_pos, size_length); + field_length_data+= size_length; + } + break; + } + default: + DBUG_ASSERT(0); + } + } + row->field_lengths_length= (uint) (field_length_data - row->field_lengths); + row->head_length= (row->base_length + + share->base.fixed_not_null_fields_length + + row->field_lengths_length + + size_to_store_key_length(row->field_lengths_length) + + row->normal_length + + row->char_length + row->varchar_length); + row->total_length= (row->head_length + row->blob_length); + if (row->total_length < share->base.min_row_length) + row->total_length= share->base.min_row_length; +} + + +/* + Compact page by removing all space between rows + + IMPLEMENTATION + Move up all rows to start of page. + Move blocks that are directly after each other with one memmove. + + TODO LATER + Remove TRANSID from rows that are visible to all transactions + + SYNOPSIS + compact_page() + buff Page to compact + block_size Size of page + recnr Put empty data after this row + extend_block If 1, extend the block at 'rownr' to cover the + whole block. +*/ + + +static void compact_page(byte *buff, uint block_size, uint rownr, + my_bool extend_block) +{ + uint max_entry= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; + uint page_pos, next_free_pos, start_of_found_block, diff, end_of_found_block; + byte *dir, *end; + DBUG_ENTER("compact_page"); + DBUG_PRINT("enter", ("rownr: %u", rownr)); + DBUG_ASSERT(max_entry > 0 && + max_entry < (block_size - PAGE_HEADER_SIZE - + PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE); + + /* Move all entries before and including rownr up to start of page */ + dir= buff + block_size - DIR_ENTRY_SIZE * (rownr+1) - PAGE_SUFFIX_SIZE; + end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE; + page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE; + diff= 0; + for (; dir <= end ; end-= DIR_ENTRY_SIZE) + { + uint offset= uint2korr(end); + + if (offset) + { + uint row_length= uint2korr(end + 2); + DBUG_ASSERT(offset >= page_pos); + DBUG_ASSERT(buff + offset + row_length <= dir); + + if (offset != next_free_pos) + { + uint length= (next_free_pos - start_of_found_block); + /* + There was empty space before this and prev block + Check if we have to move previous block up to page start + */ + if (page_pos != start_of_found_block) + { + /* move up previous block */ + memmove(buff + page_pos, buff + start_of_found_block, length); + } + page_pos+= length; + /* next continuous block starts here */ + start_of_found_block= offset; + diff= offset - page_pos; + } + int2store(end, offset - diff); /* correct current pos */ + next_free_pos= offset + row_length; + } + } + if (page_pos != start_of_found_block) + { + uint length= (next_free_pos - start_of_found_block); + memmove(buff + page_pos, buff + start_of_found_block, length); + } + start_of_found_block= uint2korr(dir); + + if (rownr != max_entry - 1) + { + /* Move all entries after rownr to end of page */ + uint rownr_length; + next_free_pos= end_of_found_block= page_pos= + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE; + diff= 0; + /* End points to entry before 'rownr' */ + for (dir= buff + end_of_found_block ; dir <= end ; dir+= DIR_ENTRY_SIZE) + { + uint offset= uint2korr(dir); + uint row_length= uint2korr(dir + 2); + uint row_end= offset + row_length; + if (!offset) + continue; + DBUG_ASSERT(offset >= start_of_found_block && row_end <= next_free_pos); + + if (row_end != next_free_pos) + { + uint length= (end_of_found_block - next_free_pos); + if (page_pos != end_of_found_block) + { + /* move next block down */ + memmove(buff + page_pos - length, buff + next_free_pos, length); + } + page_pos-= length; + /* next continuous block starts here */ + end_of_found_block= row_end; + diff= page_pos - row_end; + } + int2store(dir, offset + diff); /* correct current pos */ + next_free_pos= offset; + } + if (page_pos != end_of_found_block) + { + uint length= (end_of_found_block - next_free_pos); + memmove(buff + page_pos - length, buff + next_free_pos, length); + next_free_pos= page_pos- length; + } + /* Extend rownr block to cover hole */ + rownr_length= next_free_pos - start_of_found_block; + int2store(dir+2, rownr_length); + } + else + { + if (extend_block) + { + /* Extend last block cover whole page */ + uint length= (uint) (dir - buff) - start_of_found_block; + int2store(dir+2, length); + } + buff[PAGE_TYPE_OFFSET]&= ~(byte) PAGE_CAN_BE_COMPACTED; + } + DBUG_EXECUTE("directory", _ma_print_directory(buff, block_size);); + DBUG_VOID_RETURN; +} + + + +/* + Read or initialize new head or tail page + + SYNOPSIS + get_head_or_tail_page() + info Maria handler + block Block to read + buff Suggest this buffer to key cache + length Minimum space needed + page_type HEAD_PAGE || TAIL_PAGE + res Store result position here + + NOTES + We don't decremented buff[EMPTY_SPACE_OFFSET] with the allocated data + as we don't know how much data the caller will actually use. + + RETURN + 0 ok All slots in 'res' are updated + 1 error my_errno is set +*/ + +struct st_row_pos_info +{ + byte *buff; /* page buffer */ + byte *data; /* Place for data */ + byte *dir; /* Directory */ + uint length; /* Length for data */ + uint rownr; /* Offset in directory */ + uint empty_space; /* Space left on page */ +}; + +static my_bool get_head_or_tail_page(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *buff, uint length, uint page_type, + enum pagecache_page_lock lock, + struct st_row_pos_info *res) +{ + uint block_size; + MARIA_PINNED_PAGE page_link; + MARIA_SHARE *share= info->s; + DBUG_ENTER("get_head_or_tail_page"); + DBUG_PRINT("enter", ("length: %u", length)); + + block_size= share->block_size; + if (block->org_bitmap_value == 0) /* Empty block */ + { + /* New page */ + bzero(buff, PAGE_HEADER_SIZE); + + /* + We zero the rest of the block to avoid getting old memory information + to disk and to allow the file to be compressed better if archived. + The rest of the code does not assume the block is zeroed above + PAGE_OVERHEAD_SIZE + */ + bzero(buff+ PAGE_HEADER_SIZE, block_size - PAGE_HEADER_SIZE); + + buff[PAGE_TYPE_OFFSET]= (byte) page_type; + buff[DIR_COUNT_OFFSET]= 1; + res->buff= buff; + res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE); + res->data= (buff + PAGE_HEADER_SIZE); + res->dir= res->data + res->length; + res->rownr= 0; + /* Store position to the first row */ + int2store(res->dir, PAGE_HEADER_SIZE); + DBUG_ASSERT(length <= res->length); + } + else + { + byte *dir; + /* Read old page */ + DBUG_ASSERT(share->pagecache->block_size == block_size); + if (!(res->buff= pagecache_read(share->pagecache, + &info->dfile, + (my_off_t) block->page, 0, + buff, share->page_type, + lock, &page_link.link))) + DBUG_RETURN(1); + page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; + push_dynamic(&info->pinned_pages, (void*) &page_link); + + DBUG_ASSERT((res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type); + if (!(dir= find_free_position(res->buff, block_size, &res->rownr, + &res->length, &res->empty_space))) + goto crashed; + + if (res->length < length) + { + if (res->empty_space + res->length < length) + { + compact_page(res->buff, block_size, res->rownr, 1); + /* All empty space are now after current position */ + dir= (res->buff + block_size - DIR_ENTRY_SIZE * res->rownr - + PAGE_SUFFIX_SIZE); + res->length= res->empty_space= uint2korr(dir+2); + } + if (res->length < length) + goto crashed; /* Wrong bitmap information */ + } + res->dir= dir; + res->data= res->buff + uint2korr(dir); + } + DBUG_RETURN(0); + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_RETURN(1); +} + + +/* + Write tail for head data or blob + + SYNOPSIS + write_tail() + info Maria handler + block Block to tail page + row_part Data to write to page + length Length of data + + NOTES + block->page_count is updated to the directory offset for the tail + so that we can store the position in the row extent information + + RETURN + 0 ok + block->page_count is set to point (dir entry + TAIL_BIT) + + 1 error; In this case my_errno is set to the error +*/ + +static my_bool write_tail(MARIA_HA *info, + MARIA_BITMAP_BLOCK *block, + byte *row_part, uint length) +{ + MARIA_SHARE *share= share= info->s; + MARIA_PINNED_PAGE page_link; + uint block_size= share->block_size, empty_space; + struct st_row_pos_info row_pos; + my_off_t position; + my_bool res, block_is_read; + DBUG_ENTER("write_tail"); + DBUG_PRINT("enter", ("page: %lu length: %u", + (ulong) block->page, length)); + + info->keyread_buff_used= 1; + + /* page will be pinned & locked by get_head_or_tail_page */ + if (get_head_or_tail_page(info, block, info->keyread_buff, length, + TAIL_PAGE, PAGECACHE_LOCK_WRITE, + &row_pos)) + DBUG_RETURN(1); + block_is_read= block->org_bitmap_value != 0; + + memcpy(row_pos.data, row_part, length); + + { + /* Log changes in tail block */ + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; + LSN lsn; + + /* Log REDO changes of tail page */ + page_store(log_data+ FILEID_STORE_SIZE, block->page); + dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + row_pos.rownr); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) row_pos.data; + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length; + if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_TAIL, + info->trn, share, sizeof(log_data) + length, + TRANSLOG_INTERNAL_PARTS + 2, log_array, + log_data)) + DBUG_RETURN(1); + } + + /* + Don't allocate smaller block than MIN_TAIL_SIZE (we want to give rows + some place to grow in the future) + */ + if (length < MIN_TAIL_SIZE) + length= MIN_TAIL_SIZE; + int2store(row_pos.dir + 2, length); + empty_space= row_pos.empty_space - length; + int2store(row_pos.buff + EMPTY_SPACE_OFFSET, empty_space); + block->page_count= row_pos.rownr + TAIL_BIT; + /* + If there is less directory entries free than number of possible tails + we can write for a row, we mark the page full to ensure that we don't + during _ma_bitmap_find_place() allocate more entries on the tail page + than it can hold + */ + block->empty_space= ((uint) ((uchar*) row_pos.buff)[DIR_COUNT_OFFSET] <= + MAX_ROWS_PER_PAGE - 1 - share->base.blobs ? + empty_space : 0); + block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + + /* Increase data file size, if extended */ + position= (my_off_t) block->page * block_size; + if (info->state->data_file_length <= position) + info->state->data_file_length= position + block_size; + + DBUG_ASSERT(share->pagecache->block_size == block_size); + if (!(res= pagecache_write(share->pagecache, + &info->dfile, block->page, 0, + row_pos.buff,share->page_type, + block_is_read ? PAGECACHE_LOCK_WRITE_TO_READ : + PAGECACHE_LOCK_READ, + block_is_read ? PAGECACHE_PIN_LEFT_PINNED : + PAGECACHE_PIN, + PAGECACHE_WRITE_DELAY, &page_link.link))) + { + page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; + if (block_is_read) + { + /* Change the lock used when we read the page */ + set_dynamic(&info->pinned_pages, (void*) &page_link, + info->pinned_pages.elements-1); + } + else + push_dynamic(&info->pinned_pages, (void*) &page_link); + } + DBUG_RETURN(res); +} + + +/* + Write full pages + + SYNOPSIS + write_full_pages() + info Maria handler + lsn LSN for the undo record + block Where to write data + data Data to write + length Length of data + + NOTES + Logging of the changes to the full pages are done in the caller + write_block_record(). + + RETURN + 0 ok + 1 error on write +*/ + +static my_bool write_full_pages(MARIA_HA *info, + LSN lsn, + MARIA_BITMAP_BLOCK *block, + byte *data, ulong length) +{ + my_off_t page; + MARIA_SHARE *share= share= info->s; + uint block_size= share->block_size; + uint data_size= FULL_PAGE_SIZE(block_size); + byte *buff= info->keyread_buff; + uint page_count; + my_off_t position; + DBUG_ENTER("write_full_pages"); + DBUG_PRINT("enter", ("length: %lu page: %lu page_count: %lu", + (ulong) length, (ulong) block->page, + (ulong) block->page_count)); + + info->keyread_buff_used= 1; + page= block->page; + page_count= block->page_count; + + position= (my_off_t) (page + page_count) * block_size; + if (info->state->data_file_length < position) + info->state->data_file_length= position; + + /* Increase data file size, if extended */ + + for (; length; data+= data_size) + { + uint copy_length; + if (!page_count--) + { + block++; + page= block->page; + page_count= block->page_count - 1; + DBUG_PRINT("info", ("page: %lu page_count: %lu", + (ulong) block->page, (ulong) block->page_count)); + + position= (page + page_count + 1) * block_size; + if (info->state->data_file_length < position) + info->state->data_file_length= position; + } + lsn_store(buff, lsn); + buff[PAGE_TYPE_OFFSET]= (byte) BLOB_PAGE; + copy_length= min(data_size, length); + memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length); + length-= copy_length; + + DBUG_ASSERT(share->pagecache->block_size == block_size); + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, share->page_type, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0)) + DBUG_RETURN(1); + page++; + block->used= BLOCKUSED_USED; + } + DBUG_RETURN(0); +} + + +/* + Store ranges of full pages in compact format for logging + + SYNOPSIS + store_page_range() + to Store data here + block Where pages are to be written + block_size block size + length Length of data to be written + Normally this is full pages, except for the last + tail block that may only partly fit the last page. + + RETURN + # end position for 'to' +*/ + +static uchar *store_page_range(uchar *to, MARIA_BITMAP_BLOCK *block, + uint block_size, ulong length) +{ + uint data_size= FULL_PAGE_SIZE(block_size); + ulong pages_left= (length + data_size -1) / data_size; + uint page_count; + DBUG_ENTER("store_page_range"); + + do + { + ulonglong page; + page= block->page; + page_count= block->page_count; + block++; + if (page_count > pages_left) + page_count= pages_left; + + page_store(to, page); + to+= PAGE_STORE_SIZE; + pagerange_store(to, page_count); + to+= PAGERANGE_STORE_SIZE; + } while ((pages_left-= page_count)); + DBUG_RETURN(to); +} + + +/* + Store packed extent data + + SYNOPSIS + store_extent_info() + to Store first packed data here + row_extents_second_part Store rest here + first_block First block to store + count Number of blocks + + NOTES + We don't have to store the position for the head block +*/ + +static void store_extent_info(byte *to, + byte *row_extents_second_part, + MARIA_BITMAP_BLOCK *first_block, + uint count) +{ + MARIA_BITMAP_BLOCK *block, *end_block; + uint copy_length; + my_bool first_found= 0; + + for (block= first_block, end_block= first_block+count ; + block < end_block; block++) + { + /* The following is only false for marker blocks */ + if (likely(block->used & BLOCKUSED_USED)) + { + DBUG_ASSERT(block->page_count != 0); + page_store(to, block->page); + pagerange_store(to + PAGE_STORE_SIZE, block->page_count); + to+= ROW_EXTENT_SIZE; + if (!first_found) + { + first_found= 1; + to= row_extents_second_part; + } + } + } + copy_length= (count - 1) * ROW_EXTENT_SIZE; + /* + In some unlikely cases we have allocated to many blocks. Clear this + data. + */ + bzero(to, (my_size_t) (row_extents_second_part + copy_length - to)); +} + + +/* + Free regions of pages with logging + + RETURN + 0 ok + 1 error +*/ + +static my_bool free_full_pages(MARIA_HA *info, MARIA_ROW *row) +{ + uchar log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; + LSN lsn; + size_t extents_length= row->extents_count * ROW_EXTENT_SIZE; + DBUG_ENTER("free_full_pages"); + + pagerange_store(log_data + FILEID_STORE_SIZE, + row->extents_count); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= row->extents; + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= extents_length; + if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, info->trn, + info->s, sizeof(log_data) + extents_length, + TRANSLOG_INTERNAL_PARTS + 2, log_array, + log_data)) + DBUG_RETURN(1); + + DBUG_RETURN (_ma_bitmap_free_full_pages(info, row->extents, + row->extents_count)); +} + + +/* + Free one page range + + NOTES + This is very similar to free_full_pages() + + RETURN + 0 ok + 1 error +*/ + +static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count) +{ + uchar log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + + ROW_EXTENT_SIZE]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + my_bool res= 0; + + if (pagecache_delete_pages(info->s->pagecache, &info->dfile, + page, count, PAGECACHE_LOCK_WRITE, 0)) + res= 1; + + if (info->s->base.transactional) + { + LSN lsn; + DBUG_ASSERT(info->trn->rec_lsn); + pagerange_store(log_data + FILEID_STORE_SIZE, 1); + int5store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, + page); + int2store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + 5, + count); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + + if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, + info->trn, info->s, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + res= 1; + + } + pthread_mutex_lock(&info->s->bitmap.bitmap_lock); + if (_ma_reset_full_page_bits(info, &info->s->bitmap, page, + count)) + res= 1; + pthread_mutex_unlock(&info->s->bitmap.bitmap_lock); + return res; +} + + +/** + @brief Write a record to a (set of) pages + + @param info Maria handler + @param old_record Original record in case of update; NULL in case of + insert + @param record Record we should write + @param row Statistics about record (calculated by + calc_record_size()) + @param map_blocks On which pages the record should be stored + @param row_pos Position on head page where to put head part of + record + + @note + On return all pinned pages are released. + + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +static my_bool write_block_record(MARIA_HA *info, + const byte *old_record, const byte *record, + MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *bitmap_blocks, + my_bool head_block_is_read, + struct st_row_pos_info *row_pos) +{ + byte *data, *end_of_data, *tmp_data_used, *tmp_data; + byte *row_extents_first_part, *row_extents_second_part; + byte *field_length_data; + byte *page_buff; + MARIA_BITMAP_BLOCK *block, *head_block; + MARIA_SHARE *share= info->s; + MARIA_COLUMNDEF *column, *end_column; + MARIA_PINNED_PAGE page_link; + uint block_size, flag; + ulong *blob_lengths; + my_bool row_extents_in_use, blob_full_pages_exists; + LSN lsn; + my_off_t position; + DBUG_ENTER("write_block_record"); + + LINT_INIT(row_extents_first_part); + LINT_INIT(row_extents_second_part); + + head_block= bitmap_blocks->block; + block_size= share->block_size; + + page_buff= row_pos->buff; + /* Position on head page where we should store the head part */ + data= row_pos->data; + end_of_data= data + row_pos->length; + + /* Write header */ + flag= share->base.default_row_flag; + row_extents_in_use= 0; + if (unlikely(row->total_length > row_pos->length)) + { + /* Need extent */ + if (bitmap_blocks->count <= 1) + goto crashed; /* Wrong in bitmap */ + flag|= ROW_FLAG_EXTENTS; + row_extents_in_use= 1; + } + /* For now we have only a minimum header */ + *data++= (uchar) flag; + if (unlikely(flag & ROW_FLAG_NULLS_EXTENDED)) + *data++= (uchar) (share->base.null_bytes - + share->base.original_null_bytes); + if (row_extents_in_use) + { + /* Store first extent in header */ + store_key_length_inc(data, bitmap_blocks->count - 1); + row_extents_first_part= data; + data+= ROW_EXTENT_SIZE; + } + if (share->base.pack_fields) + store_key_length_inc(data, row->field_lengths_length); + if (share->calc_checksum) + *(data++)= (byte) info->cur_row.checksum; + memcpy(data, record, share->base.null_bytes); + data+= share->base.null_bytes; + memcpy(data, row->empty_bits, share->base.pack_bytes); + data+= share->base.pack_bytes; + + /* + Allocate a buffer of rest of data (except blobs) + + To avoid double copying of data, we copy as many columns that fits into + the page. The rest goes into info->packed_row. + + Using an extra buffer, instead of doing continuous writes to different + pages, uses less code and we don't need to have to do a complex call + for every data segment we want to store. + */ + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + row->head_length)) + DBUG_RETURN(1); + + tmp_data_used= 0; /* Either 0 or last used byte in 'data' */ + tmp_data= data; + + if (row_extents_in_use) + { + uint copy_length= (bitmap_blocks->count - 2) * ROW_EXTENT_SIZE; + if (!tmp_data_used && tmp_data + copy_length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + row_extents_second_part= tmp_data; + /* + We will copy the extents here when we have figured out the tail + positions. + */ + tmp_data+= copy_length; + } + + /* Copy fields that has fixed lengths (primary key etc) */ + for (column= share->columndef, + end_column= column + share->base.fixed_not_null_fields; + column < end_column; column++) + { + if (!tmp_data_used && tmp_data + column->length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + memcpy(tmp_data, record + column->offset, column->length); + tmp_data+= column->length; + } + + /* Copy length of data for variable length fields */ + if (!tmp_data_used && tmp_data + row->field_lengths_length > end_of_data) + { + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + field_length_data= row->field_lengths; + memcpy(tmp_data, field_length_data, row->field_lengths_length); + tmp_data+= row->field_lengths_length; + + /* Copy variable length fields and fields with null/zero */ + for (end_column= share->columndef + share->base.fields - share->base.blobs; + column < end_column ; + column++) + { + const byte *field_pos; + ulong length; + if ((record[column->null_pos] & column->null_bit) || + (row->empty_bits[column->empty_pos] & column->empty_bit)) + continue; + + field_pos= record + column->offset; + switch ((enum en_fieldtype) column->type) { + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_SKIP_PRESPACE: + case FIELD_SKIP_ZERO: /* Fixed length field */ + length= column->length; + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + /* Char that is space filled */ + if (column->length <= 255) + length= (uint) (uchar) *field_length_data++; + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + } + break; + case FIELD_VARCHAR: + if (column->length <= 256) + { + length= (uint) (uchar) *field_length_data++; + field_pos++; /* Skip length byte */ + } + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + field_pos+= 2; + } + break; + default: /* Wrong data */ + DBUG_ASSERT(0); + break; + } + if (!tmp_data_used && tmp_data + length > end_of_data) + { + /* Data didn't fit in page; Change to use tmp buffer */ + tmp_data_used= tmp_data; + tmp_data= info->rec_buff; + } + memcpy((char*) tmp_data, (char*) field_pos, length); + tmp_data+= length; + } + + block= head_block + head_block->sub_blocks; /* Point to first blob data */ + + end_column= column + share->base.blobs; + blob_lengths= row->blob_lengths; + if (!tmp_data_used) + { + /* Still room on page; Copy as many blobs we can into this page */ + data= tmp_data; + for (; column < end_column && *blob_lengths < (ulong) (end_of_data - data); + column++, blob_lengths++) + { + byte *tmp_pos; + uint length; + if (!*blob_lengths) /* Null or "" */ + continue; + length= column->length - portable_sizeof_char_ptr; + memcpy_fixed((byte*) &tmp_pos, record + column->offset + length, + sizeof(char*)); + memcpy(data, tmp_pos, *blob_lengths); + data+= *blob_lengths; + /* Skip over tail page that was to be used to store blob */ + block++; + bitmap_blocks->tail_page_skipped= 1; + } + if (head_block->sub_blocks > 1) + { + /* We have allocated pages that where not used */ + bitmap_blocks->page_skipped= 1; + } + } + else + data= tmp_data_used; /* Get last used on page */ + + { + /* Update page directory */ + uint length= (uint) (data - row_pos->data); + DBUG_PRINT("info", ("head length: %u", length)); + if (length < info->s->base.min_row_length) + length= info->s->base.min_row_length; + + int2store(row_pos->dir + 2, length); + /* update empty space at start of block */ + row_pos->empty_space-= length; + int2store(page_buff + EMPTY_SPACE_OFFSET, row_pos->empty_space); + /* Mark in bitmaps how the current page was actually used */ + head_block->empty_space= row_pos->empty_space; + if (page_buff[DIR_COUNT_OFFSET] == (char) MAX_ROWS_PER_PAGE) + head_block->empty_space= 0; /* Page is full */ + head_block->used= BLOCKUSED_USED; + } + + /* + Now we have to write tail pages, as we need to store the position + to them in the row extent header. + + We first write out all blob tails, to be able to store them in + the current page or 'tmp_data'. + + Then we write the tail of the non-blob fields (The position to the + tail page is stored either in row header, the extents in the head + page or in the first full page of the non-blob data. It's never in + the tail page of the non-blob data) + */ + + blob_full_pages_exists= 0; + if (row_extents_in_use) + { + if (column != end_column) /* If blob fields */ + { + MARIA_COLUMNDEF *save_column= column; + MARIA_BITMAP_BLOCK *save_block= block; + MARIA_BITMAP_BLOCK *end_block; + ulong *save_blob_lengths= blob_lengths; + + for (; column < end_column; column++, blob_lengths++) + { + byte *blob_pos; + if (!*blob_lengths) /* Null or "" */ + continue; + if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL) + { + uint length; + length= column->length - portable_sizeof_char_ptr; + memcpy_fixed((byte *) &blob_pos, record + column->offset + length, + sizeof(char*)); + length= *blob_lengths % FULL_PAGE_SIZE(block_size); /* tail size */ + if (length != *blob_lengths) + blob_full_pages_exists= 1; + if (write_tail(info, block + block->sub_blocks-1, + blob_pos + *blob_lengths - length, + length)) + goto disk_err; + } + else + blob_full_pages_exists= 1; + + for (end_block= block + block->sub_blocks; block < end_block; block++) + { + /* + Set only a bit, to not cause bitmap code to believe a block is full + when there is still a lot of entries in it + */ + block->used|= BLOCKUSED_USED; + } + } + column= save_column; + block= save_block; + blob_lengths= save_blob_lengths; + } + + if (tmp_data_used) /* non blob data overflows */ + { + MARIA_BITMAP_BLOCK *cur_block, *end_block, *last_head_block; + MARIA_BITMAP_BLOCK *head_tail_block= 0; + ulong length; + ulong data_length= (tmp_data - info->rec_buff); + +#ifdef SANITY_CHECK + if (cur_block->sub_blocks == 1) + goto crashed; /* no reserved full or tails */ +#endif + + /* + Find out where to write tail for non-blob fields. + + Problem here is that the bitmap code may have allocated more + space than we need. We have to handle the following cases: + + - Bitmap code allocated a tail page we don't need. + - The last full page allocated needs to be changed to a tail page + (Because we where able to put more data on the head page than + the bitmap allocation assumed) + + The reserved pages in bitmap_blocks for the main page has one of + the following allocations: + - Full pages, with following blocks: + # * full pages + empty page ; To be used if we change last full to tail page. This + has 'count' = 0. + tail page (optional, if last full page was part full) + - One tail page + */ + + cur_block= head_block + 1; + end_block= head_block + head_block->sub_blocks; + /* + Loop until we have find a block bigger than we need or + we find the empty page block. + */ + while (data_length >= (length= (cur_block->page_count * + FULL_PAGE_SIZE(block_size))) && + cur_block->page_count) + { +#ifdef SANITY_CHECK + if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_BIT)) + goto crashed; +#endif + data_length-= length; + (cur_block++)->used= BLOCKUSED_USED; + } + last_head_block= cur_block; + if (data_length) + { + if (cur_block->page_count == 0) + { + /* Skip empty filler block */ + cur_block++; + } +#ifdef SANITY_CHECK + if ((cur_block >= end_block)) + goto crashed; +#endif + if (cur_block->used & BLOCKUSED_TAIL) + { + DBUG_ASSERT(data_length < MAX_TAIL_SIZE(block_size)); + /* tail written to full tail page */ + cur_block->used= BLOCKUSED_USED; + head_tail_block= cur_block; + } + else if (data_length > length - MAX_TAIL_SIZE(block_size)) + { + /* tail written to full page */ + cur_block->used= BLOCKUSED_USED; + if ((cur_block != end_block - 1) && + (end_block[-1].used & BLOCKUSED_TAIL)) + bitmap_blocks->tail_page_skipped= 1; + } + else + { + /* + cur_block is a full block, followed by an empty and optional + tail block. Change cur_block to a tail block or split it + into full blocks and tail blocks. + + TODO: + If there is enough space on the following tail block, use + this instead of creating a new tail block. + */ + DBUG_ASSERT(cur_block[1].page_count == 0); + if (cur_block->page_count == 1) + { + /* convert full block to tail block */ + cur_block->used= BLOCKUSED_USED | BLOCKUSED_TAIL; + head_tail_block= cur_block; + } + else + { + DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(block_size)); + DBUG_PRINT("info", ("Splitting blocks into full and tail")); + cur_block[1].page= (cur_block->page + cur_block->page_count - 1); + cur_block[1].page_count= 1; /* Avoid DBUG_ASSERT */ + cur_block[1].used= BLOCKUSED_USED | BLOCKUSED_TAIL; + cur_block->page_count--; + cur_block->used= BLOCKUSED_USED; + last_head_block= head_tail_block= cur_block+1; + } + if (end_block[-1].used & BLOCKUSED_TAIL) + bitmap_blocks->tail_page_skipped= 1; + } + } + else + { + /* Must be an empty or tail page */ + DBUG_ASSERT(cur_block->page_count == 0 || + cur_block->used & BLOCKUSED_TAIL); + if (end_block[-1].used & BLOCKUSED_TAIL) + bitmap_blocks->tail_page_skipped= 1; + } + + /* + Write all extents into page or tmp_data + + Note that we still don't have a correct position for the tail + of the non-blob fields. + */ + store_extent_info(row_extents_first_part, + row_extents_second_part, + head_block+1, bitmap_blocks->count - 1); + if (head_tail_block) + { + ulong data_length= (tmp_data - info->rec_buff); + uint length; + byte *extent_data; + + length= (uint) (data_length % FULL_PAGE_SIZE(block_size)); + if (write_tail(info, head_tail_block, + info->rec_buff + data_length - length, + length)) + goto disk_err; + tmp_data-= length; /* Remove the tail */ + + /* Store the tail position for the non-blob fields */ + if (head_tail_block == head_block + 1) + { + /* + We had a head block + tail block, which means that the + tail block is the first extent + */ + extent_data= row_extents_first_part; + } + else + { + /* + We have a head block + some full blocks + tail block + last_head_block is pointing after the last used extent + for the head block. + */ + extent_data= row_extents_second_part + + ((last_head_block - head_block) - 2) * ROW_EXTENT_SIZE; + } + DBUG_ASSERT(uint2korr(extent_data+5) & TAIL_BIT); + int5store(extent_data, head_tail_block->page); + int2store(extent_data + 5, head_tail_block->page_count); + } + } + else + store_extent_info(row_extents_first_part, + row_extents_second_part, + head_block+1, bitmap_blocks->count - 1); + } + + if (share->base.transactional) + { + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; + size_t data_length= (size_t) (data - row_pos->data); + + /* Log REDO changes of head page */ + page_store(log_data+ FILEID_STORE_SIZE, head_block->page); + dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + row_pos->rownr); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char*) row_pos->data; + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= data_length; + if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_HEAD, info->trn, + share, sizeof(log_data) + data_length, + TRANSLOG_INTERNAL_PARTS + 2, log_array, + log_data)) + goto disk_err; + } + + /* Increase data file size, if extended */ + position= (my_off_t) head_block->page * block_size; + if (info->state->data_file_length <= position) + info->state->data_file_length= position + block_size; + + DBUG_ASSERT(share->pagecache->block_size == block_size); + if (pagecache_write(share->pagecache, + &info->dfile, head_block->page, 0, + page_buff, share->page_type, + head_block_is_read ? PAGECACHE_LOCK_WRITE_TO_READ : + PAGECACHE_LOCK_READ, + head_block_is_read ? PAGECACHE_PIN_LEFT_PINNED : + PAGECACHE_PIN, + PAGECACHE_WRITE_DELAY, &page_link.link)) + goto disk_err; + page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; + if (head_block_is_read) + { + /* Head page is always the first pinned page */ + set_dynamic(&info->pinned_pages, (void*) &page_link, 0); + } + else + push_dynamic(&info->pinned_pages, (void*) &page_link); + + if (share->base.transactional && (tmp_data_used || blob_full_pages_exists)) + { + /* + Log REDO writes for all full pages (head part and all blobs) + We write all here to be able to generate the UNDO record early + so that we can write the LSN for the UNDO record to all full pages. + */ + uchar tmp_log_data[FILEID_STORE_SIZE + LSN_STORE_SIZE + PAGE_STORE_SIZE + + ROW_EXTENT_SIZE * ROW_EXTENTS_ON_STACK]; + uchar *log_data, *log_pos; + LEX_STRING tmp_log_array[TRANSLOG_INTERNAL_PARTS + 2 + + ROW_EXTENTS_ON_STACK]; + LEX_STRING *log_array_pos, *log_array; + int error; + ulong log_entry_length= 0; + + /* If few extents, then allocate things on stack to avoid a malloc call */ + if (bitmap_blocks->count < ROW_EXTENTS_ON_STACK) + { + log_array= tmp_log_array; + log_data= tmp_log_data; + } + else + { + if (my_multi_malloc(MY_WME, &log_array, + (uint) ((bitmap_blocks->count + + TRANSLOG_INTERNAL_PARTS + 2) * + sizeof(*log_array)), + &log_data, bitmap_blocks->count * ROW_EXTENT_SIZE, + NullS)) + goto disk_err; + } + log_pos= log_data + FILEID_STORE_SIZE; + log_array_pos= log_array+ TRANSLOG_INTERNAL_PARTS+1; + + if (tmp_data_used) + { + /* Full head pages */ + size_t data_length= (ulong) (tmp_data - info->rec_buff); + log_pos= store_page_range(log_pos, head_block+1, block_size, + data_length); + log_array_pos->str= (char*) info->rec_buff; + log_array_pos->length= data_length; + log_entry_length+= data_length; + log_array_pos++; + } + if (blob_full_pages_exists) + { + MARIA_COLUMNDEF *tmp_column= column; + ulong *tmp_blob_lengths= blob_lengths; + MARIA_BITMAP_BLOCK *tmp_block= block; + + /* Full blob pages */ + for (; tmp_column < end_column; tmp_column++, tmp_blob_lengths++) + { + ulong blob_length; + uint length; + + if (!*tmp_blob_lengths) /* Null or "" */ + continue; + length= tmp_column->length - portable_sizeof_char_ptr; + blob_length= *tmp_blob_lengths; + if (tmp_block[tmp_block->sub_blocks - 1].used & BLOCKUSED_TAIL) + blob_length-= (blob_length % FULL_PAGE_SIZE(block_size)); + if (blob_length) + { + memcpy_fixed((byte*) &log_array_pos->str, + record + column->offset + length, + sizeof(byte*)); + log_array_pos->length= blob_length; + log_entry_length+= blob_length; + log_array_pos++; + + log_pos= store_page_range(log_pos, tmp_block, block_size, + blob_length); + tmp_block+= tmp_block->sub_blocks; + } + } + } + + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (size_t) (log_pos - + log_data); + log_entry_length+= (log_pos - log_data); + + /* trn->rec_lsn is already set earlier in this function */ + error= translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_BLOBS, + info->trn, share, log_entry_length, + (uint) (log_array_pos - log_array), + log_array, log_data); + if (log_array != tmp_log_array) + my_free((gptr) log_array, MYF(0)); + if (error) + goto disk_err; + } + + /* Write UNDO record */ + if (share->base.transactional) + { + uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; + LEX_STRING *log_array= info->log_row_parts; + + /* LOGREC_UNDO_ROW_INSERT & LOGREC_UNDO_ROW_INSERT share same header */ + lsn_store(log_data, info->trn->undo_lsn); + page_store(log_data+ LSN_STORE_SIZE + FILEID_STORE_SIZE, + head_block->page); + dirpos_store(log_data+ LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE, + row_pos->rownr); + + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + + if (!old_record) + { + /* Write UNDO log record for the INSERT */ + if (translog_write_record(&lsn, LOGREC_UNDO_ROW_INSERT, + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data + LSN_STORE_SIZE)) + goto disk_err; + } + else + { + /* Write UNDO log record for the UPDATE */ + size_t row_length; + uint row_parts_count; + row_length= fill_update_undo_parts(info, old_record, record, + info->log_row_parts + + TRANSLOG_INTERNAL_PARTS + 1, + &row_parts_count); + if (translog_write_record(&lsn, LOGREC_UNDO_ROW_UPDATE, info->trn, + share, sizeof(log_data) + row_length, + TRANSLOG_INTERNAL_PARTS + 1 + row_parts_count, + log_array, log_data + LSN_STORE_SIZE)) + goto disk_err; + } + } + + _ma_unpin_all_pages(info, info->trn->undo_lsn); + + if (tmp_data_used) + { + /* + Write data stored in info->rec_buff to pages + This is the char/varchar data that didn't fit into the head page. + */ + DBUG_ASSERT(bitmap_blocks->count != 0); + if (write_full_pages(info, info->trn->undo_lsn, head_block + 1, + info->rec_buff, (ulong) (tmp_data - info->rec_buff))) + goto disk_err; + } + + /* Write rest of blobs (data, but no tails as they are already written) */ + for (; column < end_column; column++, blob_lengths++) + { + byte *blob_pos; + uint length; + ulong blob_length; + if (!*blob_lengths) /* Null or "" */ + continue; + length= column->length - portable_sizeof_char_ptr; + memcpy_fixed((byte*) &blob_pos, record + column->offset + length, + sizeof(char*)); + /* remove tail part */ + blob_length= *blob_lengths; + if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL) + blob_length-= (blob_length % FULL_PAGE_SIZE(block_size)); + + if (write_full_pages(info, info->trn->undo_lsn, block, + blob_pos, blob_length)) + goto disk_err; + block+= block->sub_blocks; + } + /* Release not used space in used pages */ + if (_ma_bitmap_release_unused(info, bitmap_blocks)) + goto disk_err; + DBUG_RETURN(0); + +crashed: + /* Something was wrong with data on page */ + my_errno= HA_ERR_WRONG_IN_RECORD; + +disk_err: + /** + @todo RECOVERY we are going to let dirty pages go to disk while we have + logged UNDO, this violates WAL. If we have not written any full pages, + all dirty pages are pinned so we could just delete them from the + pagecache. Moreover, we have written some REDOs without a closing UNDO, + it's possible that a next operation by this transaction succeeds and then + Recovery would glue the "orphan REDOs" to the succeeded operation and + execute the failed REDOs. + */ + /* Unpin all pinned pages to not cause problems for disk cache */ + _ma_unpin_all_pages(info, 0); + + DBUG_RETURN(1); +} + + +/* + Write a record (to get the row id for it) + + SYNOPSIS + _ma_write_init_block_record() + info Maria handler + record Record to write + + NOTES + This is done BEFORE we write the keys to the row! + + RETURN + HA_OFFSET_ERROR Something went wrong + # Rowid for row +*/ + +MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info, + const byte *record) +{ + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + struct st_row_pos_info row_pos; + DBUG_ENTER("_ma_write_init_block_record"); + + calc_record_size(info, record, &info->cur_row); + if (_ma_bitmap_find_place(info, &info->cur_row, blocks)) + DBUG_RETURN(HA_OFFSET_ERROR); /* Error reading bitmap */ + /* page will be pinned & locked by get_head_or_tail_page */ + if (get_head_or_tail_page(info, blocks->block, info->buff, + info->s->base.min_row_length, HEAD_PAGE, + PAGECACHE_LOCK_WRITE, &row_pos)) + DBUG_RETURN(HA_OFFSET_ERROR); + info->cur_row.lastpos= ma_recordpos(blocks->block->page, row_pos.rownr); + if (info->s->calc_checksum) + info->cur_row.checksum= (info->s->calc_checksum)(info,record); + if (write_block_record(info, (byte*) 0, record, &info->cur_row, + blocks, blocks->block->org_bitmap_value != 0, + &row_pos)) + DBUG_RETURN(HA_OFFSET_ERROR); /* Error reading bitmap */ + DBUG_PRINT("exit", ("Rowid: %lu", (ulong) info->cur_row.lastpos)); + info->s->state.split++; + DBUG_RETURN(info->cur_row.lastpos); +} + + +/* + Dummy function for (*info->s->write_record)() + + Nothing to do here, as we already wrote the record in + _ma_write_init_block_record() +*/ + +my_bool _ma_write_block_record(MARIA_HA *info __attribute__ ((unused)), + const byte *record __attribute__ ((unused))) +{ + return 0; /* Row already written */ +} + + +/** + @brief Remove row written by _ma_write_block_record() + + @param info Maria handler + + @note + This is called in case we got a duplicate unique key while + writing keys. + + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +my_bool _ma_write_abort_block_record(MARIA_HA *info) +{ + my_bool res= 0; + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + MARIA_BITMAP_BLOCK *block, *end; + DBUG_ENTER("_ma_abort_write_block_record"); + + if (delete_head_or_tail(info, + ma_recordpos_to_page(info->cur_row.lastpos), + ma_recordpos_to_dir_entry(info->cur_row.lastpos), 1)) + res= 1; + for (block= blocks->block + 1, end= block + blocks->count - 1; block < end; + block++) + { + if (block->used & BLOCKUSED_TAIL) + { + /* + block->page_count is set to the tail directory entry number in + write_block_record() + */ + if (delete_head_or_tail(info, block->page, block->page_count & ~TAIL_BIT, + 0)) + res= 1; + } + else if (block->used & BLOCKUSED_USED) + { + if (free_full_page_range(info, block->page, block->page_count)) + res= 1; + } + } + + if (info->s->base.transactional) + { + LSN lsn; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + uchar log_data[LSN_STORE_SIZE]; + + /* + Write UNDO record + This entry is just an end marker for the abort_insert as we will never + really undo a failed insert. Note that this UNDO will cause recover + to ignore the LOGREC_UNDO_ROW_INSERT that is the previous entry + in the UNDO chain. + */ + /** + @todo RECOVERY BUG + We will soon change that: we will here execute the UNDO records + generated while we were trying to write the row; this will log some + CLRs which will replace this LOGREC_UNDO_PURGE. + */ + lsn_store(log_data, info->trn->undo_lsn); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, LOGREC_UNDO_ROW_PURGE, + info->trn, NULL, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, NULL)) + res= 1; + } + _ma_unpin_all_pages(info, info->trn->undo_lsn); + DBUG_RETURN(res); +} + + +/* + Update a record + + NOTES + For the moment, we assume that info->curr_row.extents is always updated + when a row is read. In the future we may decide to read this on demand + for rows split into many extents. +*/ + +my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos, + const byte *oldrec, const byte *record) +{ + MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; + byte *buff; + MARIA_ROW *cur_row= &info->cur_row, *new_row= &info->new_row; + MARIA_PINNED_PAGE page_link; + uint rownr, org_empty_size, head_length; + uint block_size= info->s->block_size; + byte *dir; + ulonglong page; + struct st_row_pos_info row_pos; + MARIA_SHARE *share= info->s; + DBUG_ENTER("_ma_update_block_record"); + DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); + + calc_record_size(info, record, new_row); + page= ma_recordpos_to_page(record_pos); + + DBUG_ASSERT(share->pagecache->block_size == block_size); + if (!(buff= pagecache_read(share->pagecache, + &info->dfile, (my_off_t) page, 0, + info->buff, share->page_type, + PAGECACHE_LOCK_WRITE, &page_link.link))) + DBUG_RETURN(1); + page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; + push_dynamic(&info->pinned_pages, (void*) &page_link); + + org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET); + rownr= ma_recordpos_to_dir_entry(record_pos); + dir= (buff + block_size - DIR_ENTRY_SIZE * rownr - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + + if ((org_empty_size + cur_row->head_length) >= new_row->total_length) + { + uint empty, offset, length; + MARIA_BITMAP_BLOCK block; + + /* + We can fit the new row in the same page as the original head part + of the row + */ + block.org_bitmap_value= _ma_free_size_to_head_pattern(&share->bitmap, + org_empty_size); + offset= uint2korr(dir); + length= uint2korr(dir + 2); + empty= 0; + if (new_row->total_length > length) + { + /* See if there is empty space after */ + if (rownr != (uint) ((uchar *) buff)[DIR_COUNT_OFFSET] - 1) + empty= start_of_next_entry(dir) - (offset + length); + if (new_row->total_length > length + empty) + { + compact_page(buff, share->block_size, rownr, 1); + org_empty_size= 0; + length= uint2korr(dir + 2); + } + } + row_pos.buff= buff; + row_pos.rownr= rownr; + row_pos.empty_space= org_empty_size + length; + row_pos.dir= dir; + row_pos.data= buff + uint2korr(dir); + row_pos.length= length + empty; + blocks->block= █ + blocks->count= 1; + block.page= page; + block.sub_blocks= 1; + block.used= BLOCKUSED_USED | BLOCKUSED_USE_ORG_BITMAP; + block.empty_space= row_pos.empty_space; + /* Update cur_row, if someone calls update at once again */ + cur_row->head_length= new_row->total_length; + + if (free_full_pages(info, cur_row)) + goto err; + DBUG_RETURN(write_block_record(info, oldrec, record, new_row, blocks, + 1, &row_pos)); + } + /* + Allocate all size in block for record + QQ: Need to improve this to do compact if we can fit one more blob into + the head page + */ + head_length= uint2korr(dir + 2); + if (buff[PAGE_TYPE_OFFSET] & PAGE_CAN_BE_COMPACTED && org_empty_size && + (head_length < new_row->head_length || + (new_row->total_length <= head_length && + org_empty_size + head_length >= new_row->total_length))) + { + compact_page(buff, share->block_size, rownr, 1); + org_empty_size= 0; + head_length= uint2korr(dir + 2); + } + + /* Delete old row */ + if (delete_tails(info, cur_row->tail_positions)) + goto err; + if (free_full_pages(info, cur_row)) + goto err; + if (_ma_bitmap_find_new_place(info, new_row, page, head_length, blocks)) + goto err; + + row_pos.buff= buff; + row_pos.rownr= rownr; + row_pos.empty_space= org_empty_size + head_length; + row_pos.dir= dir; + row_pos.data= buff + uint2korr(dir); + row_pos.length= head_length; + DBUG_RETURN(write_block_record(info, oldrec, record, new_row, blocks, 1, + &row_pos)); + +err: + _ma_unpin_all_pages(info, 0); + DBUG_RETURN(1); +} + + +/* + Delete a head a tail part + + SYNOPSIS + delete_head_or_tail() + info Maria handler + page Page (not file offset!) on which the row is + head 1 if this is a head page + + NOTES + Uses info->keyread_buff + + RETURN + 0 ok + 1 error +*/ + +static my_bool delete_head_or_tail(MARIA_HA *info, + ulonglong page, uint record_number, + my_bool head) +{ + MARIA_SHARE *share= info->s; + uint number_of_records, empty_space, length; + uint block_size= share->block_size; + byte *buff, *dir; + LSN lsn; + MARIA_PINNED_PAGE page_link; + DBUG_ENTER("delete_head_or_tail"); + + info->keyread_buff_used= 1; + DBUG_ASSERT(info->s->pagecache->block_size == block_size); + if (!(buff= pagecache_read(share->pagecache, + &info->dfile, page, 0, + info->keyread_buff, + info->s->page_type, + PAGECACHE_LOCK_WRITE, &page_link.link))) + DBUG_RETURN(1); + page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; + push_dynamic(&info->pinned_pages, (void*) &page_link); + + number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 - + PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE)) + { + DBUG_PRINT("error", ("record_number: %u number_of_records: %u", + record_number, number_of_records)); + DBUG_RETURN(1); + } +#endif + + dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + dir[0]= dir[1]= 0; /* Delete entry */ + length= uint2korr(dir + 2); + empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); + + if (record_number == number_of_records - 1) + { + /* Delete this entry and all following empty directory entries */ + byte *end= buff + block_size - PAGE_SUFFIX_SIZE; + do + { + number_of_records--; + dir+= DIR_ENTRY_SIZE; + empty_space+= DIR_ENTRY_SIZE; + } while (dir < end && dir[0] == 0 && dir[1] == 0); + buff[DIR_COUNT_OFFSET]= (byte) (uchar) number_of_records; + } + empty_space+= length; + if (number_of_records != 0) + { + uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + + /* Update directory */ + int2store(buff + EMPTY_SPACE_OFFSET, empty_space); + buff[PAGE_TYPE_OFFSET]|= (byte) PAGE_CAN_BE_COMPACTED; + DBUG_ASSERT(share->pagecache->block_size == block_size); + + /* Log REDO data */ + page_store(log_data+ FILEID_STORE_SIZE, page); + dirpos_store(log_data+ FILEID_STORE_SIZE + PAGE_STORE_SIZE, + record_number); + + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, (head ? LOGREC_REDO_PURGE_ROW_HEAD : + LOGREC_REDO_PURGE_ROW_TAIL), + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + if (pagecache_write(share->pagecache, + &info->dfile, page, 0, + buff, share->page_type, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, &page_link.link)) + DBUG_RETURN(1); + + /* Change the lock used when we read the page */ + page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK; + set_dynamic(&info->pinned_pages, (void*) &page_link, + info->pinned_pages.elements-1); + } + else + { + uchar log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + + PAGE_STORE_SIZE + PAGERANGE_STORE_SIZE]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + + pagerange_store(log_data + FILEID_STORE_SIZE, 1); + page_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE, page); + pagerange_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + + PAGE_STORE_SIZE, 1); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, + info->trn, share, sizeof(log_data), + TRANSLOG_INTERNAL_PARTS + 1, log_array, + log_data)) + DBUG_RETURN(1); + DBUG_ASSERT(empty_space >= info->s->bitmap.sizes[0]); + } + DBUG_PRINT("info", ("empty_space: %u", empty_space)); + DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space)); +} + + +/* + delete all tails + + SYNOPSIS + delete_tails() + info Handler + tails Pointer to vector of tail positions, ending with 0 + + NOTES + Uses info->keyread_buff + + RETURN + 0 ok + 1 error +*/ + +static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails) +{ + my_bool res= 0; + DBUG_ENTER("delete_tails"); + for (; *tails; tails++) + { + if (delete_head_or_tail(info, + ma_recordpos_to_page(*tails), + ma_recordpos_to_dir_entry(*tails), 0)) + res= 1; + } + DBUG_RETURN(res); +} + + +/* + Delete a record + + NOTES + For the moment, we assume that info->cur_row.extents is always updated + when a row is read. In the future we may decide to read this on demand + for rows with many splits. +*/ + +my_bool _ma_delete_block_record(MARIA_HA *info, const byte *record) +{ + ulonglong page; + uint record_number; + DBUG_ENTER("_ma_delete_block_record"); + + page= ma_recordpos_to_page(info->cur_row.lastpos); + record_number= ma_recordpos_to_dir_entry(info->cur_row.lastpos); + + if (delete_head_or_tail(info, page, record_number, 1) || + delete_tails(info, info->cur_row.tail_positions)) + goto err; + + info->s->state.split--; + + if (info->cur_row.extents && free_full_pages(info, &info->cur_row)) + goto err; + + if (info->s->base.transactional) + { + LSN lsn; + uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + + DIR_COUNT_SIZE]; + size_t row_length; + uint row_parts_count; + + /* Write UNDO record */ + lsn_store(log_data, info->trn->undo_lsn); + page_store(log_data+ LSN_STORE_SIZE + FILEID_STORE_SIZE, page); + dirpos_store(log_data+ LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE, record_number); + + info->log_row_parts[TRANSLOG_INTERNAL_PARTS].str= (char*) log_data; + info->log_row_parts[TRANSLOG_INTERNAL_PARTS].length= sizeof(log_data); + row_length= fill_insert_undo_parts(info, record, info->log_row_parts + + TRANSLOG_INTERNAL_PARTS + 1, + &row_parts_count); + + if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, info->trn, + info->s, sizeof(log_data) + row_length, + TRANSLOG_INTERNAL_PARTS + 1 + row_parts_count, + info->log_row_parts, log_data + LSN_STORE_SIZE)) + goto err; + + } + + _ma_unpin_all_pages(info, info->trn->undo_lsn); + DBUG_RETURN(0); + +err: + _ma_unpin_all_pages(info, 0); + DBUG_RETURN(1); +} + + +/**************************************************************************** + Reading of records +****************************************************************************/ + +/* + Read position to record from record directory at end of page + + SYNOPSIS + get_record_position() + buff page buffer + block_size block size for page + record_number Record number in index + end_of_data pointer to end of data for record + + RETURN + 0 Error in data + # Pointer to start of record. + In this case *end_of_data is set. +*/ + +static byte *get_record_position(byte *buff, uint block_size, + uint record_number, byte **end_of_data) +{ + uint number_of_records= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET]; + byte *dir; + byte *data; + uint offset, length; + +#ifdef SANITY_CHECKS + if (record_number >= number_of_records || + record_number > ((block_size - PAGE_HEADER_SIZE - PAGE_SUFFIX_SIZE) / + DIR_ENTRY_SIZE)) + { + DBUG_PRINT("error", + ("Wrong row number: record_number: %u number_of_records: %u", + record_number, number_of_records)); + return 0; + } +#endif + + dir= (buff + block_size - DIR_ENTRY_SIZE * record_number - + DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE); + offset= uint2korr(dir); + length= uint2korr(dir + 2); +#ifdef SANITY_CHECKS + if (offset < PAGE_HEADER_SIZE || + offset + length > (block_size - + number_of_records * DIR_ENTRY_SIZE - + PAGE_SUFFIX_SIZE)) + { + DBUG_PRINT("error", + ("Wrong row position: record_number: %u offset: %u " + "length: %u number_of_records: %u", + record_number, offset, length, number_of_records)); + return 0; + } +#endif + data= buff + offset; + *end_of_data= data + length; + return data; +} + + +/* + Init extent + + NOTES + extent is a cursor over which pages to read +*/ + +static void init_extent(MARIA_EXTENT_CURSOR *extent, byte *extent_info, + uint extents, MARIA_RECORD_POS *tail_positions) +{ + uint page_count; + extent->extent= extent_info; + extent->extent_count= extents; + extent->page= uint5korr(extent_info); /* First extent */ + page_count= uint2korr(extent_info + ROW_EXTENT_PAGE_SIZE); + extent->page_count= page_count & ~TAIL_BIT; + extent->tail= page_count & TAIL_BIT; + extent->tail_positions= tail_positions; +} + + +/* + Read next extent + + SYNOPSIS + read_next_extent() + info Maria handler + extent Pointer to current extent (this is updated to point + to next) + end_of_data Pointer to end of data in read block (out) + + NOTES + New block is read into info->buff + + RETURN + 0 Error; my_errno is set + # Pointer to start of data in read block + In this case end_of_data is updated to point to end of data. +*/ + +static byte *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent, + byte **end_of_data) +{ + MARIA_SHARE *share= info->s; + byte *buff, *data; + DBUG_ENTER("read_next_extent"); + + if (!extent->page_count) + { + uint page_count; + if (!--extent->extent_count) + goto crashed; + extent->extent+= ROW_EXTENT_SIZE; + extent->page= uint5korr(extent->extent); + page_count= uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE); + DBUG_ASSERT(page_count != 0); + extent->tail= page_count & TAIL_BIT; + extent->page_count= (page_count & ~TAIL_BIT); + extent->first_extent= 0; + DBUG_PRINT("info",("New extent. Page: %lu page_count: %u tail_flag: %d", + (ulong) extent->page, extent->page_count, + extent->tail != 0)); + } + + DBUG_ASSERT(share->pagecache->block_size == share->block_size); + if (!(buff= pagecache_read(share->pagecache, + &info->dfile, extent->page, 0, + info->buff, share->page_type, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + { + /* check if we tried to read over end of file (ie: bad data in record) */ + if ((extent->page + 1) * share->block_size > info->state->data_file_length) + goto crashed; + DBUG_RETURN(0); + } + if (!extent->tail) + { + /* Full data page */ + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == BLOB_PAGE); + extent->page++; /* point to next page */ + extent->page_count--; + *end_of_data= buff + share->block_size; + info->cur_row.full_page_count++; /* For maria_chk */ + DBUG_RETURN(extent->data_start= buff + LSN_SIZE + PAGE_TYPE_SIZE); + } + /* Found tail. page_count is in this case the position in the tail page */ + + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == TAIL_PAGE); + *(extent->tail_positions++)= ma_recordpos(extent->page, + extent->page_count); + info->cur_row.tail_count++; /* For maria_chk */ + + if (!(data= get_record_position(buff, share->block_size, + extent->page_count, + end_of_data))) + goto crashed; + extent->data_start= data; + extent->page_count= 0; /* No more data in extent */ + DBUG_RETURN(data); + + +crashed: + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_PRINT("error", ("wrong extent information")); + DBUG_RETURN(0); +} + + +/* + Read data that may be split over many blocks + + SYNOPSIS + read_long_data() + info Maria handler + to Store result string here (this is allocated) + extent Pointer to current extent position + data Current position in buffer + end_of_data End of data in buffer + + NOTES + When we have to read a new buffer, it's read into info->buff + + This loop is implemented by goto's instead of a for() loop as + the code is notable smaller and faster this way (and it's not nice + to jump into a for loop() or into a 'then' clause) + + RETURN + 0 ok + 1 error +*/ + +static my_bool read_long_data(MARIA_HA *info, byte *to, ulong length, + MARIA_EXTENT_CURSOR *extent, + byte **data, byte **end_of_data) +{ + DBUG_ENTER("read_long_data"); + DBUG_PRINT("enter", ("length: %lu", length)); + DBUG_ASSERT(*data <= *end_of_data); + + /* + Fields are never split in middle. This means that if length > rest-of-data + we should start reading from the next extent. The reason we may have + data left on the page is that there fixed part of the row was less than + min_row_length and in this case the head block was extended to + min_row_length. + + This may change in the future, which is why we have the loop written + the way it's written. + */ + if (length > (ulong) (*end_of_data - *data)) + *end_of_data= *data; + + for(;;) + { + uint left_length; + left_length= (uint) (*end_of_data - *data); + if (likely(left_length >= length)) + { + memcpy(to, *data, length); + (*data)+= length; + DBUG_RETURN(0); + } + memcpy(to, *data, left_length); + to+= left_length; + length-= left_length; + if (!(*data= read_next_extent(info, extent, end_of_data))) + break; + } + DBUG_RETURN(1); +} + + +/* + Read a record from page (helper function for _ma_read_block_record()) + + SYNOPSIS + _ma_read_block_record2() + info Maria handler + record Store record here + data Start of head data for row + end_of_data End of data for row + + NOTES + The head page is already read by caller + Following data is update in info->cur_row: + + cur_row.head_length is set to size of entry in head block + cur_row.tail_positions is set to point to all tail blocks + cur_row.extents points to extents data + cur_row.extents_counts contains number of extents + cur_row.empty_bits is set to empty bits + cur_row.field_lengths contains packed length of all fields + + RETURN + 0 ok + # Error code +*/ + +int _ma_read_block_record2(MARIA_HA *info, byte *record, + byte *data, byte *end_of_data) +{ + MARIA_SHARE *share= info->s; + byte *field_length_data, *blob_buffer, *start_of_data; + uint flag, null_bytes, cur_null_bytes, row_extents, field_lengths; + my_bool found_blob= 0; + MARIA_EXTENT_CURSOR extent; + MARIA_COLUMNDEF *column, *end_column; + DBUG_ENTER("_ma_read_block_record2"); + + LINT_INIT(field_lengths); + LINT_INIT(field_length_data); + LINT_INIT(blob_buffer); + + start_of_data= data; + flag= (uint) (uchar) data[0]; + cur_null_bytes= share->base.original_null_bytes; + null_bytes= share->base.null_bytes; + info->cur_row.head_length= (uint) (end_of_data - data); + info->cur_row.full_page_count= info->cur_row.tail_count= 0; + + /* Skip trans header (for now, until we have MVCC csupport) */ + data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)]; + if (flag & ROW_FLAG_NULLS_EXTENDED) + cur_null_bytes+= data[-1]; + + row_extents= 0; + if (flag & ROW_FLAG_EXTENTS) + { + uint row_extent_size; + /* + Record is split over many data pages. + Get number of extents and first extent + */ + get_key_length(row_extents, data); + info->cur_row.extents_count= row_extents; + row_extent_size= row_extents * ROW_EXTENT_SIZE; + if (info->cur_row.extents_buffer_length < row_extent_size && + _ma_alloc_buffer(&info->cur_row.extents, + &info->cur_row.extents_buffer_length, + row_extent_size)) + DBUG_RETURN(my_errno); + memcpy(info->cur_row.extents, data, ROW_EXTENT_SIZE); + data+= ROW_EXTENT_SIZE; + init_extent(&extent, info->cur_row.extents, row_extents, + info->cur_row.tail_positions); + } + else + { + info->cur_row.extents_count= 0; + (*info->cur_row.tail_positions)= 0; + extent.page_count= 0; + extent.extent_count= 1; + } + extent.first_extent= 1; + + if (share->base.max_field_lengths) + { + get_key_length(field_lengths, data); + info->cur_row.field_lengths_length= field_lengths; +#ifdef SANITY_CHECKS + if (field_lengths > share->base.max_field_lengths) + goto err; +#endif + } + + if (share->calc_checksum) + info->cur_row.checksum= (uint) (uchar) *data++; + /* data now points on null bits */ + memcpy(record, data, cur_null_bytes); + if (unlikely(cur_null_bytes != null_bytes)) + { + /* + This only happens if we have added more NULL columns with + ALTER TABLE and are fetching an old, not yet modified old row + */ + bzero(record + cur_null_bytes, (uint) (null_bytes - cur_null_bytes)); + } + data+= null_bytes; + /* We copy the empty bits to be able to use them for delete/update */ + memcpy(info->cur_row.empty_bits, data, share->base.pack_bytes); + data+= share->base.pack_bytes; + + /* TODO: Use field offsets, instead of just skipping them */ + data+= share->base.field_offsets * FIELD_OFFSET_SIZE; + + /* + Read row extents (note that first extent was already read into + info->cur_row.extents above) + */ + if (row_extents) + { + if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE, + (row_extents - 1) * ROW_EXTENT_SIZE, + &extent, &data, &end_of_data)) + DBUG_RETURN(my_errno); + } + + /* + Data now points to start of fixed length field data that can't be null + or 'empty'. Note that these fields can't be split over blocks. + */ + for (column= share->columndef, + end_column= column + share->base.fixed_not_null_fields; + column < end_column; column++) + { + uint column_length= column->length; + if (data >= end_of_data && + !(data= read_next_extent(info, &extent, &end_of_data))) + goto err; + memcpy(record + column->offset, data, column_length); + data+= column_length; + } + + /* Read array of field lengths. This may be stored in several extents */ + if (share->base.max_field_lengths) + { + field_length_data= info->cur_row.field_lengths; + if (read_long_data(info, field_length_data, field_lengths, &extent, + &data, &end_of_data)) + DBUG_RETURN(my_errno); + } + + /* Read variable length data. Each of these may be split over many extents */ + for (end_column= share->columndef + share->base.fields; + column < end_column; column++) + { + enum en_fieldtype type= (enum en_fieldtype) column->type; + byte *field_pos= record + column->offset; + /* First check if field is present in record */ + if ((record[column->null_pos] & column->null_bit) || + (info->cur_row.empty_bits[column->empty_pos] & column->empty_bit)) + { + if (type == FIELD_SKIP_ENDSPACE) + bfill(record + column->offset, column->length, ' '); + else + bzero(record + column->offset, column->fill_length); + continue; + } + switch (type) { + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_SKIP_PRESPACE: + case FIELD_SKIP_ZERO: /* Fixed length field */ + if (data >= end_of_data && + !(data= read_next_extent(info, &extent, &end_of_data))) + goto err; + memcpy(field_pos, data, column->length); + data+= column->length; + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + /* Char that is space filled */ + uint length; + if (column->length <= 255) + length= (uint) (uchar) *field_length_data++; + else + { + length= uint2korr(field_length_data); + field_length_data+= 2; + } +#ifdef SANITY_CHECKS + if (length > column->length) + goto err; +#endif + if (read_long_data(info, field_pos, length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + bfill(field_pos + length, column->length - length, ' '); + break; + } + case FIELD_VARCHAR: + { + ulong length; + if (column->length <= 256) + { + length= (uint) (uchar) (*field_pos++= *field_length_data++); + } + else + { + length= uint2korr(field_length_data); + field_pos[0]= field_length_data[0]; + field_pos[1]= field_length_data[1]; + field_pos+= 2; + field_length_data+= 2; + } + if (read_long_data(info, field_pos, length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + break; + } + case FIELD_BLOB: + { + uint size_length= column->length - portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length, field_length_data); + + if (!found_blob) + { + /* Calculate total length for all blobs */ + ulong blob_lengths= 0; + byte *length_data= field_length_data; + MARIA_COLUMNDEF *blob_field= column; + + found_blob= 1; + for (; blob_field < end_column; blob_field++) + { + uint size_length; + if ((record[blob_field->null_pos] & blob_field->null_bit) || + (info->cur_row.empty_bits[blob_field->empty_pos] & + blob_field->empty_bit)) + continue; + size_length= blob_field->length - portable_sizeof_char_ptr; + blob_lengths+= _ma_calc_blob_length(size_length, length_data); + length_data+= size_length; + } + DBUG_PRINT("info", ("Total blob length: %lu", blob_lengths)); + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + blob_lengths)) + DBUG_RETURN(my_errno); + blob_buffer= info->rec_buff; + } + + memcpy(field_pos, field_length_data, size_length); + memcpy_fixed(field_pos + size_length, (byte *) & blob_buffer, + sizeof(char*)); + field_length_data+= size_length; + + /* + After we have read one extent, then each blob is in it's own extent + */ + if (extent.first_extent && (ulong) (end_of_data - data) < blob_length) + end_of_data= data; /* Force read of next extent */ + + if (read_long_data(info, blob_buffer, blob_length, &extent, &data, + &end_of_data)) + DBUG_RETURN(my_errno); + blob_buffer+= blob_length; + break; + } + default: +#ifdef EXTRA_DEBUG + DBUG_ASSERT(0); /* purecov: deadcode */ +#endif + goto err; + } + continue; + } + + if (row_extents) + { + DBUG_PRINT("info", ("Row read: page_count: %u extent_count: %u", + extent.page_count, extent.extent_count)); + *extent.tail_positions= 0; /* End marker */ + if (extent.page_count) + goto err; + if (extent.extent_count > 1) + if (check_if_zero(extent.extent + ROW_EXTENT_SIZE, + (extent.extent_count-1) * ROW_EXTENT_SIZE)) + goto err; + } + else + { + DBUG_PRINT("info", ("Row read")); + /* + data should normally point to end_of_date. The only exception is if + the row is very short in which case we allocated 'min_row_length' data + for allowing the row to expand. + */ + if (data != end_of_data && (uint) (end_of_data - start_of_data) > + info->s->base.min_row_length) + goto err; + } + + info->update|= HA_STATE_AKTIV; /* We have an active record */ + DBUG_RETURN(0); + +err: + /* Something was wrong with data on record */ + DBUG_PRINT("error", ("Found record with wrong data")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); +} + + +/* + Read a record based on record position + + SYNOPSIS + _ma_read_block_record() + info Maria handler + record Store record here + record_pos Record position +*/ + +int _ma_read_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos) +{ + byte *data, *end_of_data, *buff; + uint offset; + uint block_size= info->s->block_size; + DBUG_ENTER("_ma_read_block_record"); + DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); + + info->cur_row.lastpos= record_pos; + offset= ma_recordpos_to_dir_entry(record_pos); + + DBUG_ASSERT(info->s->pagecache->block_size == block_size); + if (!(buff= pagecache_read(info->s->pagecache, + &info->dfile, ma_recordpos_to_page(record_pos), 0, + info->buff, info->s->page_type, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(1); + DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE); + if (!(data= get_record_position(buff, block_size, offset, &end_of_data))) + { + my_errno= HA_ERR_WRONG_IN_RECORD; /* File crashed */ + DBUG_PRINT("error", ("Wrong directory entry in data block")); + DBUG_RETURN(1); + } + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); +} + + +/* compare unique constraint between stored rows */ + +my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) +{ + byte *org_rec_buff, *old_record; + my_size_t org_rec_buff_size; + int error; + DBUG_ENTER("_ma_cmp_block_unique"); + + if (!(old_record= my_alloca(info->s->base.reclength))) + DBUG_RETURN(1); + + /* Don't let the compare destroy blobs that may be in use */ + org_rec_buff= info->rec_buff; + org_rec_buff_size= info->rec_buff_size; + if (info->s->base.blobs) + { + /* Force realloc of record buffer*/ + info->rec_buff= 0; + info->rec_buff_size= 0; + } + error= _ma_read_block_record(info, old_record, pos); + if (!error) + error= _ma_unique_comp(def, record, old_record, def->null_are_equal); + if (info->s->base.blobs) + { + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + info->rec_buff= org_rec_buff; + info->rec_buff_size= org_rec_buff_size; + } + DBUG_PRINT("exit", ("result: %d", error)); + my_afree(old_record); + DBUG_RETURN(error != 0); +} + + +/**************************************************************************** + Table scan +****************************************************************************/ + +/* + Allocate buffers for table scan + + SYNOPSIS + _ma_scan_init_block_record(MARIA_HA *info) + + IMPLEMENTATION + We allocate one buffer for the current bitmap and one buffer for the + current page + + RETURN + 0 ok + 1 error (couldn't allocate memory or disk error) +*/ + +my_bool _ma_scan_init_block_record(MARIA_HA *info) +{ + DBUG_ENTER("_ma_scan_init_block_record"); + /* + bitmap_buff may already be allocated if this is the second call to + rnd_init() without a rnd_end() in between, see sql/handler.h + */ + if (!(info->scan.bitmap_buff || + ((info->scan.bitmap_buff= + (byte *) my_malloc(info->s->block_size * 2, MYF(MY_WME)))))) + DBUG_RETURN(1); + info->scan.page_buff= info->scan.bitmap_buff + info->s->block_size; + info->scan.bitmap_end= info->scan.bitmap_buff + info->s->bitmap.total_size; + + /* Set scan variables to get _ma_scan_block() to start with reading bitmap */ + info->scan.number_of_rows= 0; + info->scan.bitmap_pos= info->scan.bitmap_end; + info->scan.bitmap_page= (ulong) - (long) info->s->bitmap.pages_covered; + /* + We have to flush bitmap as we will read the bitmap from the page cache + while scanning rows + */ + DBUG_RETURN(_ma_flush_bitmap(info->s)); +} + + +/* Free buffers allocated by _ma_scan_block_init() */ + +void _ma_scan_end_block_record(MARIA_HA *info) +{ + DBUG_ENTER("_ma_scan_end_block_record"); + my_free(info->scan.bitmap_buff, MYF(MY_ALLOW_ZERO_PTR)); + info->scan.bitmap_buff= 0; + DBUG_VOID_RETURN; +} + + +/* + Read next record while scanning table + + SYNOPSIS + _ma_scan_block_record() + info Maria handler + record Store found here + record_pos Value stored in info->cur_row.next_pos after last call + skip_deleted + + NOTES + - One must have called mi_scan() before this + - In this version, we don't actually need record_pos, we as easily + use a variable in info->scan + + IMPLEMENTATION + Current code uses a lot of goto's to separate the different kind of + states we may be in. This gives us a minimum of executed if's for + the normal cases. I tried several different ways to code this, but + the current one was in the end the most readable and fastest. + + RETURN + 0 ok + # Error code +*/ + +int _ma_scan_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos, + my_bool skip_deleted __attribute__ ((unused))) +{ + uint block_size; + my_off_t filepos; + MARIA_SHARE *share= info->s; + DBUG_ENTER("_ma_scan_block_record"); + +restart_record_read: + /* Find next row in current page */ + if (likely(record_pos < info->scan.number_of_rows)) + { + uint length, offset; + byte *data, *end_of_data; + + while (!(offset= uint2korr(info->scan.dir))) + { + info->scan.dir-= DIR_ENTRY_SIZE; + record_pos++; +#ifdef SANITY_CHECKS + if (info->scan.dir < info->scan.dir_end) + goto err; +#endif + } + /* found row */ + info->cur_row.lastpos= info->scan.row_base_page + record_pos; + info->cur_row.nextpos= record_pos + 1; + data= info->scan.page_buff + offset; + length= uint2korr(info->scan.dir + 2); + end_of_data= data + length; + info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */ +#ifdef SANITY_CHECKS + if (end_of_data > info->scan.dir_end || + offset < PAGE_HEADER_SIZE || length < share->base.min_block_length) + goto err; +#endif + DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data)); + } + + /* Find next head page in current bitmap */ +restart_bitmap_scan: + block_size= share->block_size; + if (likely(info->scan.bitmap_pos < info->scan.bitmap_end)) + { + byte *data= info->scan.bitmap_pos; + longlong bits= info->scan.bits; + uint bit_pos= info->scan.bit_pos; + + do + { + while (likely(bits)) + { + uint pattern= bits & 7; + bits >>= 3; + bit_pos++; + if (pattern > 0 && pattern <= 4) + { + /* Found head page; Read it */ + ulong page; + info->scan.bitmap_pos= data; + info->scan.bits= bits; + info->scan.bit_pos= bit_pos; + page= (info->scan.bitmap_page + 1 + + (data - info->scan.bitmap_buff) / 6 * 16 + bit_pos - 1); + info->scan.row_base_page= ma_recordpos(page, 0); + if (!(pagecache_read(share->pagecache, + &info->dfile, + page, 0, info->scan.page_buff, + share->page_type, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + if (((info->scan.page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != + HEAD_PAGE) || + (info->scan.number_of_rows= + (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) == 0) + { + DBUG_PRINT("error", ("Wrong page header")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); + } + info->scan.dir= (info->scan.page_buff + block_size - + PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE); + info->scan.dir_end= (info->scan.dir - + (info->scan.number_of_rows - 1) * + DIR_ENTRY_SIZE); + record_pos= 0; + goto restart_record_read; + } + } + for (data+= 6; data < info->scan.bitmap_end; data+= 6) + { + bits= uint6korr(data); + /* Skip not allocated pages and blob / full tail pages */ + if (bits && bits != LL(07777777777777777)) + break; + } + bit_pos= 0; + } while (data < info->scan.bitmap_end); + } + + /* Read next bitmap */ + info->scan.bitmap_page+= share->bitmap.pages_covered; + filepos= (my_off_t) info->scan.bitmap_page * block_size; + if (unlikely(filepos >= info->state->data_file_length)) + { + DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE)); + } + if (!(pagecache_read(share->pagecache, &info->dfile, + info->scan.bitmap_page, + 0, info->scan.bitmap_buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + DBUG_RETURN(my_errno); + /* Skip scanning 'bits' in bitmap scan code */ + info->scan.bitmap_pos= info->scan.bitmap_buff - 6; + info->scan.bits= 0; + goto restart_bitmap_scan; + +err: + DBUG_PRINT("error", ("Wrong data on page")); + DBUG_RETURN((my_errno= HA_ERR_WRONG_IN_RECORD)); +} + + +/* + Compare a row against a stored one + + NOTES + Not implemented, as block record is not supposed to be used in a shared + global environment +*/ + +my_bool _ma_compare_block_record(MARIA_HA *info __attribute__ ((unused)), + const byte *record __attribute__ ((unused))) +{ + return 0; +} + + +#ifndef DBUG_OFF + +static void _ma_print_directory(byte *buff, uint block_size) +{ + uint max_entry= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET], row= 0; + uint end_of_prev_row= PAGE_HEADER_SIZE; + byte *dir, *end; + + dir= buff + block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE; + end= buff + block_size - DIR_ENTRY_SIZE - PAGE_SUFFIX_SIZE; + + DBUG_LOCK_FILE; + fprintf(DBUG_FILE,"Directory dump (pos:length):\n"); + + for (row= 1; dir <= end ; end-= DIR_ENTRY_SIZE, row++) + { + uint offset= uint2korr(end); + uint length= uint2korr(end+2); + fprintf(DBUG_FILE, " %4u:%4u", offset, offset ? length : 0); + if (!(row % (80/12))) + fputc('\n', DBUG_FILE); + if (offset) + { + DBUG_ASSERT(offset >= end_of_prev_row); + end_of_prev_row= offset + length; + } + } + fputc('\n', DBUG_FILE); + fflush(DBUG_FILE); + DBUG_UNLOCK_FILE; +} +#endif /* DBUG_OFF */ + + +/* + Store an integer with simple packing + + SYNOPSIS + ma_store_integer() + to Store the packed integer here + nr Integer to store + + NOTES + This is mostly used to store field numbers and lengths of strings. + We have to cast the result for the LL() becasue of a bug in Forte CC + compiler. + + Packing used is: + nr < 251 is stored as is (in 1 byte) + Numbers that require 1-4 bytes are stored as char(250+byte_length), data + Bigger numbers are stored as 255, data as ulonglong (not yet done). + + RETURN + Position in 'to' after the packed length +*/ + +uchar *ma_store_length(uchar *to, ulong nr) +{ + if (nr < 251) + { + *to=(uchar) nr; + return to+1; + } + if (nr < 65536) + { + if (nr <= 255) + { + to[0]= (uchar) 251; + to[1]= (uchar) nr; + return to+2; + } + to[0]= (uchar) 252; + int2store(to+1, nr); + return to+3; + } + if (nr < 16777216) + { + *to++= (uchar) 253; + int3store(to, nr); + return to+3; + } + *to++= (uchar) 254; + int4store(to, nr); + return to+4; +} + + +/* Calculate how many bytes needed to store a number */ + +uint ma_calc_length_for_store_length(ulong nr) +{ + if (nr < 251) + return 1; + if (nr < 65536) + { + if (nr <= 255) + return 2; + return 3; + } + if (nr < 16777216) + return 4; + return 5; +} + + +/* + Fill array with pointers to field parts to be stored in log for insert + + SYNOPSIS + fill_insert_undo_parts() + info Maria handler + record Inserted row + log_parts Store pointers to changed memory areas here + log_parts_count See RETURN + + NOTES + We have information in info->cur_row about the read row. + + RETURN + length of data in log_parts. + log_parts_count contains number of used log_parts +*/ + +static size_t fill_insert_undo_parts(MARIA_HA *info, const byte *record, + LEX_STRING *log_parts, + uint *log_parts_count) +{ + MARIA_SHARE *share= info->s; + MARIA_COLUMNDEF *column, *end_column; + uchar *field_lengths= info->cur_row.field_lengths; + size_t row_length; + MARIA_ROW *cur_row= &info->cur_row; + LEX_STRING *start_log_parts; + DBUG_ENTER("fill_insert_undo_parts"); + + start_log_parts= log_parts; + + /* Store null bits */ + log_parts->str= (char*) record; + log_parts->length= share->base.null_bytes; + row_length= log_parts->length; + log_parts++; + + /* Stored bitmap over packed (zero length or all-zero fields) */ + log_parts->str= info->cur_row.empty_bits; + log_parts->length= share->base.pack_bytes; + row_length+= log_parts->length; + log_parts++; + + if (share->base.max_field_lengths) + { + /* Store field lenghts, with a prefix of number of bytes */ + log_parts->str= field_lengths-2; + log_parts->length= info->cur_row.field_lengths_length+2; + int2store(log_parts->str, info->cur_row.field_lengths_length); + row_length+= log_parts->length; + log_parts++; + } + + /* Handle constant length fields that are always present */ + for (column= share->columndef, + end_column= column+ share->base.fixed_not_null_fields; + column < end_column; + column++) + { + log_parts->str= (char*) record + column->offset; + log_parts->length= column->length; + row_length+= log_parts->length; + log_parts++; + } + + /* Handle NULL fields and CHAR/VARCHAR fields */ + for (end_column= share->columndef + share->base.fields - share->base.blobs; + column < end_column; + column++) + { + const uchar *column_pos; + size_t column_length; + if ((record[column->null_pos] & column->null_bit) || + cur_row->empty_bits[column->empty_pos] & column->empty_bit) + continue; + + column_pos= record+ column->offset; + column_length= column->length; + + switch ((enum en_fieldtype) column->type) { + case FIELD_CHECK: + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_ZERO: + case FIELD_SKIP_PRESPACE: /* Not packed */ + case FIELD_SKIP_ZERO: /* Fixed length field */ + break; + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + if (column->length <= 255) + column_length= *field_lengths++; + else + { + column_length= uint2korr(field_lengths); + field_lengths+= 2; + } + break; + } + case FIELD_VARCHAR: + { + if (column->length <= 256) + { + column_length= *field_lengths; + field_lengths++; + } + else + { + column_length= uint2korr(field_lengths); + field_lengths+= 2; + } + break; + } + default: + DBUG_ASSERT(0); + } + log_parts->str= (char*) column_pos; + log_parts->length= column_length; + row_length+= log_parts->length; + log_parts++; + } + + /* Add blobs */ + for (end_column+= share->base.blobs; column < end_column; column++) + { + const byte *field_pos= record + column->offset; + uint size_length= column->length - portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length, field_pos); + + /* + We don't have to check for null, as blob_length is guranteed to be 0 + if the blob is null + */ + if (blob_length) + { + char *blob_pos; + memcpy_fixed((byte*) &blob_pos, record + column->offset + size_length, + sizeof(blob_pos)); + log_parts->str= blob_pos; + log_parts->length= blob_length; + row_length+= log_parts->length; + log_parts++; + } + } + *log_parts_count= (log_parts - start_log_parts); + DBUG_RETURN(row_length); +} + + +/* + Fill array with pointers to field parts to be stored in log for update + + SYNOPSIS + fill_update_undo_parts() + info Maria handler + oldrec Original row + newrec New row + log_parts Store pointers to changed memory areas here + log_parts_count See RETURN + + IMPLEMENTATION + Format of undo record: + + Fields are stored in same order as the field array. + + Number of changed fields (packed) + + For each changed field + Fieldnumber (packed) + Length, if variable length field (packed) + + For each changed field + Data + + Packing is using ma_store_integer() + + The reason we store field numbers & length separated from data (ie, not + after each other) is to get better cpu caching when we loop over + fields (as we probably don't have to access data for each field when we + want to read and old row through the undo log record). + + As a special case, we use '255' for the field number of the null bitmap. + + RETURN + length of data in log_parts. + log_parts_count contains number of used log_parts +*/ + +static size_t fill_update_undo_parts(MARIA_HA *info, const byte *oldrec, + const byte *newrec, + LEX_STRING *log_parts, + uint *log_parts_count) +{ + MARIA_SHARE *share= info->s; + MARIA_COLUMNDEF *column, *end_column; + MARIA_ROW *old_row= &info->cur_row, *new_row= &info->new_row; + uchar *field_data, *start_field_data; + uchar *old_field_lengths= old_row->field_lengths; + uchar *new_field_lengths= new_row->field_lengths; + size_t row_length= 0; + uint field_count= 0; + LEX_STRING *start_log_parts; + my_bool new_column_is_empty; + DBUG_ENTER("fill_update_undo_parts"); + + start_log_parts= log_parts; + + /* + First log part is for number of fields, field numbers and lengths + The +4 is to reserve place for the number of changed fields. + */ + start_field_data= field_data= info->update_field_data + 4; + log_parts++; + + if (memcmp(oldrec, newrec, share->base.null_bytes)) + { + /* Store changed null bits */ + *field_data++= (uchar) 255; /* Special case */ + field_count++; + log_parts->str= (char*) oldrec; + log_parts->length= share->base.null_bytes; + row_length= log_parts->length; + log_parts++; + } + + /* Handle constant length fields */ + for (column= share->columndef, + end_column= column+ share->base.fixed_not_null_fields; + column < end_column; + column++) + { + if (memcmp(oldrec + column->offset, newrec + column->offset, + column->length)) + { + field_data= ma_store_length(field_data, + (uint) (column - share->columndef)); + field_count++; + log_parts->str= (char*) oldrec + column->offset; + log_parts->length= column->length; + row_length+= log_parts->length; + log_parts++; + } + } + + /* Handle the rest: NULL fields and CHAR/VARCHAR fields and BLOB's */ + for (end_column= share->columndef + share->base.fields; + column < end_column; + column++) + { + const uchar *new_column_pos, *old_column_pos; + size_t new_column_length, old_column_length; + + /* First check if old column is null or empty */ + if (oldrec[column->null_pos] & column->null_bit) + { + /* + It's safe to skip this one as either the new column is also null + (no change) or the new_column is not null, in which case the null-bit + maps differed and we have already stored the null bitmap. + */ + continue; + } + if (old_row->empty_bits[column->empty_pos] & column->empty_bit) + { + if (new_row->empty_bits[column->empty_pos] & column->empty_bit) + continue; /* Both are empty; skip */ + + /* Store null length column */ + field_data= ma_store_length(field_data, + (uint) (column - share->columndef)); + field_data= ma_store_length(field_data, 0); + field_count++; + continue; + } + /* + Remember if the 'new' value is empty (as in this case we must always + log the original value + */ + new_column_is_empty= ((newrec[column->null_pos] & column->null_bit) || + (new_row->empty_bits[column->empty_pos] & + column->empty_bit)); + + old_column_pos= oldrec + column->offset; + new_column_pos= newrec + column->offset; + old_column_length= new_column_length= column->length; + + switch ((enum en_fieldtype) column->type) { + case FIELD_CHECK: + case FIELD_NORMAL: /* Fixed length field */ + case FIELD_ZERO: + case FIELD_SKIP_PRESPACE: /* Not packed */ + case FIELD_SKIP_ZERO: /* Fixed length field */ + break; + case FIELD_VARCHAR: + new_column_length--; /* Skip length prefix */ + /* Fall through */ + case FIELD_SKIP_ENDSPACE: /* CHAR */ + { + if (new_column_length <= 255) + { + old_column_length= *old_field_lengths++; + if (!new_column_is_empty) + new_column_length= *new_field_lengths++; + } + else + { + old_column_length= uint2korr(old_field_lengths); + old_field_lengths+= 2; + if (!new_column_is_empty) + { + new_column_length= uint2korr(new_field_lengths); + new_field_lengths+= 2; + } + } + break; + } + case FIELD_BLOB: + { + uint size_length= column->length - portable_sizeof_char_ptr; + old_column_length= _ma_calc_blob_length(size_length, old_column_pos); + memcpy_fixed((byte*) &old_column_pos, + oldrec + column->offset + size_length, + sizeof(old_column_pos)); + if (!new_column_is_empty) + { + new_column_length= _ma_calc_blob_length(size_length, new_column_pos); + memcpy_fixed((byte*) &new_column_pos, + newrec + column->offset + size_length, + sizeof(old_column_pos)); + } + break; + } + default: + DBUG_ASSERT(0); + } + + if (new_column_is_empty || new_column_length != old_column_length || + memcmp(old_column_pos, new_column_pos, new_column_length)) + { + field_data= ma_store_length(field_data, + (uint) (column - share->columndef)); + field_data= ma_store_length(field_data, old_column_length); + field_count++; + + log_parts->str= (char*) old_column_pos; + log_parts->length= old_column_length; + row_length+= log_parts->length; + log_parts++; + } + } + + *log_parts_count= (log_parts - start_log_parts); + + /* Store number of fields before the field/field_lengths */ + start_log_parts->str= ((char*) + (start_field_data - + ma_calc_length_for_store_length(field_count))); + ma_store_length(start_log_parts->str, field_count); + start_log_parts->length= (size_t) ((char*) field_data - + start_log_parts->str); + row_length+= start_log_parts->length; + DBUG_RETURN(row_length); +} diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h new file mode 100644 index 00000000000..819d1c2e4d2 --- /dev/null +++ b/storage/maria/ma_blockrec.h @@ -0,0 +1,180 @@ +/* Copyright (C) 2007 Michael Widenius + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Storage of records in block +*/ + +#define LSN_SIZE 7 +#define DIR_COUNT_SIZE 1 /* Stores number of rows on page */ +#define EMPTY_SPACE_SIZE 2 /* Stores empty space on page */ +#define PAGE_TYPE_SIZE 1 +#define PAGE_SUFFIX_SIZE 0 /* Bytes for page suffix */ +#define PAGE_HEADER_SIZE (LSN_SIZE + DIR_COUNT_SIZE + EMPTY_SPACE_SIZE +\ + PAGE_TYPE_SIZE) +#define PAGE_OVERHEAD_SIZE (PAGE_HEADER_SIZE + DIR_ENTRY_SIZE + \ + PAGE_SUFFIX_SIZE) +#define BLOCK_RECORD_POINTER_SIZE 6 + +#define FULL_PAGE_SIZE(block_size) ((block_size) - LSN_SIZE - PAGE_TYPE_SIZE) + +#define ROW_EXTENT_PAGE_SIZE 5 +#define ROW_EXTENT_COUNT_SIZE 2 +#define ROW_EXTENT_SIZE (ROW_EXTENT_PAGE_SIZE + ROW_EXTENT_COUNT_SIZE) +#define TAIL_BIT 0x8000 /* Bit in page_count to signify tail */ +/* Number of extents reserved MARIA_BITMAP_BLOCKS to store head part */ +#define ELEMENTS_RESERVED_FOR_MAIN_PART 4 +/* Fields before 'row->null_field_lengths' used by find_where_to_split_row */ +#define EXTRA_LENGTH_FIELDS 3 + +/* Size for the different parts in the row header (and head page) */ + +#define FLAG_SIZE 1 +#define TRANSID_SIZE 6 +#define VERPTR_SIZE 7 +#define DIR_ENTRY_SIZE 4 +#define FIELD_OFFSET_SIZE 2 /* size of pointers to field starts */ + +/* Minimum header size needed for a new row */ +#define BASE_ROW_HEADER_SIZE FLAG_SIZE +#define TRANS_ROW_EXTRA_HEADER_SIZE TRANSID_SIZE + +#define PAGE_TYPE_MASK 127 +enum en_page_type { UNALLOCATED_PAGE, HEAD_PAGE, TAIL_PAGE, BLOB_PAGE, MAX_PAGE_TYPE }; + +#define PAGE_TYPE_OFFSET LSN_SIZE +#define DIR_COUNT_OFFSET LSN_SIZE+PAGE_TYPE_SIZE +#define EMPTY_SPACE_OFFSET (DIR_COUNT_OFFSET + DIR_COUNT_SIZE) + +#define PAGE_CAN_BE_COMPACTED 128 /* Bit in PAGE_TYPE */ + +/* Bits used for flag byte (one byte, first in record) */ +#define ROW_FLAG_TRANSID 1 +#define ROW_FLAG_VER_PTR 2 +#define ROW_FLAG_DELETE_TRANSID 4 +#define ROW_FLAG_NULLS_EXTENDED 8 +#define ROW_FLAG_EXTENTS 128 +#define ROW_FLAG_ALL (1+2+4+8+128) + +/******** Variables that affects how data pages are utilized ********/ + +/* Minium size of tail segment */ +#define MIN_TAIL_SIZE 32 + +/* + Fixed length part of Max possible header size; See row data structure + table in ma_blockrec.c. +*/ +#define MAX_FIXED_HEADER_SIZE (FLAG_SIZE + 3 + ROW_EXTENT_SIZE + 3) +#define TRANS_MAX_FIXED_HEADER_SIZE (MAX_FIXED_HEADER_SIZE + \ + TRANSID_SIZE + VERPTR_SIZE + \ + TRANSID_SIZE) + +/* We use 1 byte in record header to store number of directory entries */ +#define MAX_ROWS_PER_PAGE 255 + +/* Bits for MARIA_BITMAP_BLOCKS->used */ +/* We stored data on disk in the block */ +#define BLOCKUSED_USED 1 +/* Bitmap on disk is block->org_bitmap_value ; Happens only on update */ +#define BLOCKUSED_USE_ORG_BITMAP 2 +/* We stored tail data on disk for the block */ +#define BLOCKUSED_TAIL 4 + +/******* defines that affects allocation (density) of data *******/ + +/* + If the tail part (from the main block or a blob) would use more than 75 % of + the size of page, store the tail on a full page instead of a shared + tail page. +*/ +#define MAX_TAIL_SIZE(block_size) ((block_size) *3 / 4) + +/* Don't allocate memory for too many row extents on the stack */ +#define ROW_EXTENTS_ON_STACK 32 + +extern uchar maria_bitmap_marker[2]; + +/* Functions to convert MARIA_RECORD_POS to/from page:offset */ + +static inline MARIA_RECORD_POS ma_recordpos(ulonglong page, uint dir_entry) +{ + DBUG_ASSERT(dir_entry <= 255); + return (MARIA_RECORD_POS) ((page << 8) | dir_entry); +} + +static inline my_off_t ma_recordpos_to_page(MARIA_RECORD_POS record_pos) +{ + return record_pos >> 8; +} + +static inline my_off_t ma_recordpos_to_dir_entry(MARIA_RECORD_POS record_pos) +{ + return record_pos & 255; +} + +/* ma_blockrec.c */ +void _ma_init_block_record_data(void); +my_bool _ma_once_init_block_record(MARIA_SHARE *share, File dfile); +my_bool _ma_once_end_block_record(MARIA_SHARE *share); +my_bool _ma_init_block_record(MARIA_HA *info); +void _ma_end_block_record(MARIA_HA *info); + +my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *oldrec, const byte *newrec); +my_bool _ma_delete_block_record(MARIA_HA *info, const byte *record); +int _ma_read_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS record_pos); +int _ma_read_block_record2(MARIA_HA *info, byte *record, + byte *data, byte *end_of_data); +int _ma_scan_block_record(MARIA_HA *info, byte *record, + MARIA_RECORD_POS, my_bool); +my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_scan_init_block_record(MARIA_HA *info); +void _ma_scan_end_block_record(MARIA_HA *info); + +MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info, + const byte *record); +my_bool _ma_write_block_record(MARIA_HA *info, const byte *record); +my_bool _ma_write_abort_block_record(MARIA_HA *info); +my_bool _ma_compare_block_record(register MARIA_HA *info, + register const byte *record); + +/* ma_bitmap.c */ +my_bool _ma_bitmap_init(MARIA_SHARE *share, File file); +my_bool _ma_bitmap_end(MARIA_SHARE *share); +my_bool _ma_flush_bitmap(MARIA_SHARE *share); +my_bool _ma_bitmap_find_place(MARIA_HA *info, MARIA_ROW *row, + MARIA_BITMAP_BLOCKS *result_blocks); +my_bool _ma_bitmap_release_unused(MARIA_HA *info, MARIA_BITMAP_BLOCKS *blocks); +my_bool _ma_bitmap_free_full_pages(MARIA_HA *info, const byte *extents, + uint count); +my_bool _ma_bitmap_set(MARIA_HA *info, ulonglong pos, my_bool head, + uint empty_space); +my_bool _ma_reset_full_page_bits(MARIA_HA *info, MARIA_FILE_BITMAP *bitmap, + ulonglong page, uint page_count); +uint _ma_free_size_to_head_pattern(MARIA_FILE_BITMAP *bitmap, uint size); +my_bool _ma_bitmap_find_new_place(MARIA_HA *info, MARIA_ROW *new_row, + ulonglong page, uint free_size, + MARIA_BITMAP_BLOCKS *result_blocks); +my_bool _ma_check_bitmap_data(MARIA_HA *info, + enum en_page_type page_type, ulonglong page, + uint empty_space, uint *bitmap_pattern); +my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info, + enum en_page_type page_type, + ulonglong page, + uint *bitmap_pattern); +void _ma_bitmap_delete_all(MARIA_SHARE *share); diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c new file mode 100644 index 00000000000..e8a4b20571b --- /dev/null +++ b/storage/maria/ma_cache.c @@ -0,0 +1,107 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Functions for read record cacheing with maria + Used for reading dynamic/compressed records from datafile. + + Can fetch data directly from file (outside cache), + if reading a small chunk straight before the cached part (with possible + overlap). + + Can be explicitly asked not to use cache (by not setting READING_NEXT in + flag) - useful for occasional out-of-cache reads, when the next read is + expected to hit the cache again. + + Allows "partial read" errors in the record header (when READING_HEADER flag + is set) - unread part is bzero'ed + + Note: out-of-cache reads are enabled for shared IO_CACHE's too, + as these reads will be cached by OS cache (and my_pread is always atomic) +*/ + + +#include "maria_def.h" + +int _ma_read_cache(IO_CACHE *info, byte *buff, my_off_t pos, uint length, + int flag) +{ + uint read_length,in_buff_length; + my_off_t offset; + char *in_buff_pos; + DBUG_ENTER("_ma_read_cache"); + + if (pos < info->pos_in_file) + { + read_length=length; + if ((my_off_t) read_length > (my_off_t) (info->pos_in_file-pos)) + read_length=(uint) (info->pos_in_file-pos); + info->seek_not_done=1; + if (my_pread(info->file,buff,read_length,pos,MYF(MY_NABP))) + DBUG_RETURN(1); + if (!(length-=read_length)) + DBUG_RETURN(0); + pos+=read_length; + buff+=read_length; + } + if (pos >= info->pos_in_file && + (offset= (my_off_t) (pos - info->pos_in_file)) < + (my_off_t) (info->read_end - info->request_pos)) + { + in_buff_pos=info->request_pos+(uint) offset; + in_buff_length= min(length,(uint) (info->read_end-in_buff_pos)); + memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length); + if (!(length-=in_buff_length)) + DBUG_RETURN(0); + pos+=in_buff_length; + buff+=in_buff_length; + } + else + in_buff_length=0; + if (flag & READING_NEXT) + { + if (pos != (info->pos_in_file + + (uint) (info->read_end - info->request_pos))) + { + info->pos_in_file=pos; /* Force start here */ + info->read_pos=info->read_end=info->request_pos; /* Everything used */ + info->seek_not_done=1; + } + else + info->read_pos=info->read_end; /* All block used */ + if (!(*info->read_function)(info,buff,length)) + DBUG_RETURN(0); + read_length=info->error; + } + else + { + info->seek_not_done=1; + if ((read_length=my_pread(info->file,buff,length,pos,MYF(0))) == length) + DBUG_RETURN(0); + } + if (!(flag & READING_HEADER) || (int) read_length == -1 || + read_length+in_buff_length < 3) + { + DBUG_PRINT("error", + ("Error %d reading next-multi-part block (Got %d bytes)", + my_errno, (int) read_length)); + if (!my_errno || my_errno == -1) + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); + } + bzero(buff+read_length,MARIA_BLOCK_INFO_HEADER_LENGTH - in_buff_length - + read_length); + DBUG_RETURN(0); +} /* _ma_read_cache */ diff --git a/storage/maria/ma_changed.c b/storage/maria/ma_changed.c new file mode 100644 index 00000000000..4d0964581f6 --- /dev/null +++ b/storage/maria/ma_changed.c @@ -0,0 +1,33 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Check if somebody has changed table since last check. */ + +#include "maria_def.h" + + /* Return 0 if table isn't changed */ + +int maria_is_changed(MARIA_HA *info) +{ + int result; + DBUG_ENTER("maria_is_changed"); + if (fast_ma_readinfo(info)) + DBUG_RETURN(-1); + VOID(_ma_writeinfo(info,0)); + result=(int) info->data_changed; + info->data_changed=0; + DBUG_PRINT("exit",("result: %d",result)); + DBUG_RETURN(result); +} diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c new file mode 100644 index 00000000000..0fc2b77304d --- /dev/null +++ b/storage/maria/ma_check.c @@ -0,0 +1,5211 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Describe, check and repair of MARIA tables */ + +/* + About checksum calculation. + + There are two types of checksums. Table checksum and row checksum. + + Row checksum is an additional byte at the end of dynamic length + records. It must be calculated if the table is configured for them. + Otherwise they must not be used. The variable + MYISAM_SHARE::calc_checksum determines if row checksums are used. + MI_INFO::checksum is used as temporary storage during row handling. + For parallel repair we must assure that only one thread can use this + variable. There is no problem on the write side as this is done by one + thread only. But when checking a record after read this could go + wrong. But since all threads read through a common read buffer, it is + sufficient if only one thread checks it. + + Table checksum is an eight byte value in the header of the index file. + It can be calculated even if row checksums are not used. The variable + MI_CHECK::glob_crc is calculated over all records. + MI_SORT_PARAM::calc_checksum determines if this should be done. This + variable is not part of MI_CHECK because it must be set per thread for + parallel repair. The global glob_crc must be changed by one thread + only. And it is sufficient to calculate the checksum once only. +*/ + +#include "ma_ftdefs.h" +#include +#include +#include +#include +#ifdef HAVE_SYS_VADVISE_H +#include +#endif +#ifdef HAVE_SYS_MMAN_H +#include +#endif +#include "ma_rt_index.h" +#include "ma_blockrec.h" +#include "trnman_public.h" + +/* Functions defined in this file */ + +static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link); +static int chk_index(HA_CHECK *param, MARIA_HA *info,MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level); +static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo); +static ha_checksum calc_checksum(ha_rows count); +static int writekeys(MARIA_SORT_PARAM *sort_param); +static int sort_one_index(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t pagepos, File new_file); +static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key); +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key); +static int sort_get_next_record(MARIA_SORT_PARAM *sort_param); +static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, + const void *b); +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, + const byte *a); +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a); +static my_off_t get_record_for_key(MARIA_HA *info,MARIA_KEYDEF *keyinfo, + const byte *key); +static int sort_insert_key(MARIA_SORT_PARAM *sort_param, + reg1 SORT_KEY_BLOCKS *key_block, + const byte *key, my_off_t prev_block); +static int sort_delete_record(MARIA_SORT_PARAM *sort_param); +/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/ +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, + uint buffer_length); +static ha_checksum maria_byte_checksum(const byte *buf, uint length); +static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share); +static void restore_data_file_type(MARIA_SHARE *share); + +void maria_chk_init(HA_CHECK *param) +{ + bzero((gptr) param,sizeof(*param)); + param->opt_follow_links=1; + param->keys_in_use= ~(ulonglong) 0; + param->search_after_block=HA_OFFSET_ERROR; + param->auto_increment_value= 0; + param->use_buffers=USE_BUFFER_INIT; + param->read_buffer_length=READ_BUFFER_INIT; + param->write_buffer_length=READ_BUFFER_INIT; + param->sort_buffer_length=SORT_BUFFER_INIT; + param->sort_key_blocks=BUFFERS_WHEN_SORTING; + param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; + param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL); + param->start_check_pos=0; + param->max_record_length= LONGLONG_MAX; + param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE; + param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL; +} + + /* Check the status flags for the table */ + +int maria_chk_status(HA_CHECK *param, register MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + + if (maria_is_crashed_on_repair(info)) + _ma_check_print_warning(param, + "Table is marked as crashed and last repair failed"); + else if (maria_is_crashed(info)) + _ma_check_print_warning(param, + "Table is marked as crashed"); + if (share->state.open_count != (uint) (info->s->global_changed ? 1 : 0)) + { + /* Don't count this as a real warning, as check can correct this ! */ + uint save=param->warning_printed; + _ma_check_print_warning(param, + share->state.open_count==1 ? + "%d client is using or hasn't closed the table properly" : + "%d clients are using or haven't closed the table properly", + share->state.open_count); + /* If this will be fixed by the check, forget the warning */ + if (param->testflag & T_UPDATE_STATE) + param->warning_printed=save; + } + return 0; +} + +/* + Check delete links in row data +*/ + +int maria_chk_del(HA_CHECK *param, register MARIA_HA *info, uint test_flag) +{ + reg2 ha_rows i; + uint delete_link_length; + my_off_t empty,next_link,old_link; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_del"); + + LINT_INIT(old_link); + + if (info->s->data_file_type == BLOCK_RECORD) + DBUG_RETURN(0); /* No delete links here */ + + param->record_checksum=0; + delete_link_length=((info->s->options & HA_OPTION_PACK_RECORD) ? 20 : + info->s->rec_reflength+1); + + if (!(test_flag & T_SILENT)) + puts("- check record delete-chain"); + + next_link=info->s->state.dellink; + if (info->state->del == 0) + { + if (test_flag & T_VERBOSE) + { + puts("No recordlinks"); + } + } + else + { + if (test_flag & T_VERBOSE) + printf("Recordlinks: "); + empty=0; + for (i= info->state->del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + if (test_flag & T_VERBOSE) + printf(" %9s",llstr(next_link,buff)); + if (next_link >= info->state->data_file_length) + goto wrong; + if (my_pread(info->dfile.file, (char*) buff, delete_link_length, + next_link,MYF(MY_NABP))) + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Can't read delete-link at filepos: %s", + llstr(next_link,buff)); + DBUG_RETURN(1); + } + if (*buff != '\0') + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Record at pos: %s is not remove-marked", + llstr(next_link,buff)); + goto wrong; + } + if (info->s->options & HA_OPTION_PACK_RECORD) + { + my_off_t prev_link=mi_sizekorr(buff+12); + if (empty && prev_link != old_link) + { + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"Deleted block at %s doesn't point back at previous delete link",llstr(next_link,buff2)); + goto wrong; + } + old_link=next_link; + next_link=mi_sizekorr(buff+4); + empty+=mi_uint3korr(buff+1); + } + else + { + param->record_checksum+=(ha_checksum) next_link; + next_link= _ma_rec_pos(info->s, buff+1); + empty+=info->s->base.pack_reclength; + } + } + if (test_flag & T_VERBOSE) + puts("\n"); + if (empty != info->state->empty) + { + _ma_check_print_warning(param, + "Found %s deleted space in delete link chain. Should be %s", + llstr(empty,buff2), + llstr(info->state->empty,buff)); + } + if (next_link != HA_OFFSET_ERROR) + { + _ma_check_print_error(param, + "Found more than the expected %s deleted rows in delete link chain", + llstr(info->state->del, buff)); + goto wrong; + } + if (i != 0) + { + _ma_check_print_error(param, + "Found %s deleted rows in delete link chain. Should be %s", + llstr(info->state->del - i, buff2), + llstr(info->state->del, buff)); + goto wrong; + } + } + DBUG_RETURN(0); + +wrong: + param->testflag|=T_RETRY_WITHOUT_QUICK; + if (test_flag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"record delete-link-chain corrupted"); + DBUG_RETURN(1); +} /* maria_chk_del */ + + + /* Check delete links in index file */ + +static int check_k_link(HA_CHECK *param, register MARIA_HA *info, + my_off_t next_link) +{ + uint block_size= info->s->block_size; + ha_rows records; + char llbuff[21], llbuff2[21], *buff; + DBUG_ENTER("check_k_link"); + + records= (ha_rows) (info->state->key_file_length / block_size); + while (next_link != HA_OFFSET_ERROR && records > 0) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + if (param->testflag & T_VERBOSE) + printf("%16s",llstr(next_link,llbuff)); + + /* Key blocks must lay within the key file length entirely. */ + if (next_link + block_size > info->state->key_file_length) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(next_link, llbuff), block_size, + llstr(info->state->key_file_length, llbuff2)); + DBUG_RETURN(1); + /* purecov: end */ + } + + /* Key blocks must be aligned at block_size */ + if (next_link & (block_size -1)) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(next_link, llbuff), + block_size); + DBUG_RETURN(1); + /* purecov: end */ + } + + DBUG_ASSERT(info->s->pagecache->block_size == block_size); + if (!(buff= pagecache_read(info->s->pagecache, + &info->s->kfile, next_link/block_size, + DFLT_INIT_HITS, + (byte*) info->buff, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "key cache read error for block: %s", + llstr(next_link,llbuff)); + DBUG_RETURN(1); + /* purecov: end */ + } + next_link=mi_sizekorr(buff); + records--; + param->key_file_blocks+=block_size; + } + if (param->testflag & T_VERBOSE) + { + if (next_link != HA_OFFSET_ERROR) + printf("%16s\n",llstr(next_link,llbuff)); + else + puts(""); + } + DBUG_RETURN (next_link != HA_OFFSET_ERROR); +} /* check_k_link */ + + + /* Check sizes of files */ + +int maria_chk_size(HA_CHECK *param, register MARIA_HA *info) +{ + int error=0; + register my_off_t skr,size; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_size"); + + if (!(param->testflag & T_SILENT)) + puts("- check file-size"); + + /* The following is needed if called externally (not from maria_chk) */ + flush_pagecache_blocks(info->s->pagecache, + &info->s->kfile, FLUSH_FORCE_WRITE); + + size= my_seek(info->s->kfile.file, 0L, MY_SEEK_END, MYF(0)); + if ((skr=(my_off_t) info->state->key_file_length) != size) + { + /* Don't give error if file generated by mariapack */ + if (skr > size && maria_is_any_key_active(info->s->state.key_map)) + { + error=1; + _ma_check_print_error(param, + "Size of indexfile is: %-8s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + else if (!(param->testflag & T_VERY_SILENT)) + _ma_check_print_warning(param, + "Size of indexfile is: %-8s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + if (!(param->testflag & T_VERY_SILENT) && + ! (info->s->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(info->state->key_file_length) > + ulonglong2double(info->s->base.margin_key_file_length)*0.9) + _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used", + llstr(info->state->key_file_length,buff), + llstr(info->s->base.max_key_file_length-1,buff)); + + size= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)); + skr=(my_off_t) info->state->data_file_length; + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + skr+= MEMMAP_EXTRA_MARGIN; +#ifdef USE_RELOC + if (info->data_file_type == STATIC_RECORD && + skr < (my_off_t) info->s->base.reloc*info->s->base.min_pack_length) + skr=(my_off_t) info->s->base.reloc*info->s->base.min_pack_length; +#endif + if (skr != size) + { + info->state->data_file_length=size; /* Skip other errors */ + if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN) + { + error=1; + _ma_check_print_error(param,"Size of datafile is: %-9s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + } + else + { + _ma_check_print_warning(param, + "Size of datafile is: %-9s Should be: %s", + llstr(size,buff), llstr(skr,buff2)); + } + } + if (!(param->testflag & T_VERY_SILENT) && + !(info->s->options & HA_OPTION_COMPRESS_RECORD) && + ulonglong2double(info->state->data_file_length) > + (ulonglong2double(info->s->base.max_data_file_length)*0.9)) + _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used", + llstr(info->state->data_file_length,buff), + llstr(info->s->base.max_data_file_length-1,buff2)); + DBUG_RETURN(error); +} /* maria_chk_size */ + + +/* Check keys */ + +int maria_chk_key(HA_CHECK *param, register MARIA_HA *info) +{ + uint key,found_keys=0,full_text_keys=0,result=0; + ha_rows keys; + ha_checksum old_record_checksum,init_checksum; + my_off_t all_keydata,all_totaldata,key_totlength,length; + ulong *rec_per_key_part; + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *keyinfo; + char buff[22],buff2[22]; + DBUG_ENTER("maria_chk_key"); + + if (!(param->testflag & T_SILENT)) + puts("- check key delete-chain"); + + param->key_file_blocks=info->s->base.keystart; + if (check_k_link(param, info, info->s->state.key_del)) + { + if (param->testflag & T_VERBOSE) puts(""); + _ma_check_print_error(param,"key delete-link-chain corrupted"); + DBUG_RETURN(-1); + } + + if (!(param->testflag & T_SILENT)) puts("- check index reference"); + + all_keydata=all_totaldata=key_totlength=0; + old_record_checksum=0; + init_checksum=param->record_checksum; + if (share->data_file_type == STATIC_RECORD) + old_record_checksum= (calc_checksum(info->state->records + + info->state->del-1) * + share->base.pack_reclength); + rec_per_key_part= param->rec_per_key_part; + for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; + rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++) + { + param->key_crc[key]=0; + if (! maria_is_key_active(share->state.key_map, key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part + + (uint) (rec_per_key_part - param->rec_per_key_part)), + keyinfo->keysegs*sizeof(*rec_per_key_part)); + continue; + } + found_keys++; + + param->record_checksum=init_checksum; + + bzero((char*) ¶m->unique_count,sizeof(param->unique_count)); + bzero((char*) ¶m->notnull_count,sizeof(param->notnull_count)); + + if ((!(param->testflag & T_SILENT))) + printf ("- check data record references index: %d\n",key+1); + if (keyinfo->flag & HA_FULLTEXT) + full_text_keys++; + if (share->state.key_root[key] == HA_OFFSET_ERROR && + (info->state->records == 0 || keyinfo->flag & HA_FULLTEXT)) + goto do_stat; + if (!_ma_fetch_keypage(info,keyinfo,share->state.key_root[key], + DFLT_INIT_HITS,info->buff,0)) + { + _ma_check_print_error(param,"Can't read indexpage from filepos: %s", + llstr(share->state.key_root[key],buff)); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + param->key_file_blocks+=keyinfo->block_length; + keys=0; + param->keydata=param->totaldata=0; + param->key_blocks=0; + param->max_level=0; + if (chk_index(param,info,keyinfo,share->state.key_root[key],info->buff, + &keys, param->key_crc+key,1)) + DBUG_RETURN(-1); + if(!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))) + { + if (keys != info->state->records) + { + _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff), + llstr(info->state->records,buff2)); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + if ((found_keys - full_text_keys == 1 && + !(share->data_file_type == STATIC_RECORD)) || + (param->testflag & T_DONT_CHECK_CHECKSUM)) + old_record_checksum= param->record_checksum; + else if (old_record_checksum != param->record_checksum) + { + if (key) + _ma_check_print_error(param,"Key %u doesn't point at same records that key 1", + key+1); + else + _ma_check_print_error(param,"Key 1 doesn't point at all records"); + if (!(param->testflag & T_INFO)) + DBUG_RETURN(-1); + result= -1; + continue; + } + } + if ((uint) share->base.auto_key -1 == key) + { + /* Check that auto_increment key is bigger than max key value */ + ulonglong auto_increment; + info->lastinx=key; + _ma_read_key_record(info, info->rec_buff, 0); + auto_increment= ma_retrieve_auto_increment(info, info->rec_buff); + if (auto_increment > info->s->state.auto_increment) + { + _ma_check_print_warning(param, "Auto-increment value: %s is smaller " + "than max used value: %s", + llstr(info->s->state.auto_increment,buff2), + llstr(auto_increment, buff)); + } + if (param->testflag & T_AUTO_INC) + { + set_if_bigger(info->s->state.auto_increment, + auto_increment); + set_if_bigger(info->s->state.auto_increment, + param->auto_increment_value); + } + + /* Check that there isn't a row with auto_increment = 0 in the table */ + maria_extra(info,HA_EXTRA_KEYREAD,0); + bzero(info->lastkey,keyinfo->seg->length); + if (!maria_rkey(info, info->rec_buff, key, (const byte*) info->lastkey, + keyinfo->seg->length, HA_READ_KEY_EXACT)) + { + /* Don't count this as a real warning, as maria_chk can't correct it */ + uint save=param->warning_printed; + _ma_check_print_warning(param, "Found row where the auto_increment " + "column has the value 0"); + param->warning_printed=save; + } + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + } + + length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2; + if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L) + printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n", + key+1, + (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)), + (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/ + my_off_t2double(length)), + param->max_level); + all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length; + +do_stat: + if (param->testflag & T_STATISTICS) + maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + param->notnull_count: NULL, + (ulonglong)info->state->records); + } + if (param->testflag & T_INFO) + { + if (all_totaldata != 0L && found_keys > 0) + printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n", + (int) (my_off_t2double(all_keydata)*100.0/ + my_off_t2double(all_totaldata)), + (int) ((my_off_t2double(key_totlength) - + my_off_t2double(all_keydata))*100.0/ + my_off_t2double(key_totlength))); + else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map)) + puts(""); + } + if (param->key_file_blocks != info->state->key_file_length && + param->keys_in_use != ~(ulonglong) 0) + _ma_check_print_warning(param, "Some data are unreferenced in keyfile"); + if (found_keys != full_text_keys) + param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */ + else + param->record_checksum=0; + DBUG_RETURN(result); +} /* maria_chk_key */ + + +static int chk_index_down(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level) +{ + char llbuff[22],llbuff2[22]; + DBUG_ENTER("chk_index_down"); + + /* Key blocks must lay within the key file length entirely. */ + if (page + keyinfo->block_length > info->state->key_file_length) + { + /* purecov: begin tested */ + /* Give it a chance to fit in the real file size. */ + my_off_t max_length= my_seek(info->s->kfile.file, 0L, MY_SEEK_END, MYF(0)); + _ma_check_print_error(param, "Invalid key block position: %s " + "key block size: %u file_length: %s", + llstr(page, llbuff), keyinfo->block_length, + llstr(info->state->key_file_length, llbuff2)); + if (page + keyinfo->block_length > max_length) + goto err; + /* Fix the remembered key file length. */ + info->state->key_file_length= (max_length & + ~ (my_off_t) (keyinfo->block_length - 1)); + /* purecov: end */ + } + + /* Key blocks must be aligned at block length */ + if (page & (info->s->block_size -1)) + { + /* purecov: begin tested */ + _ma_check_print_error(param, "Mis-aligned key block: %s " + "minimum key block length: %u", + llstr(page, llbuff), info->s->block_size); + goto err; + /* purecov: end */ + } + + if (!_ma_fetch_keypage(info,keyinfo,page, DFLT_INIT_HITS,buff,0)) + { + _ma_check_print_error(param,"Can't read key from filepos: %s", + llstr(page,llbuff)); + goto err; + } + param->key_file_blocks+=keyinfo->block_length; + if (chk_index(param,info,keyinfo,page,buff,keys,key_checksum,level)) + goto err; + + DBUG_RETURN(0); + + /* purecov: begin tested */ +err: + DBUG_RETURN(1); + /* purecov: end */ +} + + +/* + "Ignore NULLs" statistics collection method: process first index tuple. + + SYNOPSIS + maria_collect_stats_nonulls_first() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + key IN Key values tuple + + DESCRIPTION + Process the first index tuple - find out which prefix tuples don't + contain NULLs, and update the array of notnull counters accordingly. +*/ + +static +void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull, + const byte *key) +{ + uint first_null, kp; + first_null= ha_find_null(keyseg, (uchar*) key) - keyseg; + /* + All prefix tuples that don't include keypart_{first_null} are not-null + tuples (and all others aren't), increment counters for them. + */ + for (kp= 0; kp < first_null; kp++) + notnull[kp]++; +} + + +/* + "Ignore NULLs" statistics collection method: process next index tuple. + + SYNOPSIS + maria_collect_stats_nonulls_next() + keyseg IN Array of key part descriptions + notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i} + tuples that don't contain NULLs) + prev_key IN Previous key values tuple + last_key IN Next key values tuple + + DESCRIPTION + Process the next index tuple: + 1. Find out which prefix tuples of last_key don't contain NULLs, and + update the array of notnull counters accordingly. + 2. Find the first keypart number where the prev_key and last_key tuples + are different(A), or last_key has NULL value(B), and return it, so the + caller can count number of unique tuples for each key prefix. We don't + need (B) to be counted, and that is compensated back in + maria_update_key_parts(). + + RETURN + 1 + number of first keypart where values differ or last_key tuple has NULL +*/ + +static +int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, + const byte *prev_key, + const byte *last_key) +{ + uint diffs[2]; + uint first_null_seg, kp; + HA_KEYSEG *seg; + + /* + Find the first keypart where values are different or either of them is + NULL. We get results in diffs array: + diffs[0]= 1 + number of first different keypart + diffs[1]=offset: (last_key + diffs[1]) points to first value in + last_key that is NULL or different from corresponding + value in prev_key. + */ + ha_key_cmp(keyseg, (uchar*) prev_key, (uchar*) last_key, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs); + seg= keyseg + diffs[0] - 1; + + /* Find first NULL in last_key */ + first_null_seg= ha_find_null(seg, (uchar*) last_key + diffs[1]) - keyseg; + for (kp= 0; kp < first_null_seg; kp++) + notnull[kp]++; + + /* + Return 1+ number of first key part where values differ. Don't care if + these were NULLs and not .... We compensate for that in + maria_update_key_parts. + */ + return diffs[0]; +} + + + /* Check if index is ok */ + +static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, ha_rows *keys, + ha_checksum *key_checksum, uint level) +{ + int flag; + uint used_length,comp_flag,nod_flag,key_length=0; + byte key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; + my_off_t next_page,record; + char llbuff[22]; + uint diff_pos[2]; + DBUG_ENTER("chk_index"); + DBUG_DUMP("buff",(byte*) buff,maria_data_on_page(buff)); + + /* TODO: implement appropriate check for RTree keys */ + if (keyinfo->flag & HA_SPATIAL) + DBUG_RETURN(0); + + if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for keyblock"); + DBUG_RETURN(-1); + } + + if (keyinfo->flag & HA_NOSAME) + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* Not real duplicates */ + else + comp_flag=SEARCH_SAME; /* Keys in positionorder */ + nod_flag=_ma_test_if_nod(buff); + used_length= maria_data_on_page(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + + param->keydata+=used_length; param->totaldata+=keyinfo->block_length; /* INFO */ + param->key_blocks++; + if (level > param->max_level) + param->max_level=level; + + if (used_length > keyinfo->block_length) + { + _ma_check_print_error(param,"Wrong pageinfo at page: %s", + llstr(page,llbuff)); + goto err; + } + for ( ;; ) + { + if (*_ma_killed_ptr(param)) + goto err; + memcpy(info->lastkey, key, key_length); + info->lastkey_length= key_length; + if (nod_flag) + { + next_page= _ma_kpos(nod_flag,keypos); + if (chk_index_down(param,info,keyinfo,next_page, + temp_buff,keys,key_checksum,level+1)) + goto err; + } + old_keypos=keypos; + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0) + break; + if (keypos > endpos) + { + _ma_check_print_error(param,"Wrong key block length at page: %s", + llstr(page,llbuff)); + goto err; + } + if ((*keys)++ && + (flag=ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, + key_length, comp_flag, diff_pos)) >=0) + { + DBUG_DUMP("old", info->lastkey, info->lastkey_length); + DBUG_DUMP("new", key, key_length); + DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos)); + + if (comp_flag & SEARCH_FIND && flag == 0) + _ma_check_print_error(param,"Found duplicated key at page %s", + llstr(page,llbuff)); + else + _ma_check_print_error(param,"Key in wrong position at page %s", + llstr(page,llbuff)); + goto err; + } + if (param->testflag & T_STATISTICS) + { + if (*keys != 1L) /* not first_key */ + { + if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) + ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, + USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, + diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg, + param->notnull_count, + info->lastkey, key); + } + param->unique_count[diff_pos[0]-1]++; + } + else + { + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count, + key); + } + } + (*key_checksum)+= maria_byte_checksum((byte*) key, + key_length- info->s->rec_reflength); + record= _ma_dpos(info,0,key+key_length); + if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */ + { + uint off; + int subkeys; + get_key_full_length_rdonly(off, key); + subkeys=ft_sintXkorr(key+off); + if (subkeys < 0) + { + ha_rows tmp_keys=0; + if (chk_index_down(param,info,&info->s->ft2_keyinfo,record, + temp_buff,&tmp_keys,key_checksum,1)) + goto err; + if (tmp_keys + subkeys) + { + _ma_check_print_error(param, + "Number of words in the 2nd level tree " + "does not match the number in the header. " + "Parent word in on the page %s, offset %u", + llstr(page,llbuff), (uint) (old_keypos-buff)); + goto err; + } + (*keys)+=tmp_keys-1; + continue; + } + /* fall through */ + } + if (record >= info->state->data_file_length) + { +#ifndef DBUG_OFF + char llbuff2[22], llbuff3[22]; +#endif + _ma_check_print_error(param,"Found key at page %s that points to record outside datafile",llstr(page,llbuff)); + DBUG_PRINT("test",("page: %s record: %s filelength: %s", + llstr(page,llbuff),llstr(record,llbuff2), + llstr(info->state->data_file_length,llbuff3))); + DBUG_DUMP("key",(byte*) key,key_length); + DBUG_DUMP("new_in_page",(char*) old_keypos,(uint) (keypos-old_keypos)); + goto err; + } + param->record_checksum+= (ha_checksum) record; + } + if (keypos != endpos) + { + _ma_check_print_error(param,"Keyblock size at page %s is not correct. Block length: %d key length: %d", + llstr(page,llbuff), used_length, (keypos - buff)); + goto err; + } + my_afree((byte*) temp_buff); + DBUG_RETURN(0); + err: + my_afree((byte*) temp_buff); + DBUG_RETURN(1); +} /* chk_index */ + + + /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */ + +static ha_checksum calc_checksum(ha_rows count) +{ + ulonglong sum,a,b; + DBUG_ENTER("calc_checksum"); + + sum=0; + a=count; b=count+1; + if (a & 1) + b>>=1; + else + a>>=1; + while (b) + { + if (b & 1) + sum+=a; + a<<=1; b>>=1; + } + DBUG_PRINT("exit",("sum: %lx",(ulong) sum)); + DBUG_RETURN((ha_checksum) sum); +} /* calc_checksum */ + + + /* Calc length of key in normal isam */ + +static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo) +{ + uint length; + HA_KEYSEG *keyseg; + DBUG_ENTER("isam_key_length"); + + length= info->s->rec_reflength; + for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++) + length+= keyseg->length; + + DBUG_PRINT("exit",("length: %d",length)); + DBUG_RETURN(length); +} /* key_length */ + + + +static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos, + char *buff) +{ + if (info->s->data_file_type != BLOCK_RECORD) + llstr(recpos, buff); + else + { + my_off_t page= ma_recordpos_to_page(recpos); + uint row= ma_recordpos_to_dir_entry(recpos); + char *end= longlong10_to_str(page, buff, 10); + *(end++)= ':'; + longlong10_to_str(row, end, 10); + } +} + + +/* + Check that keys in records exist in index tree + + SYNOPSIS + check_keys_in_record() + param Check paramenter + info Maria handler + extend Type of check (extended or normal) + start_recpos Position to row + record Record buffer + + NOTES + This function also calculates record checksum & number of rows +*/ + +static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend, + my_off_t start_recpos, byte *record) +{ + MARIA_KEYDEF *keyinfo; + char llbuff[22+4]; + uint key; + + param->tmp_record_checksum+= (ha_checksum) start_recpos; + param->records++; + if (param->testflag & T_WRITE_LOOP && param->records % WRITE_COUNT == 0) + { + printf("%s\r", llstr(param->records, llbuff)); + VOID(fflush(stdout)); + } + + /* Check if keys match the record */ + for (key=0, keyinfo= info->s->keyinfo; key < info->s->base.keys; + key++,keyinfo++) + { + if (maria_is_key_active(info->s->state.key_map, key)) + { + if(!(keyinfo->flag & HA_FULLTEXT)) + { + uint key_length= _ma_make_key(info,key,info->lastkey,record, + start_recpos); + if (extend) + { + /* We don't need to lock the key tree here as we don't allow + concurrent threads when running maria_chk + */ + int search_result= +#ifdef HAVE_RTREE_KEYS + (keyinfo->flag & HA_SPATIAL) ? + maria_rtree_find_first(info, key, info->lastkey, key_length, + MBR_EQUAL | MBR_DATA) : +#endif + _ma_search(info,keyinfo,info->lastkey,key_length, + SEARCH_SAME, info->s->state.key_root[key]); + if (search_result) + { + record_pos_to_txt(info, start_recpos, llbuff); + _ma_check_print_error(param, + "Record at: %14s " + "Can't find key for index: %2d", + llbuff, key+1); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + return -1; + } + } + else + param->tmp_key_crc[key]+= + maria_byte_checksum((byte*) info->lastkey, key_length); + } + } + } + return 0; +} + + +/* + Functions to loop through all rows and check if they are ok + + NOTES + One function for each record format + + RESULT + 0 ok + -1 Interrupted by user + 1 Error +*/ + +static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t start_recpos, pos; + char llbuff[22]; + + pos= 0; + while (pos < info->state->data_file_length) + { + if (*_ma_killed_ptr(param)) + return -1; + if (my_b_read(¶m->read_cache,(byte*) record, + info->s->base.pack_reclength)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(pos, llbuff)); + return 1; + } + start_recpos= pos; + pos+= info->s->base.pack_reclength; + param->splits++; + if (*record == '\0') + { + param->del_blocks++; + param->del_length+= info->s->base.pack_reclength; + continue; /* Record removed */ + } + param->glob_crc+= _ma_static_checksum(info,record); + param->used+= info->s->base.pack_reclength; + if (check_keys_in_record(param, info, extend, start_recpos, record)) + return 1; + } + return 0; +} + + +static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + MARIA_BLOCK_INFO block_info; + my_off_t start_recpos, start_block, pos; + byte *to; + ulong left_length; + uint b_type; + char llbuff[22],llbuff2[22],llbuff3[22]; + DBUG_ENTER("check_dynamic_record"); + + LINT_INIT(left_length); + LINT_INIT(start_recpos); + LINT_INIT(to); + + pos= 0; + while (pos < info->state->data_file_length) + { + my_bool got_error= 0; + int flag; + if (*_ma_killed_ptr(param)) + DBUG_RETURN(-1); + + flag= block_info.second_read=0; + block_info.next_filepos=pos; + do + { + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, + (start_block=block_info.next_filepos), + sizeof(block_info.header), + (flag ? 0 : READING_NEXT) | READING_HEADER)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at " + "position: %s", + my_errno, llstr(start_block, llbuff)); + DBUG_RETURN(1); + } + + if (start_block & (MARIA_DYN_ALIGN_SIZE-1)) + { + _ma_check_print_error(param,"Wrong aligned block at %s", + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + b_type= _ma_get_block_info(&block_info,-1,start_block); + if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & BLOCK_SYNC_ERROR) + { + if (flag) + { + _ma_check_print_error(param,"Unexpected byte: %d at link: %s", + (int) block_info.header[0], + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + pos=block_info.filepos+block_info.block_len; + goto next; + } + if (b_type & BLOCK_DELETED) + { + if (block_info.block_len < info->s->base.min_block_length) + { + _ma_check_print_error(param, + "Deleted block with impossible length %lu at %s", + block_info.block_len,llstr(pos,llbuff)); + DBUG_RETURN(1); + } + if ((block_info.next_filepos != HA_OFFSET_ERROR && + block_info.next_filepos >= info->state->data_file_length) || + (block_info.prev_filepos != HA_OFFSET_ERROR && + block_info.prev_filepos >= info->state->data_file_length)) + { + _ma_check_print_error(param,"Delete link points outside datafile at %s", + llstr(pos,llbuff)); + DBUG_RETURN(1); + } + param->del_blocks++; + param->del_length+= block_info.block_len; + param->splits++; + pos= block_info.filepos+block_info.block_len; + goto next; + } + _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s", + block_info.header[0],block_info.header[1], + block_info.header[2], + llstr(start_block,llbuff)); + DBUG_RETURN(1); + } + if (info->state->data_file_length < block_info.filepos+ + block_info.block_len) + { + _ma_check_print_error(param, + "Recordlink that points outside datafile at %s", + llstr(pos,llbuff)); + got_error=1; + break; + } + param->splits++; + if (!flag++) /* First block */ + { + start_recpos=pos; + pos=block_info.filepos+block_info.block_len; + if (block_info.rec_len > (uint) info->s->base.max_pack_length) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (info->s->base.blobs) + { + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + + { + _ma_check_print_error(param, + "Not enough memory (%lu) for blob at %s", + (ulong) block_info.rec_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + } + to= info->rec_buff; + left_length= block_info.rec_len; + } + if (left_length < block_info.data_len) + { + _ma_check_print_error(param,"Found too long record (%lu) at %s", + (ulong) block_info.data_len, + llstr(start_recpos,llbuff)); + got_error=1; + break; + } + if (_ma_read_cache(¶m->read_cache,(byte*) to,block_info.filepos, + (uint) block_info.data_len, + flag == 1 ? READING_NEXT : 0)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", my_errno, llstr(block_info.filepos, llbuff)); + + DBUG_RETURN(1); + } + to+=block_info.data_len; + param->link_used+= block_info.filepos-start_block; + param->used+= block_info.filepos - start_block + block_info.data_len; + param->empty+= block_info.block_len-block_info.data_len; + left_length-= block_info.data_len; + if (left_length) + { + if (b_type & BLOCK_LAST) + { + _ma_check_print_error(param, + "Wrong record length %s of %s at %s", + llstr(block_info.rec_len-left_length,llbuff), + llstr(block_info.rec_len, llbuff2), + llstr(start_recpos,llbuff3)); + got_error=1; + break; + } + if (info->state->data_file_length < block_info.next_filepos) + { + _ma_check_print_error(param, + "Found next-recordlink that points outside datafile at %s", + llstr(block_info.filepos,llbuff)); + got_error=1; + break; + } + } + } while (left_length); + + if (! got_error) + { + if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) == + MY_FILE_ERROR) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + } + else + { + info->cur_row.checksum= _ma_checksum(info,record); + if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE)) + { + if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len, + test(info->s->calc_checksum))) + { + _ma_check_print_error(param,"Found wrong packed record at %s", + llstr(start_recpos,llbuff)); + got_error= 1; + } + } + param->glob_crc+= info->cur_row.checksum; + } + + if (! got_error) + { + if (check_keys_in_record(param, info, extend, start_recpos, record)) + DBUG_RETURN(1); + } + else + { + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + else if (!flag) + pos= block_info.filepos+block_info.block_len; +next:; + } + DBUG_RETURN(0); +} + + +static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t start_recpos, pos; + char llbuff[22]; + bool got_error= 0; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("check_compressed_record"); + + pos= info->s->pack.header_length; /* Skip header */ + while (pos < info->state->data_file_length) + { + if (*_ma_killed_ptr(param)) + DBUG_RETURN(-1); + + if (_ma_read_cache(¶m->read_cache,(byte*) block_info.header, pos, + info->s->pack.ref_length, READING_NEXT)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(pos, llbuff)); + DBUG_RETURN(1); + } + + start_recpos= pos; + param->splits++; + VOID(_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, &info->rec_buff_size, -1, + start_recpos)); + pos=block_info.filepos+block_info.rec_len; + if (block_info.rec_len < (uint) info->s->min_pack_length || + block_info.rec_len > (uint) info->s->max_pack_length) + { + _ma_check_print_error(param, + "Found block with wrong recordlength: %d at %s", + block_info.rec_len, llstr(start_recpos,llbuff)); + got_error=1; + goto end; + } + if (_ma_read_cache(¶m->read_cache,(byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, READING_NEXT)) + { + _ma_check_print_error(param, + "got error: %d when reading datafile at position: %s", + my_errno, llstr(block_info.filepos, llbuff)); + DBUG_RETURN(1); + } + if (_ma_pack_rec_unpack(info, &info->bit_buff, record, + info->rec_buff, block_info.rec_len)) + { + _ma_check_print_error(param,"Found wrong record at %s", + llstr(start_recpos,llbuff)); + got_error=1; + goto end; + } + param->glob_crc+= (*info->s->calc_checksum)(info,record); + param->link_used+= (block_info.filepos - start_recpos); + param->used+= (pos-start_recpos); + +end: + if (! got_error) + { + if (check_keys_in_record(param, info, extend, start_recpos, record)) + DBUG_RETURN(1); + } + else + { + got_error= 0; /* Reset for next loop */ + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Check if layout on a page is ok + + NOTES + This is for rows-in-block format. +*/ + +static int check_page_layout(HA_CHECK *param, MARIA_HA *info, + my_off_t page_pos, byte *page, + uint row_count, uint head_empty, + uint *real_rows_found) +{ + uint empty, last_row_end, row, first_dir_entry; + byte *dir_entry; + char llbuff[22]; + DBUG_ENTER("check_page_layout"); + + empty= 0; + last_row_end= PAGE_HEADER_SIZE; + *real_rows_found= 0; + + dir_entry= page+ info->s->block_size - PAGE_SUFFIX_SIZE; + first_dir_entry= info->s->block_size - row_count* DIR_ENTRY_SIZE; + for (row= 0 ; row < row_count ; row++) + { + uint pos, length; + dir_entry-= DIR_ENTRY_SIZE; + pos= uint2korr(dir_entry); + if (!pos) + { + if (row == row_count -1) + { + _ma_check_print_error(param, + "Page %9s: First entry in directory is 0", + llstr(page_pos, llbuff)); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + continue; /* Deleted row */ + } + (*real_rows_found)++; + length= uint2korr(dir_entry+2); + param->used+= length; + if (pos < last_row_end) + { + _ma_check_print_error(param, + "Page %9s: Row %3u overlapps with previous row", + llstr(page_pos, llbuff), row); + DBUG_RETURN(1); + } + empty+= (pos - last_row_end); + last_row_end= pos + length; + if (last_row_end > first_dir_entry) + { + _ma_check_print_error(param, + "Page %9s: Row %3u overlapps with directory", + llstr(page_pos, llbuff), row); + DBUG_RETURN(1); + } + } + empty+= (first_dir_entry - last_row_end); + + if (empty != head_empty) + { + _ma_check_print_error(param, + "Page %9s: Wrong empty size. Stored: %5u Actual: %5u", + llstr(page_pos, llbuff), head_empty, empty); + DBUG_RETURN(param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)); + } + DBUG_RETURN(0); +} + + +/* + Check all rows on head page + + NOTES + This is for rows-in-block format. + + Before this, we have already called check_page_layout(), so + we know the block is logicaly correct (even if the rows may not be that) + + RETURN + 0 ok + 1 error +*/ + + +static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, byte *record, + int extend, my_off_t page_pos, byte *page_buff, + uint row_count) +{ + byte *dir_entry; + uint row; + char llbuff[22], llbuff2[22]; + DBUG_ENTER("check_head_page"); + + dir_entry= page_buff+ info->s->block_size - PAGE_SUFFIX_SIZE; + for (row= 0 ; row < row_count ; row++) + { + uint pos, length, flag; + dir_entry-= DIR_ENTRY_SIZE; + pos= uint2korr(dir_entry); + if (!pos) + continue; + length= uint2korr(dir_entry+2); + if (length < info->s->base.min_block_length) + { + _ma_check_print_error(param, + "Page %9s: Row %3u is too short (%d bytes)", + llstr(page_pos, llbuff), row, length); + DBUG_RETURN(1); + } + flag= (uint) (uchar) page_buff[pos]; + if (flag & ~(ROW_FLAG_ALL)) + _ma_check_print_error(param, + "Page %9s: Row %3u has wrong flag: %d", + llstr(page_pos, llbuff), row, flag); + + DBUG_PRINT("info", ("rowid: %s page: %lu row: %u", + llstr(ma_recordpos(page_pos/info->s->block_size, row), + llbuff), + (ulong) (page_pos / info->s->block_size), row)); + if (_ma_read_block_record2(info, record, page_buff+pos, + page_buff+pos+length)) + { + _ma_check_print_error(param, + "Page %9s: Row %3d is crashed", + llstr(page_pos, llbuff), row); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + continue; + } + if (info->s->calc_checksum) + { + info->cur_row.checksum= _ma_checksum(info, record); + param->glob_crc+= info->cur_row.checksum; + } + if (info->cur_row.extents_count) + { + byte *extents= info->cur_row.extents; + uint i; + /* Check that bitmap has the right marker for the found extents */ + for (i= 0 ; i < info->cur_row.extents_count ; i++) + { + uint page, page_count, page_type; + page= uint5korr(extents); + page_count= uint2korr(extents+5); + extents+= ROW_EXTENT_SIZE; + page_type= BLOB_PAGE; + if (page_count & TAIL_BIT) + { + page_count= 1; + page_type= TAIL_PAGE; + } + for ( ; page_count--; page++) + { + uint bitmap_pattern; + if (_ma_check_if_right_bitmap_type(info, page_type, page, + &bitmap_pattern)) + { + _ma_check_print_error(param, + "Page %9s: Row: %3d has an extent with wrong information in bitmap: Page %9s Page_type: %d Bitmap: %d", + llstr(page_pos, llbuff), row, + llstr(page * info->s->bitmap.block_size, + llbuff2), + page_type, + bitmap_pattern); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + DBUG_RETURN(1); + } + } + } + } + param->full_page_count+= info->cur_row.full_page_count; + param->tail_count+= info->cur_row.tail_count; + if (check_keys_in_record(param, info, extend, + ma_recordpos(page_pos/info->s->block_size, row), + record)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +/* + Check if rows-in-block data file is consistent +*/ + +static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend, + byte *record) +{ + my_off_t pos; + byte *page_buff, *bitmap_buff, *data; + char llbuff[22], llbuff2[22]; + uint block_size= info->s->block_size; + ha_rows full_page_count, tail_count; + my_bool full_dir; + uint offset_page, offset; + + if (_ma_scan_init_block_record(info)) + { + _ma_check_print_error(param, "got error %d when initializing scan", + my_errno); + return 1; + } + bitmap_buff= info->scan.bitmap_buff; + page_buff= info->scan.page_buff; + full_page_count= tail_count= 0; + param->full_page_count= param->tail_count= 0; + param->used= param->link_used= 0; + + for (pos= 0; + pos < info->state->data_file_length; + pos+= block_size) + { + uint row_count, real_row_count, empty_space, page_type, bitmap_pattern; + LINT_INIT(row_count); + LINT_INIT(empty_space); + + if (*_ma_killed_ptr(param)) + { + _ma_scan_end_block_record(info); + return -1; + } + if (((pos / block_size) % info->s->bitmap.pages_covered) == 0) + { + /* Bitmap page */ + if (pagecache_read(info->s->pagecache, + &info->dfile, + (pos / block_size), 1, + bitmap_buff, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0) + { + _ma_check_print_error(param, + "Page %9s: Got error: %d when reading datafile", + my_errno, llstr(pos, llbuff)); + goto err; + } + param->used+= block_size; + param->link_used+= block_size; + continue; + } + /* Skip pages marked as empty in bitmap */ + offset_page= (((pos / block_size) % info->s->bitmap.pages_covered) -1) * 3; + offset= offset_page & 7; + data= bitmap_buff + offset_page / 8; + bitmap_pattern= uint2korr(data); + param->splits++; + if (!((bitmap_pattern >> offset) & 7)) + { + param->empty+= block_size; + param->del_blocks++; + continue; + } + + if (pagecache_read(info->s->pagecache, + &info->dfile, + (pos / block_size), 1, + page_buff, + info->s->page_type, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0) + { + _ma_check_print_error(param, + "Page %9s: Got error: %d when reading datafile", + my_errno, llstr(pos, llbuff)); + goto err; + } + page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK; + if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE) + { + _ma_check_print_error(param, + "Page %9s: Found wrong page type %d\n", + llstr(pos, llbuff), page_type); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err; + } + switch ((enum en_page_type) page_type) { + case UNALLOCATED_PAGE: + case MAX_PAGE_TYPE: + DBUG_PRINT("warning", + ("Found page with wrong page type: %d", page_type)); + DBUG_ASSERT(0); + break; + case HEAD_PAGE: + row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET]; + empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); + param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + full_dir= row_count == MAX_ROWS_PER_PAGE; + break; + case TAIL_PAGE: + row_count= ((uchar*) page_buff)[DIR_COUNT_OFFSET]; + empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET); + param->used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + param->link_used+= (PAGE_HEADER_SIZE + PAGE_SUFFIX_SIZE + + row_count * DIR_ENTRY_SIZE); + full_dir= row_count == MAX_ROWS_PER_PAGE; + break; + case BLOB_PAGE: + full_page_count++; + full_dir= 0; + empty_space= block_size; /* for error reporting */ + param->link_used+= (LSN_SIZE + PAGE_TYPE_SIZE); + param->used+= block_size; + break; + } + if (_ma_check_bitmap_data(info, page_type, pos / block_size, + full_dir ? 0 : empty_space, + &bitmap_pattern)) + { + _ma_check_print_error(param, + "Page %9s: Wrong data in bitmap. Page_type: %d empty_space: %u Bitmap: %d", + llstr(pos, llbuff), page_type, empty_space, + bitmap_pattern); + if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE)) + goto err; + } + if ((enum en_page_type) page_type == BLOB_PAGE) + continue; + param->empty+= empty_space; + if (check_page_layout(param, info, pos, page_buff, row_count, + empty_space, &real_row_count)) + goto err; + if ((enum en_page_type) page_type == TAIL_PAGE) + { + tail_count+= real_row_count; + continue; + } + if (check_head_page(param, info, record, extend, pos, page_buff, + row_count)) + goto err; + } + + _ma_scan_end_block_record(info); + + if (full_page_count != param->full_page_count) + _ma_check_print_error(param, "Full page count read through records was %s but we found %s pages while scanning table", + llstr(param->full_page_count, llbuff), + llstr(full_page_count, llbuff2)); + if (tail_count != param->tail_count) + _ma_check_print_error(param, "Tail count read through records was %s but we found %s tails while scanning table", + llstr(param->tail_count, llbuff), + llstr(tail_count, llbuff2)); + + /* Update splits to avoid warning */ + info->s->state.split= param->splits; + info->state->del= param->del_blocks; + return param->error_printed != 0; + +err: + _ma_scan_end_block_record(info); + return 1; +} + + +/* Check that record-link is ok */ + +int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info,int extend) +{ + int error; + byte *record; + char llbuff[22],llbuff2[22],llbuff3[22]; + DBUG_ENTER("maria_chk_data_link"); + + if (!(param->testflag & T_SILENT)) + { + if (extend) + puts("- check records and index references"); + else + puts("- check record links"); + } + + if (!(record= (byte*) my_malloc(info->s->base.pack_reclength,MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for record"); + DBUG_RETURN(-1); + } + param->records= param->del_blocks= 0; + param->used= param->link_used= param->splits= param->del_length= 0; + param->tmp_record_checksum= param->glob_crc= 0; + param->err_count= 0; + + error= 0; + param->empty= info->s->pack.header_length; + + bzero((char*) param->tmp_key_crc, + info->s->base.keys * sizeof(param->tmp_key_crc[0])); + + switch (info->s->data_file_type) { + case BLOCK_RECORD: + error= check_block_record(param, info, extend, record); + break; + case STATIC_RECORD: + error= check_static_record(param, info, extend, record); + break; + case DYNAMIC_RECORD: + error= check_dynamic_record(param, info, extend, record); + break; + case COMPRESSED_RECORD: + error= check_compressed_record(param, info, extend, record); + break; + } /* switch */ + + if (error) + goto err; + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + if (param->records != info->state->records) + { + _ma_check_print_error(param, + "Record-count is not ok; found %-10s Should be: %s", + llstr(param->records,llbuff), + llstr(info->state->records,llbuff2)); + error=1; + } + else if (param->record_checksum && + param->record_checksum != param->tmp_record_checksum) + { + _ma_check_print_error(param, + "Key pointers and record positions doesn't match"); + error=1; + } + else if (param->glob_crc != info->state->checksum && + (info->s->options & + (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))) + { + _ma_check_print_warning(param, + "Record checksum is not the same as checksum stored in the index file\n"); + error=1; + } + else if (!extend) + { + uint key; + for (key=0 ; key < info->s->base.keys; key++) + { + if (param->tmp_key_crc[key] != param->key_crc[key] && + !(info->s->keyinfo[key].flag & (HA_FULLTEXT | HA_SPATIAL))) + { + _ma_check_print_error(param,"Checksum for key: %2d doesn't match checksum for records", + key+1); + error=1; + } + } + } + + if (param->del_length != info->state->empty) + { + _ma_check_print_warning(param, + "Found %s deleted space. Should be %s", + llstr(param->del_length,llbuff2), + llstr(info->state->empty,llbuff)); + } + if (param->used + param->empty + param->del_length != + info->state->data_file_length) + { + _ma_check_print_warning(param, + "Found %s record data and %s unused data and %s deleted data", + llstr(param->used, llbuff), + llstr(param->empty,llbuff2), + llstr(param->del_length,llbuff3)); + _ma_check_print_warning(param, + "Total %s Should be: %s", + llstr((param->used+param->empty+param->del_length), + llbuff), + llstr(info->state->data_file_length,llbuff2)); + } + if (param->del_blocks != info->state->del) + { + _ma_check_print_warning(param, + "Found %10s deleted blocks Should be: %s", + llstr(param->del_blocks,llbuff), + llstr(info->state->del,llbuff2)); + } + if (param->splits != info->s->state.split) + { + _ma_check_print_warning(param, + "Found %10s parts Should be: %s parts", + llstr(param->splits, llbuff), + llstr(info->s->state.split,llbuff2)); + } + if (param->testflag & T_INFO) + { + if (param->warning_printed || param->error_printed) + puts(""); + if (param->used != 0 && ! param->error_printed) + { + if (param->records) + { + printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n", + llstr(param->records,llbuff), + (long)((param->used - param->link_used)/param->records), + (info->s->base.blobs ? 0.0 : + (ulonglong2double((ulonglong) info->s->base.reclength * + param->records)- + my_off_t2double(param->used))/ + ulonglong2double((ulonglong) info->s->base.reclength * + param->records)*100.0)); + printf("Recordspace used:%9.0f%% Empty space:%12d%% Blocks/Record: %6.2f\n", + (ulonglong2double(param->used - param->link_used)/ + ulonglong2double(param->used-param->link_used+param->empty)*100.0), + (!param->records ? 100 : + (int) (ulonglong2double(param->del_length+param->empty)/ + my_off_t2double(param->used)*100.0)), + ulonglong2double(param->splits - param->del_blocks) / + param->records); + } + else + printf("Records:%18s\n", "0"); + } + printf("Record blocks:%12s Delete blocks:%10s\n", + llstr(param->splits - param->del_blocks, llbuff), + llstr(param->del_blocks, llbuff2)); + printf("Record data: %12s Deleted data: %10s\n", + llstr(param->used - param->link_used,llbuff), + llstr(param->del_length, llbuff2)); + printf("Lost space: %12s Linkdata: %10s\n", + llstr(param->empty, llbuff),llstr(param->link_used, llbuff2)); + } + my_free((gptr) record,MYF(0)); + DBUG_RETURN (error); + + err: + my_free((gptr) record,MYF(0)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); +} /* maria_chk_data_link */ + + + /* Recover old table by reading each record and writing all keys */ + /* Save new datafile-name in temp_filename */ + +int maria_repair(HA_CHECK *param, register MARIA_HA *info, + my_string name, int rep_quick) +{ + int error,got_error; + uint i; + ha_rows start_records,new_header_length; + my_off_t del; + File new_file; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO sort_info; + MARIA_SORT_PARAM sort_param; + DBUG_ENTER("maria_repair"); + + bzero((char *)&sort_info, sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + start_records=info->state->records; + new_header_length=(param->testflag & T_UNPACK) ? 0L : + share->pack.header_length; + got_error=1; + new_file= -1; + sort_param.sort_info=&sort_info; + + if (!(param->testflag & T_SILENT)) + { + printf("- recovering (with keycache) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + if (init_io_cache(¶m->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE,share->pack.header_length,1,MYF(MY_WME))) + { + bzero(&info->rec_cache,sizeof(info->rec_cache)); + goto err; + } + if (!rep_quick) + if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file= my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (new_header_length && + maria_filecopy(param, new_file, info->dfile.file, 0L, + new_header_length, "datafile-header")) + goto err; + info->s->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + if (param->testflag & T_UNPACK) + restore_data_file_type(share); + } + sort_info.info=info; + sort_info.param = param; + sort_param.read_cache=param->read_cache; + sort_param.pos=sort_param.max_pos=share->pack.header_length; + sort_param.filepos=new_header_length; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)); + sort_info.dupp=0; + sort_param.fix_datafile= (my_bool) (! rep_quick); + sort_param.master=1; + sort_info.max_records= ~(ha_rows) 0; + + set_data_file_type(&sort_info, share); + del=info->state->del; + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + param->glob_crc=0; + if (param->testflag & T_CALC_CHECKSUM) + sort_param.calc_checksum= 1; + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + /* + Clear all keys. Note that all key blocks allocated until now remain + "dead" parts of the key file. (Bug #4692) + */ + for (i=0 ; i < info->s->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + + /* Drop the delete chain. */ + share->state.key_del= HA_OFFSET_ERROR; + + /* + If requested, activate (enable) all keys in key_map. In this case, + all indexes will be (re-)built. + */ + if (param->testflag & T_CREATE_MISSING_KEYS) + maria_set_all_keys_active(share->state.key_map, share->base.keys); + + info->state->key_file_length=share->base.keystart; + + maria_lock_memory(param); /* Everything is alloced */ + + /* Re-create all keys, which are set in key_map. */ + while (!(error=sort_get_next_record(&sort_param))) + { + if (writekeys(&sort_param)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY) + goto err; + DBUG_DUMP("record",(byte*) sort_param.record,share->base.pack_reclength); + _ma_check_print_info(param,"Duplicate key %2d for record at %10s against new record at %10s", + info->errkey+1, + llstr(sort_param.start_recpos,llbuff), + llstr(info->dup_key_pos,llbuff2)); + if (param->testflag & T_VERBOSE) + { + VOID(_ma_make_key(info,(uint) info->errkey,info->lastkey, + sort_param.record,0L)); + _ma_print_key(stdout,share->keyinfo[info->errkey].seg,info->lastkey, + USE_WHOLE_KEY); + } + sort_info.dupp++; + if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK) + { + param->testflag|=T_RETRY_WITHOUT_QUICK; + param->error_printed=1; + goto err; + } + continue; + } + if (_ma_sort_write_record(&sort_param)) + goto err; + } + if (error > 0 || maria_write_data_suffix(&sort_info, (my_bool)!rep_quick) || + flush_io_cache(&info->rec_cache) || param->read_cache.error < 0) + goto err; + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0))) + { + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", + my_errno); + goto err; + } + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + got_error=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + got_error=1; + goto err; + } + } + + if (!rep_quick) + { + my_close(info->dfile.file, MYF(0)); + info->dfile.file= new_file; + info->state->data_file_length=sort_param.filepos; + share->state.version=(ulong) time((time_t*) 0); /* Force reopen */ + } + else + { + info->state->data_file_length=sort_param.max_pos; + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + + got_error=0; + /* If invoked by external program that uses thr_lock */ + if (&share->state.state != info->state) + memcpy( &share->state.state, info->state, sizeof(*info->state)); + +err: + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + myf sync_dir= (share->base.transactional && !share->temporary) ? + MY_SYNC_DIR : 0; + my_close(new_file,MYF(0)); + info->dfile.file= new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + MYF((param->testflag & T_BACKUP_DATA ? + MY_REDEL_MAKE_BACKUP : 0) | + sync_dir)) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! param->error_printed) + _ma_check_print_error(param,"%d for record at pos %s",my_errno, + llstr(sort_param.start_recpos,llbuff)); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); + info->rec_cache.file=-1; /* don't flush data to new_file, it's closed */ + } + maria_mark_crashed_on_repair(info); + } + my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + VOID(end_io_cache(&info->rec_cache)); + got_error|=_ma_flush_blocks(param, share->pagecache, &share->kfile); + if (!got_error && (param->testflag & T_UNPACK)) + restore_data_file_type(share); + share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES | + STATE_NOT_ANALYZED); + share->state.changed&= ~STATE_NOT_OPTIMIZED_ROWS; + DBUG_RETURN(got_error); +} + + +/* Uppdate keyfile when doing repair */ + +static int writekeys(MARIA_SORT_PARAM *sort_param) +{ + register uint i; + byte *key; + MARIA_HA *info= sort_param->sort_info->info; + byte *buff= sort_param->record; + my_off_t filepos= sort_param->filepos; + DBUG_ENTER("writekeys"); + + key= info->lastkey+info->s->base.max_key_length; + for (i=0 ; i < info->s->base.keys ; i++) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + if (info->s->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_add(info,i,(char*) key,buff,filepos)) + goto err; + } +#ifdef HAVE_SPATIAL + else if (info->s->keyinfo[i].flag & HA_SPATIAL) + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (maria_rtree_insert(info, i, key, key_length)) + goto err; + } +#endif /*HAVE_SPATIAL*/ + else + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (_ma_ck_write(info,i,key,key_length)) + goto err; + } + } + } + DBUG_RETURN(0); + + err: + if (my_errno == HA_ERR_FOUND_DUPP_KEY) + { + info->errkey=(int) i; /* This key was found */ + while ( i-- > 0 ) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + if (info->s->keyinfo[i].flag & HA_FULLTEXT) + { + if (_ma_ft_del(info,i,(char*) key,buff,filepos)) + break; + } + else + { + uint key_length= _ma_make_key(info,i,key,buff,filepos); + if (_ma_ck_delete(info,i,key,key_length)) + break; + } + } + } + } + /* Remove checksum that was added to glob_crc in sort_get_next_record */ + if (sort_param->calc_checksum) + sort_param->sort_info->param->glob_crc-= info->cur_row.checksum; + DBUG_PRINT("error",("errno: %d",my_errno)); + DBUG_RETURN(-1); +} /* writekeys */ + + + /* Change all key-pointers that points to a records */ + +int maria_movepoint(register MARIA_HA *info, byte *record, + MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos, + uint prot_key) +{ + register uint i; + byte *key; + uint key_length; + DBUG_ENTER("maria_movepoint"); + + key= info->lastkey+info->s->base.max_key_length; + for (i=0 ; i < info->s->base.keys; i++) + { + if (i != prot_key && maria_is_key_active(info->s->state.key_map, i)) + { + key_length= _ma_make_key(info,i,key,record,oldpos); + if (info->s->keyinfo[i].flag & HA_NOSAME) + { /* Change pointer direct */ + uint nod_flag; + MARIA_KEYDEF *keyinfo; + keyinfo=info->s->keyinfo+i; + if (_ma_search(info,keyinfo,key,USE_WHOLE_KEY, + (uint) (SEARCH_SAME | SEARCH_SAVE_BUFF), + info->s->state.key_root[i])) + DBUG_RETURN(-1); + nod_flag=_ma_test_if_nod(info->buff); + _ma_dpointer(info,info->int_keypos-nod_flag- + info->s->rec_reflength,newpos); + if (_ma_write_keypage(info,keyinfo,info->last_keypage, + DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + } + else + { /* Change old key to new */ + if (_ma_ck_delete(info,i,key,key_length)) + DBUG_RETURN(-1); + key_length= _ma_make_key(info,i,key,record,newpos); + if (_ma_ck_write(info,i,key,key_length)) + DBUG_RETURN(-1); + } + } + } + DBUG_RETURN(0); +} /* maria_movepoint */ + + + /* Tell system that we want all memory for our cache */ + +void maria_lock_memory(HA_CHECK *param __attribute__((unused))) +{ +#ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */ + if (param->opt_maria_lock_memory) + { + int success = mlockall(MCL_CURRENT); /* or plock(DATLOCK); */ + if (geteuid() == 0 && success != 0) + _ma_check_print_warning(param, + "Failed to lock memory. errno %d",my_errno); + } +#endif +} /* maria_lock_memory */ + + + /* Flush all changed blocks to disk */ + +int _ma_flush_blocks(HA_CHECK *param, PAGECACHE *pagecache, + PAGECACHE_FILE *file) +{ + if (flush_pagecache_blocks(pagecache, file, FLUSH_RELEASE)) + { + _ma_check_print_error(param,"%d when trying to write bufferts",my_errno); + return(1); + } + return 0; +} /* _ma_flush_blocks */ + + + /* Sort index for more efficent reads */ + +int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, my_string name) +{ + reg2 uint key; + reg1 MARIA_KEYDEF *keyinfo; + File new_file; + my_off_t index_pos[HA_MAX_POSSIBLE_KEY]; + uint r_locks,w_locks; + int old_lock; + MARIA_SHARE *share=info->s; + MARIA_STATE_INFO old_state; + myf sync_dir= (share->base.transactional && !share->temporary) ? + MY_SYNC_DIR : 0; + DBUG_ENTER("maria_sort_index"); + + /* cannot sort index files with R-tree indexes */ + for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; + key++,keyinfo++) + if (keyinfo->key_alg == HA_KEY_ALG_RTREE) + DBUG_RETURN(0); + + if (!(param->testflag & T_SILENT)) + printf("- Sorting index for MARIA-table '%s'\n",name); + + /* Get real path for index file */ + fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32); + if ((new_file=my_create(fn_format(param->temp_filename,param->temp_filename, + "", INDEX_TMP_EXT,2+4), + 0,param->tmpfile_createflag,MYF(0))) <= 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + DBUG_RETURN(-1); + } + if (maria_filecopy(param, new_file, share->kfile.file, 0L, + (ulong) share->base.keystart, "headerblock")) + goto err; + + param->new_file_pos=share->base.keystart; + for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ; + key++,keyinfo++) + { + if (! maria_is_key_active(info->s->state.key_map, key)) + continue; + + if (share->state.key_root[key] != HA_OFFSET_ERROR) + { + index_pos[key]=param->new_file_pos; /* Write first block here */ + if (sort_one_index(param,info,keyinfo,share->state.key_root[key], + new_file)) + goto err; + } + else + index_pos[key]= HA_OFFSET_ERROR; /* No blocks */ + } + + /* Flush key cache for this file if we are calling this outside maria_chk */ + flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_IGNORE_CHANGED); + + share->state.version=(ulong) time((time_t*) 0); + old_state= share->state; /* save state if not stored */ + r_locks= share->r_locks; + w_locks= share->w_locks; + old_lock= info->lock_type; + + /* Put same locks as old file */ + share->r_locks= share->w_locks= share->tot_locks= 0; + (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE); + VOID(my_close(share->kfile.file, MYF(MY_WME))); + share->kfile.file = -1; + VOID(my_close(new_file,MYF(MY_WME))); + if (maria_change_to_newfile(share->index_file_name, MARIA_NAME_IEXT, + INDEX_TMP_EXT, sync_dir) || + _ma_open_keyfile(share)) + goto err2; + info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */ + _ma_readinfo(info,F_WRLCK,0); /* Will lock the table */ + info->lock_type= old_lock; + share->r_locks= r_locks; + share->w_locks= w_locks; + share->tot_locks= r_locks+w_locks; + share->state= old_state; /* Restore old state */ + + info->state->key_file_length=param->new_file_pos; + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + for (key=0 ; key < info->s->base.keys ; key++) + info->s->state.key_root[key]=index_pos[key]; + info->s->state.key_del= HA_OFFSET_ERROR; + + info->s->state.changed&= ~STATE_NOT_SORTED_PAGES; + DBUG_RETURN(0); + +err: + VOID(my_close(new_file,MYF(MY_WME))); +err2: + VOID(my_delete(param->temp_filename,MYF(MY_WME))); + DBUG_RETURN(-1); +} /* maria_sort_index */ + + + /* Sort records recursive using one index */ + +static int sort_one_index(HA_CHECK *param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t pagepos, File new_file) +{ + uint length,nod_flag,used_length, key_length; + byte *buff,*keypos,*endpos; + byte key[HA_MAX_POSSIBLE_KEY_BUFF]; + my_off_t new_page_pos,next_page; + char llbuff[22]; + DBUG_ENTER("sort_one_index"); + + /* cannot walk over R-tree indices */ + DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE); + new_page_pos=param->new_file_pos; + param->new_file_pos+=keyinfo->block_length; + + if (!(buff= (byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for key block"); + DBUG_RETURN(-1); + } + if (!_ma_fetch_keypage(info,keyinfo,pagepos,DFLT_INIT_HITS,buff,0)) + { + _ma_check_print_error(param,"Can't read key block from filepos: %s", + llstr(pagepos,llbuff)); + goto err; + } + if ((nod_flag=_ma_test_if_nod(buff)) || keyinfo->flag & HA_FULLTEXT) + { + used_length= maria_data_on_page(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + for ( ;; ) + { + if (nod_flag) + { + next_page= _ma_kpos(nod_flag,keypos); + /* Save new pos */ + _ma_kpointer(info,keypos-nod_flag,param->new_file_pos); + if (sort_one_index(param,info,keyinfo,next_page,new_file)) + { + DBUG_PRINT("error", + ("From page: %ld, keyoffset: %lu used_length: %d", + (ulong) pagepos, (ulong) (keypos - buff), + (int) used_length)); + DBUG_DUMP("buff",(byte*) buff,used_length); + goto err; + } + } + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,key)) == 0) + break; + DBUG_ASSERT(keypos <= endpos); + if (keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + get_key_full_length_rdonly(off, key); + subkeys=ft_sintXkorr(key+off); + if (subkeys < 0) + { + next_page= _ma_dpos(info,0,key+key_length); + _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength, + param->new_file_pos); /* Save new pos */ + if (sort_one_index(param,info,&info->s->ft2_keyinfo, + next_page,new_file)) + goto err; + } + } + } + } + + /* Fill block with zero and write it to the new index file */ + length= maria_data_on_page(buff); + bzero((byte*) buff+length,keyinfo->block_length-length); + if (my_pwrite(new_file,(byte*) buff,(uint) keyinfo->block_length, + new_page_pos,MYF(MY_NABP | MY_WAIT_IF_FULL))) + { + _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno); + goto err; + } + my_afree((gptr) buff); + DBUG_RETURN(0); +err: + my_afree((gptr) buff); + DBUG_RETURN(1); +} /* sort_one_index */ + + + /* + Let temporary file replace old file. + This assumes that the new file was created in the same + directory as given by realpath(filename). + This will ensure that any symlinks that are used will still work. + Copy stats from old file to new file, deletes orignal and + changes new file name to old file name + */ + +int maria_change_to_newfile(const char * filename, const char * old_ext, + const char * new_ext, myf MyFlags) +{ + char old_filename[FN_REFLEN],new_filename[FN_REFLEN]; +#ifdef USE_RAID + if (raid_chunks) + return my_raid_redel(fn_format(old_filename,filename,"",old_ext,2+4), + fn_format(new_filename,filename,"",new_ext,2+4), + raid_chunks, + MYF(MY_WME | MY_LINK_WARNING | MyFlags)); +#endif + /* Get real path to filename */ + (void) fn_format(old_filename,filename,"",old_ext,2+4+32); + return my_redel(old_filename, + fn_format(new_filename,old_filename,"",new_ext,2+4), + MYF(MY_WME | MY_LINK_WARNING | MyFlags)); +} /* maria_change_to_newfile */ + + +/* Copy a block between two files */ + +int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start, + my_off_t length, const char *type) +{ + char tmp_buff[IO_SIZE],*buff; + ulong buff_length; + DBUG_ENTER("maria_filecopy"); + + buff_length=(ulong) min(param->write_buffer_length,length); + if (!(buff=my_malloc(buff_length,MYF(0)))) + { + buff=tmp_buff; buff_length=IO_SIZE; + } + + VOID(my_seek(from,start,MY_SEEK_SET,MYF(0))); + while (length > buff_length) + { + if (my_read(from,(byte*) buff,buff_length,MYF(MY_NABP)) || + my_write(to,(byte*) buff,buff_length,param->myf_rw)) + goto err; + length-= buff_length; + } + if (my_read(from,(byte*) buff,(uint) length,MYF(MY_NABP)) || + my_write(to,(byte*) buff,(uint) length,param->myf_rw)) + goto err; + if (buff != tmp_buff) + my_free(buff,MYF(0)); + DBUG_RETURN(0); +err: + if (buff != tmp_buff) + my_free(buff,MYF(0)); + _ma_check_print_error(param,"Can't copy %s to tempfile, error %d", + type,my_errno); + DBUG_RETURN(1); +} + + +/* + Repair table or given index using sorting + + SYNOPSIS + maria_repair_by_sort() + param Repair parameters + info MARIA handler to repair + name Name of table (for warnings) + rep_quick set to <> 0 if we should not change data file + + RESULT + 0 ok + <>0 Error +*/ + +int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info, + const char * name, int rep_quick) +{ + int got_error; + uint i; + ulong length; + ha_rows start_records; + my_off_t new_header_length,del; + File new_file; + MARIA_SORT_PARAM sort_param; + MARIA_SHARE *share=info->s; + HA_KEYSEG *keyseg; + ulong *rec_per_key_part; + char llbuff[22]; + MARIA_SORT_INFO sort_info; + ulonglong key_map=share->state.key_map; + myf sync_dir= (share->base.transactional && !share->temporary) ? + MY_SYNC_DIR : 0; + DBUG_ENTER("maria_repair_by_sort"); + + start_records=info->state->records; + got_error=1; + new_file= -1; + new_header_length=(param->testflag & T_UNPACK) ? 0 : + share->pack.header_length; + if (!(param->testflag & T_SILENT)) + { + printf("- recovering (with sort) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(start_records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char *)&sort_param, sizeof(sort_param)); + if (!(sort_info.key_block= + alloc_key_blocks(param, + (uint) param->sort_key_blocks, + share->base.max_key_block_length)) + || init_io_cache(¶m->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || + (! rep_quick && + init_io_cache(&info->rec_cache, info->dfile.file, + (uint) param->write_buffer_length, + WRITE_CACHE,new_header_length,1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + goto err; + sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; + info->opt_flag|=WRITE_CACHE_USED; + info->rec_cache.file= info->dfile.file; /* for sort_delete_record */ + + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0))) || + _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size, + info->s->base.default_rec_buff_size)) + { + _ma_check_print_error(param, "Not enough memory for extra record"); + goto err; + } + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file=my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (new_header_length && + maria_filecopy(param, new_file, info->dfile.file, 0L, + new_header_length, "datafile-header")) + goto err; + if (param->testflag & T_UNPACK) + restore_data_file_type(share); + share->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + } + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (!(param->testflag & T_CREATE_MISSING_KEYS)) + { + /* + Flush key cache for this file if we are calling this outside + maria_chk + */ + flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_IGNORE_CHANGED); + /* Clear the pointers to the given rows */ + for (i=0 ; i < share->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; + info->state->key_file_length=share->base.keystart; + } + else + { + if (flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_FORCE_WRITE)) + goto err; + key_map= ~key_map; /* Create the missing keys */ + } + + sort_info.info=info; + sort_info.param = param; + + set_data_file_type(&sort_info, share); + sort_param.filepos=new_header_length; + sort_info.dupp=0; + sort_info.buff=0; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); + + sort_param.wordlist=NULL; + init_alloc_root(&sort_param.wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); + + if (share->data_file_type == DYNAMIC_RECORD) + length=max(share->base.min_pack_length+1,share->base.min_block_length); + else if (share->data_file_type == COMPRESSED_RECORD) + length=share->base.min_block_length; + else + length=share->base.pack_reclength; + sort_info.max_records= + ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records : + (ha_rows) (sort_info.filelength/length+1)); + sort_param.key_cmp=sort_key_cmp; + sort_param.lock_in_memory=maria_lock_memory; + sort_param.tmpdir=param->tmpdir; + sort_param.sort_info=&sort_info; + sort_param.fix_datafile= (my_bool) (! rep_quick); + sort_param.master =1; + + del=info->state->del; + param->glob_crc=0; + if (param->testflag & T_CALC_CHECKSUM) + sort_param.calc_checksum= 1; + + rec_per_key_part= param->rec_per_key_part; + for (sort_param.key=0 ; sort_param.key < share->base.keys ; + rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++) + { + sort_param.read_cache=param->read_cache; + sort_param.keyinfo=share->keyinfo+sort_param.key; + sort_param.seg=sort_param.keyinfo->seg; + if (! maria_is_key_active(key_map, sort_param.key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part + + (uint) (rec_per_key_part - param->rec_per_key_part)), + sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part)); + continue; + } + + if ((!(param->testflag & T_SILENT))) + printf ("- Fixing index %d\n",sort_param.key+1); + sort_param.max_pos=sort_param.pos=share->pack.header_length; + keyseg=sort_param.seg; + bzero((char*) sort_param.unique,sizeof(sort_param.unique)); + sort_param.key_length=share->rec_reflength; + for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++) + { + sort_param.key_length+=keyseg[i].length; + if (keyseg[i].flag & HA_SPACE_PACK) + sort_param.key_length+=get_pack_length(keyseg[i].length); + if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + sort_param.key_length+=2 + test(keyseg[i].length >= 127); + if (keyseg[i].flag & HA_NULL_PART) + sort_param.key_length++; + } + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + + if (sort_param.keyinfo->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + sort_param.keyinfo->seg->charset->mbmaxlen; + sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + /* + fulltext indexes may have much more entries than the + number of rows in the table. We estimate the number here. + + Note, built-in parser is always nr. 0 - see ftparser_call_initializer() + */ + if (sort_param.keyinfo->ftparser_nr == 0) + { + /* + for built-in parser the number of generated index entries + cannot be larger than the size of the data file divided + by the minimal word's length + */ + sort_info.max_records= + (ha_rows) (sort_info.filelength/ft_min_word_len+1); + } + else + { + /* + for external plugin parser we cannot tell anything at all :( + so, we'll use all the sort memory and start from ~10 buffpeks. + (see _create_index_by_sort) + */ + sort_info.max_records= + 10*param->sort_buffer_length/sort_param.key_length; + } + + sort_param.key_read= sort_maria_ft_key_read; + sort_param.key_write= sort_maria_ft_key_write; + } + else + { + sort_param.key_read= sort_key_read; + sort_param.key_write= sort_key_write; + } + + if (_ma_create_index_by_sort(&sort_param, + (my_bool) (!(param->testflag & T_VERBOSE)), + (uint) param->sort_buffer_length)) + { + param->retry_repair=1; + goto err; + } + /* No need to calculate checksum again. */ + sort_param.calc_checksum= 0; + free_root(&sort_param.wordroot, MYF(0)); + + /* Set for next loop */ + sort_info.max_records= (ha_rows) info->state->records; + + if (param->testflag & T_STATISTICS) + maria_update_key_parts(sort_param.keyinfo, rec_per_key_part, sort_param.unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sort_param.notnull: NULL,(ulonglong) info->state->records); + maria_set_key_active(share->state.key_map, sort_param.key); + + if (sort_param.fix_datafile) + { + param->read_cache.end_of_file=sort_param.filepos; + if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) + goto err; + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + goto err; + } + } + share->state.state.data_file_length = info->state->data_file_length= + sort_param.filepos; + /* Only whole records */ + share->state.version=(ulong) time((time_t*) 0); + my_close(info->dfile.file, MYF(0)); + info->dfile.file= new_file; + share->data_file_type=sort_info.new_data_file_type; + share->pack.header_length=(ulong) new_header_length; + sort_param.fix_datafile=0; + } + else + info->state->data_file_length=sort_param.max_pos; + + param->read_cache.file= info->dfile.file; /* re-init read cache */ + reinit_io_cache(¶m->read_cache,READ_CACHE,share->pack.header_length, + 1,1); + } + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + got_error=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + + if (rep_quick & T_FORCE_UNIQUENESS) + { + my_off_t skr=info->state->data_file_length+ + (share->options & HA_OPTION_COMPRESS_RECORD ? + MEMMAP_EXTRA_MARGIN : 0); +#ifdef USE_RELOC + if (share->data_file_type == STATIC_RECORD && + skr < share->base.reloc*share->base.min_pack_length) + skr=share->base.reloc*share->base.min_pack_length; +#endif + if (skr != sort_info.filelength) + if (my_chsize(info->dfile.file, skr, 0, MYF(0))) + _ma_check_print_warning(param, + "Can't change size of datafile, error: %d", + my_errno); + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0))) + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", + my_errno); + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + got_error=0; + + if (&share->state.state != info->state) + memcpy( &share->state.state, info->state, sizeof(*info->state)); + +err: + got_error|= _ma_flush_blocks(param, share->pagecache, &share->kfile); + VOID(end_io_cache(&info->rec_cache)); + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile.file= new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + MYF((param->testflag & T_BACKUP_DATA ? + MY_REDEL_MAKE_BACKUP : 0) | + sync_dir)) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! param->error_printed) + _ma_check_print_error(param,"%d when fixing table",my_errno); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); + if (info->dfile.file == new_file) + info->dfile.file= -1; + } + maria_mark_crashed_on_repair(info); + } + else if (key_map == share->state.key_map) + share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; + share->state.changed|= STATE_NOT_SORTED_PAGES; + share->state.changed&= ~STATE_NOT_OPTIMIZED_ROWS; + + my_free(sort_param.rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + if (!got_error && (param->testflag & T_UNPACK)) + restore_data_file_type(share); + DBUG_RETURN(got_error); +} + +/* + Threaded repair of table using sorting + + SYNOPSIS + maria_repair_parallel() + param Repair parameters + info MARIA handler to repair + name Name of table (for warnings) + rep_quick set to <> 0 if we should not change data file + + DESCRIPTION + Same as maria_repair_by_sort but do it multithreaded + Each key is handled by a separate thread. + TODO: make a number of threads a parameter + + In parallel repair we use one thread per index. There are two modes: + + Quick + + Only the indexes are rebuilt. All threads share a read buffer. + Every thread that needs fresh data in the buffer enters the shared + cache lock. The last thread joining the lock reads the buffer from + the data file and wakes all other threads. + + Non-quick + + The data file is rebuilt and all indexes are rebuilt to point to + the new record positions. One thread is the master thread. It + reads from the old data file and writes to the new data file. It + also creates one of the indexes. The other threads read from a + buffer which is filled by the master. If they need fresh data, + they enter the shared cache lock. If the masters write buffer is + full, it flushes it to the new data file and enters the shared + cache lock too. When all threads joined in the lock, the master + copies its write buffer to the read buffer for the other threads + and wakes them. + + RESULT + 0 ok + <>0 Error +*/ + +int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info, + const char * name, int rep_quick) +{ +#ifndef THREAD + return maria_repair_by_sort(param, info, name, rep_quick); +#else + int got_error; + uint i,key, total_key_length, istep; + ulong rec_length; + ha_rows start_records; + my_off_t new_header_length,del; + File new_file; + MARIA_SORT_PARAM *sort_param=0; + MARIA_SHARE *share=info->s; + ulong *rec_per_key_part; + HA_KEYSEG *keyseg; + char llbuff[22]; + IO_CACHE new_data_cache; /* For non-quick repair. */ + IO_CACHE_SHARE io_share; + MARIA_SORT_INFO sort_info; + ulonglong key_map=share->state.key_map; + pthread_attr_t thr_attr; + myf sync_dir= (share->base.transactional && !share->temporary) ? + MY_SYNC_DIR : 0; + DBUG_ENTER("maria_repair_parallel"); + + start_records=info->state->records; + got_error=1; + new_file= -1; + new_header_length=(param->testflag & T_UNPACK) ? 0 : + share->pack.header_length; + if (!(param->testflag & T_SILENT)) + { + printf("- parallel recovering (with sort) MARIA-table '%s'\n",name); + printf("Data records: %s\n", llstr(start_records,llbuff)); + } + param->testflag|=T_REP; /* for easy checking */ + + if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + param->testflag|=T_CALC_CHECKSUM; + + /* + Quick repair (not touching data file, rebuilding indexes): + { + Read cache is (MI_CHECK *param)->read_cache using info->dfile.file. + } + + Non-quick repair (rebuilding data file and indexes): + { + Master thread: + + Read cache is (MI_CHECK *param)->read_cache using info->dfile.file. + Write cache is (MI_INFO *info)->rec_cache using new_file. + + Slave threads: + + Read cache is new_data_cache synced to master rec_cache. + + The final assignment of the filedescriptor for rec_cache is done + after the cache creation. + + Don't check file size on new_data_cache, as the resulting file size + is not known yet. + + As rec_cache and new_data_cache are synced, write_buffer_length is + used for the read cache 'new_data_cache'. Both start at the same + position 'new_header_length'. + } + */ + DBUG_PRINT("info", ("is quick repair: %d", rep_quick)); + bzero((char*)&sort_info,sizeof(sort_info)); + /* Initialize pthread structures before goto err. */ + pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&sort_info.cond, 0); + + if (!(sort_info.key_block= + alloc_key_blocks(param, (uint) param->sort_key_blocks, + share->base.max_key_block_length)) || + init_io_cache(¶m->read_cache, info->dfile.file, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) || + (!rep_quick && + (init_io_cache(&info->rec_cache, info->dfile.file, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) || + init_io_cache(&new_data_cache, -1, + (uint) param->write_buffer_length, + READ_CACHE, new_header_length, 1, + MYF(MY_WME | MY_DONT_CHECK_FILESIZE))))) + goto err; + sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; + info->opt_flag|=WRITE_CACHE_USED; + info->rec_cache.file= info->dfile.file; /* for sort_delete_record */ + + if (!rep_quick) + { + /* Get real path for data file */ + if ((new_file= my_create(fn_format(param->temp_filename, + share->data_file_name, "", + DATA_TMP_EXT, + 2+4), + 0,param->tmpfile_createflag, + MYF(0))) < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (new_header_length && + maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length, + "datafile-header")) + goto err; + if (param->testflag & T_UNPACK) + restore_data_file_type(share); + share->state.dellink= HA_OFFSET_ERROR; + info->rec_cache.file=new_file; + } + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (!(param->testflag & T_CREATE_MISSING_KEYS)) + { + /* + Flush key cache for this file if we are calling this outside + maria_chk + */ + flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_IGNORE_CHANGED); + /* Clear the pointers to the given rows */ + for (i=0 ; i < share->base.keys ; i++) + share->state.key_root[i]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; + info->state->key_file_length=share->base.keystart; + } + else + { + if (flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_FORCE_WRITE)) + goto err; + key_map= ~key_map; /* Create the missing keys */ + } + + sort_info.info=info; + sort_info.param = param; + + set_data_file_type(&sort_info, share); + sort_info.dupp=0; + sort_info.buff=0; + param->read_cache.end_of_file=sort_info.filelength= + my_seek(param->read_cache.file,0L,MY_SEEK_END,MYF(0)); + + if (share->data_file_type == DYNAMIC_RECORD) + rec_length=max(share->base.min_pack_length+1,share->base.min_block_length); + else if (share->data_file_type == COMPRESSED_RECORD) + rec_length=share->base.min_block_length; + else + rec_length=share->base.pack_reclength; + /* + +1 below is required hack for parallel repair mode. + The info->state->records value, that is compared later + to sort_info.max_records and cannot exceed it, is + increased in sort_key_write. In maria_repair_by_sort, sort_key_write + is called after sort_key_read, where the comparison is performed, + but in parallel mode master thread can call sort_key_write + before some other repair thread calls sort_key_read. + Furthermore I'm not even sure +1 would be enough. + May be sort_info.max_records shold be always set to max value in + parallel mode. + */ + sort_info.max_records= + ((param->testflag & T_CREATE_MISSING_KEYS) ? info->state->records + 1: + (ha_rows) (sort_info.filelength/rec_length+1)); + + del=info->state->del; + param->glob_crc=0; + + if (!(sort_param=(MARIA_SORT_PARAM *) + my_malloc((uint) share->base.keys * + (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength), + MYF(MY_ZEROFILL)))) + { + _ma_check_print_error(param,"Not enough memory for key!"); + goto err; + } + total_key_length=0; + rec_per_key_part= param->rec_per_key_part; + info->state->records=info->state->del=share->state.split=0; + info->state->empty=0; + + for (i=key=0, istep=1 ; key < share->base.keys ; + rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++) + { + sort_param[i].key=key; + sort_param[i].keyinfo=share->keyinfo+key; + sort_param[i].seg=sort_param[i].keyinfo->seg; + if (! maria_is_key_active(key_map, key)) + { + /* Remember old statistics for key */ + memcpy((char*) rec_per_key_part, + (char*) (share->state.rec_per_key_part+ + (uint) (rec_per_key_part - param->rec_per_key_part)), + sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part)); + istep=0; + continue; + } + istep=1; + if ((!(param->testflag & T_SILENT))) + printf ("- Fixing index %d\n",key+1); + if (sort_param[i].keyinfo->flag & HA_FULLTEXT) + { + sort_param[i].key_read=sort_maria_ft_key_read; + sort_param[i].key_write=sort_maria_ft_key_write; + } + else + { + sort_param[i].key_read=sort_key_read; + sort_param[i].key_write=sort_key_write; + } + sort_param[i].key_cmp=sort_key_cmp; + sort_param[i].lock_in_memory=maria_lock_memory; + sort_param[i].tmpdir=param->tmpdir; + sort_param[i].sort_info=&sort_info; + sort_param[i].master=0; + sort_param[i].fix_datafile=0; + sort_param[i].calc_checksum= 0; + + sort_param[i].filepos=new_header_length; + sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length; + + sort_param[i].record= (((char *)(sort_param+share->base.keys))+ + (share->base.pack_reclength * i)); + if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size, + share->base.default_rec_buff_size)) + { + _ma_check_print_error(param,"Not enough memory!"); + goto err; + } + sort_param[i].key_length=share->rec_reflength; + for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END; + keyseg++) + { + sort_param[i].key_length+=keyseg->length; + if (keyseg->flag & HA_SPACE_PACK) + sort_param[i].key_length+=get_pack_length(keyseg->length); + if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + sort_param[i].key_length+=2 + test(keyseg->length >= 127); + if (keyseg->flag & HA_NULL_PART) + sort_param[i].key_length++; + } + total_key_length+=sort_param[i].key_length; + + if (sort_param[i].keyinfo->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + sort_param[i].keyinfo->seg->charset->mbmaxlen; + sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + init_alloc_root(&sort_param[i].wordroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); + } + } + sort_info.total_keys=i; + sort_param[0].master= 1; + sort_param[0].fix_datafile= (my_bool)(! rep_quick); + sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM); + + sort_info.got_error=0; + pthread_mutex_lock(&sort_info.mutex); + + /* + Initialize the I/O cache share for use with the read caches and, in + case of non-quick repair, the write cache. When all threads join on + the cache lock, the writer copies the write cache contents to the + read caches. + */ + if (i > 1) + { + if (rep_quick) + init_io_cache_share(¶m->read_cache, &io_share, NULL, i); + else + init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i); + } + else + io_share.total_threads= 0; /* share not used */ + + (void) pthread_attr_init(&thr_attr); + (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); + + for (i=0 ; i < sort_info.total_keys ; i++) + { + /* + Copy the properly initialized IO_CACHE structure so that every + thread has its own copy. In quick mode param->read_cache is shared + for use by all threads. In non-quick mode all threads but the + first copy the shared new_data_cache, which is synchronized to the + write cache of the first thread. The first thread copies + param->read_cache, which is not shared. + */ + sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache : + new_data_cache); + DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx", + i, (long) &sort_param[i].read_cache)); + + /* + two approaches: the same amount of memory for each thread + or the memory for the same number of keys for each thread... + In the second one all the threads will fill their sort_buffers + (and call write_keys) at the same time, putting more stress on i/o. + */ + sort_param[i].sortbuff_size= +#ifndef USING_SECOND_APPROACH + param->sort_buffer_length/sort_info.total_keys; +#else + param->sort_buffer_length*sort_param[i].key_length/total_key_length; +#endif + if (pthread_create(&sort_param[i].thr, &thr_attr, + _ma_thr_find_all_keys, + (void *) (sort_param+i))) + { + _ma_check_print_error(param,"Cannot start a repair thread"); + /* Cleanup: Detach from the share. Avoid others to be blocked. */ + if (io_share.total_threads) + remove_io_thread(&sort_param[i].read_cache); + DBUG_PRINT("error", ("Cannot start a repair thread")); + sort_info.got_error=1; + } + else + sort_info.threads_running++; + } + (void) pthread_attr_destroy(&thr_attr); + + /* waiting for all threads to finish */ + while (sort_info.threads_running) + pthread_cond_wait(&sort_info.cond, &sort_info.mutex); + pthread_mutex_unlock(&sort_info.mutex); + + if ((got_error= _ma_thr_write_keys(sort_param))) + { + param->retry_repair=1; + goto err; + } + got_error=1; /* Assume the following may go wrong */ + + if (sort_param[0].fix_datafile) + { + /* + Append some nuls to the end of a memory mapped file. Destroy the + write cache. The master thread did already detach from the share + by remove_io_thread() in sort.c:thr_find_all_keys(). + */ + if (maria_write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) + goto err; + if (param->testflag & T_SAFE_REPAIR) + { + /* Don't repair if we loosed more than one row */ + if (info->state->records+1 < start_records) + { + info->state->records=start_records; + goto err; + } + } + share->state.state.data_file_length= info->state->data_file_length= + sort_param->filepos; + /* Only whole records */ + share->state.version=(ulong) time((time_t*) 0); + /* + Exchange the data file descriptor of the table, so that we use the + new file from now on. + */ + my_close(info->dfile.file, MYF(0)); + info->dfile.file= new_file; + + share->data_file_type=sort_info.new_data_file_type; + share->pack.header_length=(ulong) new_header_length; + } + else + info->state->data_file_length=sort_param->max_pos; + + if (rep_quick && del+sort_info.dupp != info->state->del) + { + _ma_check_print_error(param,"Couldn't fix table with quick recovery: Found wrong number of deleted records"); + _ma_check_print_error(param,"Run recovery again without -q"); + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + goto err; + } + + if (rep_quick & T_FORCE_UNIQUENESS) + { + my_off_t skr=info->state->data_file_length+ + (share->options & HA_OPTION_COMPRESS_RECORD ? + MEMMAP_EXTRA_MARGIN : 0); +#ifdef USE_RELOC + if (share->data_file_type == STATIC_RECORD && + skr < share->base.reloc*share->base.min_pack_length) + skr=share->base.reloc*share->base.min_pack_length; +#endif + if (skr != sort_info.filelength) + if (my_chsize(info->dfile.file, skr, 0, MYF(0))) + _ma_check_print_warning(param, + "Can't change size of datafile, error: %d", + my_errno); + } + if (param->testflag & T_CALC_CHECKSUM) + info->state->checksum=param->glob_crc; + + if (my_chsize(share->kfile.file, info->state->key_file_length, 0, MYF(0))) + _ma_check_print_warning(param, + "Can't change size of indexfile, error: %d", my_errno); + + if (!(param->testflag & T_SILENT)) + { + if (start_records != info->state->records) + printf("Data records: %s\n", llstr(info->state->records,llbuff)); + if (sort_info.dupp) + _ma_check_print_warning(param, + "%s records have been removed", + llstr(sort_info.dupp,llbuff)); + } + got_error=0; + + if (&share->state.state != info->state) + memcpy(&share->state.state, info->state, sizeof(*info->state)); + +err: + got_error|= _ma_flush_blocks(param, share->pagecache, &share->kfile); + /* + Destroy the write cache. The master thread did already detach from + the share by remove_io_thread() or it was not yet started (if the + error happend before creating the thread). + */ + VOID(end_io_cache(&info->rec_cache)); + /* + Destroy the new data cache in case of non-quick repair. All slave + threads did either detach from the share by remove_io_thread() + already or they were not yet started (if the error happend before + creating the threads). + */ + if (!rep_quick) + VOID(end_io_cache(&new_data_cache)); + if (!got_error) + { + /* Replace the actual file with the temporary file */ + if (new_file >= 0) + { + my_close(new_file,MYF(0)); + info->dfile.file= new_file= -1; + if (maria_change_to_newfile(share->data_file_name,MARIA_NAME_DEXT, + DATA_TMP_EXT, + MYF((param->testflag & T_BACKUP_DATA ? + MY_REDEL_MAKE_BACKUP : 0) | + sync_dir)) || + _ma_open_datafile(info,share,-1)) + got_error=1; + } + } + if (got_error) + { + if (! param->error_printed) + _ma_check_print_error(param,"%d when fixing table",my_errno); + if (new_file >= 0) + { + VOID(my_close(new_file,MYF(0))); + VOID(my_delete(param->temp_filename, MYF(MY_WME))); + if (info->dfile.file == new_file) + info->dfile.file= -1; + } + maria_mark_crashed_on_repair(info); + } + else if (key_map == share->state.key_map) + share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS; + share->state.changed|= STATE_NOT_SORTED_PAGES; + share->state.changed&= ~STATE_NOT_OPTIMIZED_ROWS; + + pthread_cond_destroy (&sort_info.cond); + pthread_mutex_destroy(&sort_info.mutex); + + my_free((gptr) sort_info.ft_buf, MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_info.key_block,MYF(MY_ALLOW_ZERO_PTR)); + my_free((gptr) sort_param,MYF(MY_ALLOW_ZERO_PTR)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + VOID(end_io_cache(¶m->read_cache)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + if (!got_error && (param->testflag & T_UNPACK)) + restore_data_file_type(share); + DBUG_RETURN(got_error); +#endif /* THREAD */ +} + + /* Read next record and return next key */ + +static int sort_key_read(MARIA_SORT_PARAM *sort_param, byte *key) +{ + int error; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + MARIA_HA *info= sort_info->info; + DBUG_ENTER("sort_key_read"); + + if ((error=sort_get_next_record(sort_param))) + DBUG_RETURN(error); + if (info->state->records == sort_info->max_records) + { + _ma_check_print_error(sort_info->param, + "Key %d - Found too many records; Can't continue", + sort_param->key+1); + DBUG_RETURN(1); + } + sort_param->real_key_length= + (info->s->rec_reflength+ + _ma_make_key(info, sort_param->key, key, + sort_param->record, sort_param->filepos)); +#ifdef HAVE_purify + bzero(key+sort_param->real_key_length, + (sort_param->key_length-sort_param->real_key_length)); +#endif + DBUG_RETURN(_ma_sort_write_record(sort_param)); +} /* sort_key_read */ + + +static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, byte *key) +{ + int error; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + MARIA_HA *info=sort_info->info; + FT_WORD *wptr=0; + DBUG_ENTER("sort_maria_ft_key_read"); + + if (!sort_param->wordlist) + { + for (;;) + { + free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE)); + if ((error=sort_get_next_record(sort_param))) + DBUG_RETURN(error); + if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record, + &sort_param->wordroot))) + + DBUG_RETURN(1); + if (wptr->pos) + break; + error=_ma_sort_write_record(sort_param); + } + sort_param->wordptr=sort_param->wordlist=wptr; + } + else + { + error=0; + wptr=(FT_WORD*)(sort_param->wordptr); + } + + sort_param->real_key_length=(info->s->rec_reflength+ + _ma_ft_make_key(info, sort_param->key, + key, wptr++, + sort_param->filepos)); +#ifdef HAVE_purify + if (sort_param->key_length > sort_param->real_key_length) + bzero(key+sort_param->real_key_length, + (sort_param->key_length-sort_param->real_key_length)); +#endif + if (!wptr->pos) + { + free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE)); + sort_param->wordlist=0; + error=_ma_sort_write_record(sort_param); + } + else + sort_param->wordptr=(void*)wptr; + + DBUG_RETURN(error); +} /* sort_maria_ft_key_read */ + + +/* + Read next record from file using parameters in sort_info. + + SYNOPSIS + sort_get_next_record() + sort_param Information about and for the sort process + + NOTE + + Dynamic Records With Non-Quick Parallel Repair + + For non-quick parallel repair we use a synchronized read/write + cache. This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. + + RETURN + -1 end of file + 0 ok + > 0 error +*/ + +static int sort_get_next_record(MARIA_SORT_PARAM *sort_param) +{ + int searching; + int parallel_flag; + uint found_record,b_type,left_length; + my_off_t pos; + MARIA_BLOCK_INFO block_info; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + DBUG_ENTER("sort_get_next_record"); + + if (*_ma_killed_ptr(param)) + DBUG_RETURN(1); + + switch (share->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); + break; + case STATIC_RECORD: + for (;;) + { + if (my_b_read(&sort_param->read_cache,sort_param->record, + share->base.pack_reclength)) + { + if (sort_param->read_cache.error) + param->out_flag |= O_DATA_LOST; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(-1); + } + sort_param->start_recpos=sort_param->pos; + if (!sort_param->fix_datafile) + { + sort_param->filepos=sort_param->pos; + if (sort_param->master) + share->state.split++; + } + sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength); + if (*sort_param->record) + { + if (sort_param->calc_checksum) + param->glob_crc+= (info->cur_row.checksum= + _ma_static_checksum(info,sort_param->record)); + DBUG_RETURN(0); + } + if (!sort_param->fix_datafile && sort_param->master) + { + info->state->del++; + info->state->empty+=share->base.pack_reclength; + } + } + case DYNAMIC_RECORD: + { + byte *to; + LINT_INIT(to); + pos=sort_param->pos; + searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); + parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0; + for (;;) + { + found_record=block_info.second_read= 0; + left_length=1; + if (searching) + { + pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE); + param->testflag|=T_RETRY_WITHOUT_QUICK; + sort_param->start_recpos=pos; + } + do + { + if (pos > sort_param->max_pos) + sort_param->max_pos=pos; + if (pos & (MARIA_DYN_ALIGN_SIZE-1)) + { + if ((param->testflag & T_VERBOSE) || searching == 0) + _ma_check_print_info(param,"Wrong aligned block at %s", + llstr(pos,llbuff)); + if (searching) + goto try_next; + } + if (found_record && pos == param->search_after_block) + _ma_check_print_info(param,"Block: %s used by record at %s", + llstr(param->search_after_block,llbuff), + llstr(sort_param->start_recpos,llbuff2)); + if (_ma_read_cache(&sort_param->read_cache, + (byte*) block_info.header,pos, + MARIA_BLOCK_INFO_HEADER_LENGTH, + (! found_record ? READING_NEXT : 0) | + parallel_flag | READING_HEADER)) + { + if (found_record) + { + _ma_check_print_info(param, + "Can't read whole record at %s (errno: %d)", + llstr(sort_param->start_recpos,llbuff),errno); + goto try_next; + } + DBUG_RETURN(-1); + } + if (searching && ! sort_param->fix_datafile) + { + param->error_printed=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); /* Something wrong with data */ + } + b_type= _ma_get_block_info(&block_info,-1,pos); + if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) || + ((b_type & BLOCK_FIRST) && + (block_info.rec_len < (uint) share->base.min_pack_length || + block_info.rec_len > (uint) share->base.max_pack_length))) + { + uint i; + if (param->testflag & T_VERBOSE || searching == 0) + _ma_check_print_info(param, + "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped", + block_info.header[0],block_info.header[1], + block_info.header[2],llstr(pos,llbuff)); + if (found_record) + goto try_next; + block_info.second_read=0; + searching=1; + /* Search after block in read header string */ + for (i=MARIA_DYN_ALIGN_SIZE ; + i < MARIA_BLOCK_INFO_HEADER_LENGTH ; + i+= MARIA_DYN_ALIGN_SIZE) + if (block_info.header[i] >= 1 && + block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE) + break; + pos+=(ulong) i; + sort_param->start_recpos=pos; + continue; + } + if (b_type & BLOCK_DELETED) + { + bool error=0; + if (block_info.block_len+ (uint) (block_info.filepos-pos) < + share->base.min_block_length) + { + if (!searching) + _ma_check_print_info(param, + "Deleted block with impossible length %u at %s", + block_info.block_len,llstr(pos,llbuff)); + error=1; + } + else + { + if ((block_info.next_filepos != HA_OFFSET_ERROR && + block_info.next_filepos >= + info->state->data_file_length) || + (block_info.prev_filepos != HA_OFFSET_ERROR && + block_info.prev_filepos >= info->state->data_file_length)) + { + if (!searching) + _ma_check_print_info(param, + "Delete link points outside datafile at %s", + llstr(pos,llbuff)); + error=1; + } + } + if (error) + { + if (found_record) + goto try_next; + searching=1; + pos+= MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + block_info.second_read=0; + continue; + } + } + else + { + if (block_info.block_len+ (uint) (block_info.filepos-pos) < + share->base.min_block_length || + block_info.block_len > (uint) share->base.max_pack_length+ + MARIA_SPLIT_LENGTH) + { + if (!searching) + _ma_check_print_info(param, + "Found block with impossible length %u at %s; Skipped", + block_info.block_len+ (uint) (block_info.filepos-pos), + llstr(pos,llbuff)); + if (found_record) + goto try_next; + searching=1; + pos+= MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + block_info.second_read=0; + continue; + } + } + if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + { + if (!sort_param->fix_datafile && sort_param->master && + (b_type & BLOCK_DELETED)) + { + info->state->empty+=block_info.block_len; + info->state->del++; + share->state.split++; + } + if (found_record) + goto try_next; + if (searching) + { + pos+=MARIA_DYN_ALIGN_SIZE; + sort_param->start_recpos=pos; + } + else + pos=block_info.filepos+block_info.block_len; + block_info.second_read=0; + continue; + } + + if (!sort_param->fix_datafile && sort_param->master) + share->state.split++; + if (! found_record++) + { + sort_param->find_length=left_length=block_info.rec_len; + sort_param->start_recpos=pos; + if (!sort_param->fix_datafile) + sort_param->filepos=sort_param->start_recpos; + if (sort_param->fix_datafile && (param->testflag & T_EXTEND)) + sort_param->pos=block_info.filepos+1; + else + sort_param->pos=block_info.filepos+block_info.block_len; + if (share->base.blobs) + { + if (_ma_alloc_buffer(&sort_param->rec_buff, + &sort_param->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + + { + if (param->max_record_length >= block_info.rec_len) + { + _ma_check_print_error(param,"Not enough memory for blob at %s (need %lu)", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + DBUG_RETURN(1); + } + else + { + _ma_check_print_info(param,"Not enough memory for blob at %s (need %lu); Row skipped", + llstr(sort_param->start_recpos,llbuff), + (ulong) block_info.rec_len); + goto try_next; + } + } + } + to= sort_param->rec_buff; + } + if (left_length < block_info.data_len || ! block_info.data_len) + { + _ma_check_print_info(param, + "Found block with too small length at %s; " + "Skipped", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + if (block_info.filepos + block_info.data_len > + sort_param->read_cache.end_of_file) + { + _ma_check_print_info(param, + "Found block that points outside data file " + "at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + /* + Copy information that is already read. Avoid accessing data + below the cache start. This could happen if the header + streched over the end of the previous buffer contents. + */ + { + uint header_len= (uint) (block_info.filepos - pos); + uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy(to, block_info.header + header_len, prefetch_len); + block_info.filepos+= prefetch_len; + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + if (block_info.data_len && + _ma_read_cache(&sort_param->read_cache,to,block_info.filepos, + block_info.data_len, + (found_record == 1 ? READING_NEXT : 0) | + parallel_flag)) + { + _ma_check_print_info(param, + "Read error for block at: %s (error: %d); Skipped", + llstr(block_info.filepos,llbuff),my_errno); + goto try_next; + } + left_length-=block_info.data_len; + to+=block_info.data_len; + pos=block_info.next_filepos; + if (pos == HA_OFFSET_ERROR && left_length) + { + _ma_check_print_info(param,"Wrong block with wrong total length starting at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH > sort_param->read_cache.end_of_file) + { + _ma_check_print_info(param,"Found link that points at %s (outside data file) at %s", + llstr(pos,llbuff2), + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + } while (left_length); + + if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff, + sort_param->find_length) != MY_FILE_ERROR) + { + if (sort_param->read_cache.error < 0) + DBUG_RETURN(1); + if (sort_param->calc_checksum) + info->cur_row.checksum= _ma_checksum(info, sort_param->record); + if ((param->testflag & (T_EXTEND | T_REP)) || searching) + { + if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff, + sort_param->find_length, + (param->testflag & T_QUICK) && + sort_param->calc_checksum && + test(info->s->calc_checksum))) + { + _ma_check_print_info(param,"Found wrong packed record at %s", + llstr(sort_param->start_recpos,llbuff)); + goto try_next; + } + } + if (sort_param->calc_checksum) + param->glob_crc+= info->cur_row.checksum; + DBUG_RETURN(0); + } + if (!searching) + _ma_check_print_info(param,"Key %d - Found wrong stored record at %s", + sort_param->key+1, + llstr(sort_param->start_recpos,llbuff)); + try_next: + pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE); + searching=1; + } + } + case COMPRESSED_RECORD: + for (searching=0 ;; searching=1, sort_param->pos++) + { + if (_ma_read_cache(&sort_param->read_cache,(byte*) block_info.header, + sort_param->pos, + share->pack.ref_length,READING_NEXT)) + DBUG_RETURN(-1); + if (searching && ! sort_param->fix_datafile) + { + param->error_printed=1; + param->retry_repair=1; + param->testflag|=T_RETRY_WITHOUT_QUICK; + DBUG_RETURN(1); /* Something wrong with data */ + } + sort_param->start_recpos=sort_param->pos; + if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info, + &sort_param->rec_buff, + &sort_param->rec_buff_size, -1, + sort_param->pos)) + DBUG_RETURN(-1); + if (!block_info.rec_len && + sort_param->pos + MEMMAP_EXTRA_MARGIN == + sort_param->read_cache.end_of_file) + DBUG_RETURN(-1); + if (block_info.rec_len < (uint) share->min_pack_length || + block_info.rec_len > (uint) share->max_pack_length) + { + if (! searching) + _ma_check_print_info(param,"Found block with wrong recordlength: %d at %s\n", + block_info.rec_len, + llstr(sort_param->pos,llbuff)); + continue; + } + if (_ma_read_cache(&sort_param->read_cache,(byte*) sort_param->rec_buff, + block_info.filepos, block_info.rec_len, + READING_NEXT)) + { + if (! searching) + _ma_check_print_info(param,"Couldn't read whole record from %s", + llstr(sort_param->pos,llbuff)); + continue; + } + if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record, + sort_param->rec_buff, block_info.rec_len)) + { + if (! searching) + _ma_check_print_info(param,"Found wrong record at %s", + llstr(sort_param->pos,llbuff)); + continue; + } + if (!sort_param->fix_datafile) + { + sort_param->filepos=sort_param->pos; + if (sort_param->master) + share->state.split++; + } + sort_param->max_pos=(sort_param->pos=block_info.filepos+ + block_info.rec_len); + info->packed_length=block_info.rec_len; + + if (sort_param->calc_checksum) + { + info->cur_row.checksum= (*info->s->calc_checksum)(info, + sort_param->record); + param->glob_crc+= info->cur_row.checksum; + } + DBUG_RETURN(0); + } + } + DBUG_RETURN(1); /* Impossible */ +} + + +/* + Write record to new file. + + SYNOPSIS + _ma_sort_write_record() + sort_param Sort parameters. + + NOTE + This is only called by a master thread if parallel repair is used. + + RETURN + 0 OK + 1 Error +*/ + +int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param) +{ + int flag; + uint length; + ulong block_length,reclength; + byte *from; + byte block_buff[8]; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_sort_write_record"); + + if (sort_param->fix_datafile) + { + switch (sort_info->new_data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); + break; + case STATIC_RECORD: + if (my_b_write(&info->rec_cache,sort_param->record, + share->base.pack_reclength)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + sort_param->filepos+=share->base.pack_reclength; + info->s->state.split++; + break; + case DYNAMIC_RECORD: + if (! info->blobs) + from=sort_param->rec_buff; + else + { + /* must be sure that local buffer is big enough */ + reclength=info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,sort_param->record)+ + ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER; + if (sort_info->buff_length < reclength) + { + if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength, + MYF(MY_FREE_ON_ERROR | + MY_ALLOW_ZERO_PTR)))) + DBUG_RETURN(1); + sort_info->buff_length=reclength; + } + from=sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER); + } + /* We can use info->checksum here as only one thread calls this */ + info->cur_row.checksum= _ma_checksum(info,sort_param->record); + reclength= _ma_rec_pack(info,from,sort_param->record); + flag=0; + + do + { + block_length=reclength+ 3 + test(reclength >= (65520-3)); + if (block_length < share->base.min_block_length) + block_length=share->base.min_block_length; + info->update|=HA_STATE_WRITE_AT_END; + block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE); + if (block_length > MARIA_MAX_BLOCK_LENGTH) + block_length=MARIA_MAX_BLOCK_LENGTH; + if (_ma_write_part_record(info,0L,block_length, + sort_param->filepos+block_length, + &from,&reclength,&flag)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + sort_param->filepos+=block_length; + info->s->state.split++; + } while (reclength); + break; + case COMPRESSED_RECORD: + reclength=info->packed_length; + length= _ma_save_pack_length((uint) share->pack.version, block_buff, + reclength); + if (info->s->base.blobs) + length+= _ma_save_pack_length((uint) share->pack.version, + block_buff + length, info->blob_length); + if (my_b_write(&info->rec_cache,block_buff,length) || + my_b_write(&info->rec_cache,(byte*) sort_param->rec_buff,reclength)) + { + _ma_check_print_error(param,"%d when writing to datafile",my_errno); + DBUG_RETURN(1); + } + sort_param->filepos+=reclength+length; + info->s->state.split++; + break; + } + } + if (sort_param->master) + { + info->state->records++; + if ((param->testflag & T_WRITE_LOOP) && + (info->state->records % WRITE_COUNT) == 0) + { + char llbuff[22]; + printf("%s\r", llstr(info->state->records,llbuff)); + VOID(fflush(stdout)); + } + } + DBUG_RETURN(0); +} /* _ma_sort_write_record */ + + + /* Compare two keys from _ma_create_index_by_sort */ + +static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a, + const void *b) +{ + uint not_used[2]; + return (ha_key_cmp(sort_param->seg, *((uchar**) a), *((uchar**) b), + USE_WHOLE_KEY, SEARCH_SAME, not_used)); +} /* sort_key_cmp */ + + +static int sort_key_write(MARIA_SORT_PARAM *sort_param, const byte *a) +{ + uint diff_pos[2]; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param= sort_info->param; + int cmp; + + if (sort_info->key_block->inited) + { + cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, + (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, + diff_pos); + if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) + ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, + (uchar*) a, USE_WHOLE_KEY, + SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); + else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + { + diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg, + sort_param->notnull, + sort_info->key_block->lastkey, + a); + } + sort_param->unique[diff_pos[0]-1]++; + } + else + { + cmp= -1; + if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) + maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull, + a); + } + if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0) + { + sort_info->dupp++; + sort_info->info->cur_row.lastpos= get_record_for_key(sort_info->info, + sort_param->keyinfo, + a); + _ma_check_print_warning(param, + "Duplicate key for record at %10s against record at %10s", + llstr(sort_info->info->cur_row.lastpos, llbuff), + llstr(get_record_for_key(sort_info->info, + sort_param->keyinfo, + sort_info->key_block-> + lastkey), + llbuff2)); + param->testflag|=T_RETRY_WITHOUT_QUICK; + if (sort_info->param->testflag & T_VERBOSE) + _ma_print_key(stdout,sort_param->seg, a, USE_WHOLE_KEY); + return (sort_delete_record(sort_param)); + } +#ifndef DBUG_OFF + if (cmp > 0) + { + _ma_check_print_error(param, + "Internal error: Keys are not in order from sort"); + return(1); + } +#endif + return (sort_insert_key(sort_param, sort_info->key_block, + a, HA_OFFSET_ERROR)); +} /* sort_key_write */ + + +int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param) +{ + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + SORT_KEY_BLOCKS *key_block=sort_info->key_block; + MARIA_SHARE *share=sort_info->info->s; + uint val_off, val_len; + int error; + SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf; + byte *from, *to; + + val_len=share->ft2_keyinfo.keylength; + get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey); + to= maria_ft_buf->lastkey+val_off; + + if (maria_ft_buf->buf) + { + /* flushing first-level tree */ + error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); + for (from=to+val_len; + !error && from < maria_ft_buf->buf; + from+= val_len) + { + memcpy(to, from, val_len); + error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey, + HA_OFFSET_ERROR); + } + return error; + } + /* flushing second-level tree keyblocks */ + error=_ma_flush_pending_blocks(sort_param); + /* updating lastkey with second-level tree info */ + ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count); + _ma_dpointer(sort_info->info, maria_ft_buf->lastkey+val_off+HA_FT_WLEN, + share->state.key_root[sort_param->key]); + /* restoring first level tree data in sort_info/sort_param */ + sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks; + sort_param->keyinfo=share->keyinfo+sort_param->key; + share->state.key_root[sort_param->key]=HA_OFFSET_ERROR; + /* writing lastkey in first-level tree */ + return error ? error : + sort_insert_key(sort_param,sort_info->key_block, + maria_ft_buf->lastkey,HA_OFFSET_ERROR); +} + + +static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param, + const byte *a) +{ + uint a_len, val_off, val_len, error; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + SORT_FT_BUF *ft_buf= sort_info->ft_buf; + SORT_KEY_BLOCKS *key_block= sort_info->key_block; + + val_len=HA_FT_WLEN+sort_info->info->s->base.rec_reflength; + get_key_full_length_rdonly(a_len, (uchar *)a); + + if (!ft_buf) + { + /* + use two-level tree only if key_reflength fits in rec_reflength place + and row format is NOT static - for _ma_dpointer not to garble offsets + */ + if ((sort_info->info->s->base.key_reflength <= + sort_info->info->s->base.rec_reflength) && + (sort_info->info->s->options & + (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD))) + ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length + + sizeof(SORT_FT_BUF), MYF(MY_WME)); + + if (!ft_buf) + { + sort_param->key_write=sort_key_write; + return sort_key_write(sort_param, a); + } + sort_info->ft_buf= ft_buf; + goto word_init_ft_buf; /* no need to duplicate the code */ + } + get_key_full_length_rdonly(val_off, ft_buf->lastkey); + + if (ha_compare_text(sort_param->seg->charset, + ((uchar *)a)+1,a_len-1, + (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0) + { + byte *p; + if (!ft_buf->buf) /* store in second-level tree */ + { + ft_buf->count++; + return sort_insert_key(sort_param,key_block, + a + a_len, HA_OFFSET_ERROR); + } + + /* storing the key in the buffer. */ + memcpy (ft_buf->buf, (char *)a+a_len, val_len); + ft_buf->buf+=val_len; + if (ft_buf->buf < ft_buf->end) + return 0; + + /* converting to two-level tree */ + p=ft_buf->lastkey+val_off; + + while (key_block->inited) + key_block++; + sort_info->key_block=key_block; + sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo; + ft_buf->count=(ft_buf->buf - p)/val_len; + + /* flushing buffer to second-level tree */ + for (error=0; !error && p < ft_buf->buf; p+= val_len) + error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR); + ft_buf->buf=0; + return error; + } + + /* flushing buffer */ + if ((error=_ma_sort_ft_buf_flush(sort_param))) + return error; + +word_init_ft_buf: + a_len+=val_len; + memcpy(ft_buf->lastkey, a, a_len); + ft_buf->buf=ft_buf->lastkey+a_len; + /* + 32 is just a safety margin here + (at least max(val_len, sizeof(nod_flag)) should be there). + May be better performance could be achieved if we'd put + (sort_info->keyinfo->block_length-32)/XXX + instead. + TODO: benchmark the best value for XXX. + */ + ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32); + return 0; +} /* sort_maria_ft_key_write */ + + + /* get pointer to record from a key */ + +static my_off_t get_record_for_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + const byte *key) +{ + return _ma_dpos(info,0, key + _ma_keylength(keyinfo, key)); +} /* get_record_for_key */ + + + /* Insert a key in sort-key-blocks */ + +static int sort_insert_key(MARIA_SORT_PARAM *sort_param, + register SORT_KEY_BLOCKS *key_block, + const byte *key, + my_off_t prev_block) +{ + uint a_length,t_length,nod_flag; + my_off_t filepos,key_file_length; + byte *anc_buff,*lastkey; + MARIA_KEY_PARAM s_temp; + MARIA_HA *info; + MARIA_KEYDEF *keyinfo=sort_param->keyinfo; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; + DBUG_ENTER("sort_insert_key"); + + anc_buff= key_block->buff; + info=sort_info->info; + lastkey=key_block->lastkey; + nod_flag= (key_block == sort_info->key_block ? 0 : + info->s->base.key_reflength); + + if (!key_block->inited) + { + key_block->inited=1; + if (key_block == sort_info->key_block_end) + { + _ma_check_print_error(param,"To many key-block-levels; Try increasing sort_key_blocks"); + DBUG_RETURN(1); + } + a_length=2+nod_flag; + key_block->end_pos=anc_buff+2; + lastkey=0; /* No previous key in block */ + } + else + a_length= maria_data_on_page(anc_buff); + + /* Save pointer to previous block */ + if (nod_flag) + _ma_kpointer(info,key_block->end_pos,prev_block); + + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (byte*) 0,lastkey,lastkey,key, + &s_temp); + (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp); + a_length+=t_length; + maria_putint(anc_buff,a_length,nod_flag); + key_block->end_pos+=t_length; + if (a_length <= keyinfo->block_length) + { + VOID(_ma_move_key(keyinfo, key_block->lastkey, key)); + key_block->last_length=a_length-t_length; + DBUG_RETURN(0); + } + + /* Fill block with end-zero and write filled block */ + maria_putint(anc_buff,key_block->last_length,nod_flag); + bzero(anc_buff+key_block->last_length, + keyinfo->block_length- key_block->last_length); + key_file_length=info->state->key_file_length; + if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(1); + + /* If we read the page from the key cache, we have to write it back to it */ + if (key_file_length == info->state->key_file_length) + { + if (_ma_write_keypage(info, keyinfo, filepos, DFLT_INIT_HITS, anc_buff)) + DBUG_RETURN(1); + } + else if (my_pwrite(info->s->kfile.file, anc_buff, + (uint) keyinfo->block_length,filepos, param->myf_rw)) + DBUG_RETURN(1); + DBUG_DUMP("buff",anc_buff,maria_data_on_page(anc_buff)); + + /* Write separator-key to block in next level */ + if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos)) + DBUG_RETURN(1); + + /* clear old block and write new key in it */ + key_block->inited=0; + DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block)); +} /* sort_insert_key */ + + + /* Delete record when we found a duplicated key */ + +static int sort_delete_record(MARIA_SORT_PARAM *sort_param) +{ + uint i; + int old_file,error; + byte *key; + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + MARIA_HA *info=sort_info->info; + DBUG_ENTER("sort_delete_record"); + + if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK) + { + _ma_check_print_error(param, + "Quick-recover aborted; Run recovery without switch -q or with switch -qq"); + DBUG_RETURN(1); + } + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + { + _ma_check_print_error(param, + "Recover aborted; Can't run standard recovery on compressed tables with errors in data-file. Use switch 'maria_chk --safe-recover' to fix it\n",stderr);; + DBUG_RETURN(1); + } + + old_file= info->dfile.file; + info->dfile.file= info->rec_cache.file; + if (sort_info->current_key) + { + key= info->lastkey+info->s->base.max_key_length; + if ((error=(*info->s->read_record)(info,sort_param->record, + info->cur_row.lastpos)) && + error != HA_ERR_RECORD_DELETED) + { + _ma_check_print_error(param,"Can't read record to be removed"); + info->dfile.file= old_file; + DBUG_RETURN(1); + } + + for (i=0 ; i < sort_info->current_key ; i++) + { + uint key_length= _ma_make_key(info, i, key, sort_param->record, + info->cur_row.lastpos); + if (_ma_ck_delete(info, i, key, key_length)) + { + _ma_check_print_error(param, + "Can't delete key %d from record to be removed", + i+1); + info->dfile.file= old_file; + DBUG_RETURN(1); + } + } + if (sort_param->calc_checksum) + param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); + } + error= (flush_io_cache(&info->rec_cache) || + (*info->s->delete_record)(info, sort_param->record)); + info->dfile.file= old_file; /* restore actual value */ + info->state->records--; + DBUG_RETURN(error); +} /* sort_delete_record */ + + /* Fix all pending blocks and flush everything to disk */ + +int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param) +{ + uint nod_flag,length; + my_off_t filepos,key_file_length; + SORT_KEY_BLOCKS *key_block; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + myf myf_rw=sort_info->param->myf_rw; + MARIA_HA *info=sort_info->info; + MARIA_KEYDEF *keyinfo=sort_param->keyinfo; + DBUG_ENTER("_ma_flush_pending_blocks"); + + filepos= HA_OFFSET_ERROR; /* if empty file */ + nod_flag=0; + for (key_block=sort_info->key_block ; key_block->inited ; key_block++) + { + key_block->inited=0; + length= maria_data_on_page(key_block->buff); + if (nod_flag) + _ma_kpointer(info,key_block->end_pos,filepos); + key_file_length=info->state->key_file_length; + bzero(key_block->buff+length, keyinfo->block_length-length); + if ((filepos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(1); + + /* If we read the page from the key cache, we have to write it back */ + if (key_file_length == info->state->key_file_length) + { + if (_ma_write_keypage(info, keyinfo, filepos, + DFLT_INIT_HITS, key_block->buff)) + DBUG_RETURN(1); + } + else if (my_pwrite(info->s->kfile.file, key_block->buff, + (uint) keyinfo->block_length,filepos, myf_rw)) + DBUG_RETURN(1); + DBUG_DUMP("buff",key_block->buff,length); + nod_flag=1; + } + info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */ + DBUG_RETURN(0); +} /* _ma_flush_pending_blocks */ + + /* alloc space and pointers for key_blocks */ + +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, + uint buffer_length) +{ + reg1 uint i; + SORT_KEY_BLOCKS *block; + DBUG_ENTER("alloc_key_blocks"); + + if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+ + buffer_length+IO_SIZE)*blocks, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for sort-key-blocks"); + return(0); + } + for (i=0 ; i < blocks ; i++) + { + block[i].inited=0; + block[i].buff= (byte*) (block+blocks)+(buffer_length+IO_SIZE)*i; + } + DBUG_RETURN(block); +} /* alloc_key_blocks */ + + + /* Check if file is almost full */ + +int maria_test_if_almost_full(MARIA_HA *info) +{ + if (info->s->options & HA_OPTION_COMPRESS_RECORD) + return 0; + return (my_seek(info->s->kfile.file, 0L, MY_SEEK_END, MYF(0))/10*9 > + (my_off_t) (info->s->base.max_key_file_length) || + my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) / 10 * 9 > + (my_off_t) info->s->base.max_data_file_length); +} + + /* Recreate table with bigger more alloced record-data */ + +int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename) +{ + int error; + MARIA_HA info; + MARIA_SHARE share; + MARIA_KEYDEF *keyinfo,*key,*key_end; + HA_KEYSEG *keysegs,*keyseg; + MARIA_COLUMNDEF *columndef,*column,*end; + MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end; + MARIA_STATUS_INFO status_info; + uint unpack,key_parts; + ha_rows max_records; + ulonglong file_length,tmp_length; + MARIA_CREATE_INFO create_info; + DBUG_ENTER("maria_recreate_table"); + + error=1; /* Default error */ + info= **org_info; + status_info= (*org_info)->state[0]; + info.state= &status_info; + share= *(*org_info)->s; + unpack= (share.options & HA_OPTION_COMPRESS_RECORD) && + (param->testflag & T_UNPACK); + if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) * + share.base.keys))) + DBUG_RETURN(0); + memcpy((byte*) keyinfo,(byte*) share.keyinfo, + (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys)); + + key_parts= share.base.all_key_parts; + if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)* + (key_parts+share.base.keys)))) + { + my_afree((gptr) keyinfo); + DBUG_RETURN(1); + } + if (!(columndef=(MARIA_COLUMNDEF*) + my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1)))) + { + my_afree((gptr) keyinfo); + my_afree((gptr) keysegs); + DBUG_RETURN(1); + } + if (!(uniquedef=(MARIA_UNIQUEDEF*) + my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1)))) + { + my_afree((gptr) columndef); + my_afree((gptr) keyinfo); + my_afree((gptr) keysegs); + DBUG_RETURN(1); + } + + /* Copy the column definitions */ + memcpy((byte*) columndef,(byte*) share.columndef, + (size_t) (sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))); + for (column=columndef, end= columndef+share.base.fields; + column != end ; + column++) + { + if (unpack && !(share.options & HA_OPTION_PACK_RECORD) && + column->type != FIELD_BLOB && + column->type != FIELD_VARCHAR && + column->type != FIELD_CHECK) + column->type=(int) FIELD_NORMAL; + } + + /* Change the new key to point at the saved key segments */ + memcpy((byte*) keysegs,(byte*) share.keyparts, + (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+ + share.state.header.uniques))); + keyseg=keysegs; + for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++) + { + key->seg=keyseg; + for (; keyseg->type ; keyseg++) + { + if (param->language) + keyseg->language=param->language; /* change language */ + } + keyseg++; /* Skip end pointer */ + } + + /* + Copy the unique definitions and change them to point at the new key + segments + */ + memcpy((byte*) uniquedef,(byte*) share.uniqueinfo, + (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques))); + for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques; + u_ptr != u_end ; u_ptr++) + { + u_ptr->seg=keyseg; + keyseg+=u_ptr->keysegs+1; + } + if (share.options & HA_OPTION_COMPRESS_RECORD) + share.base.records=max_records=info.state->records; + else if (share.base.min_pack_length) + max_records=(ha_rows) (my_seek(info.dfile.file, 0L, MY_SEEK_END, + MYF(0)) / + (ulong) share.base.min_pack_length); + else + max_records=0; + unpack= (share.data_file_type == COMPRESSED_RECORD) && + (param->testflag & T_UNPACK); + share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD; + + file_length=(ulonglong) my_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0)); + tmp_length= file_length+file_length/10; + set_if_bigger(file_length,param->max_data_file_length); + set_if_bigger(file_length,tmp_length); + set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length); + + VOID(maria_close(*org_info)); + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=max(max_records,share.base.records); + create_info.reloc_rows=share.base.reloc; + create_info.old_options=(share.options | + (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0)); + + create_info.data_file_length=file_length; + create_info.auto_increment=share.state.auto_increment; + create_info.language = (param->language ? param->language : + share.state.header.language); + create_info.key_file_length= status_info.key_file_length; + create_info.org_data_file_type= ((enum data_file_type) + share.state.header.org_data_file_type); + + /* + Allow for creating an auto_increment key. This has an effect only if + an auto_increment key exists in the original table. + */ + create_info.with_auto_increment= TRUE; + create_info.null_bytes= share.base.null_bytes; + /* + We don't have to handle symlinks here because we are using + HA_DONT_TOUCH_DATA + */ + if (maria_create(filename, share.data_file_type, + share.base.keys - share.state.header.uniques, + keyinfo, share.base.fields, columndef, + share.state.header.uniques, uniquedef, + &create_info, + HA_DONT_TOUCH_DATA)) + { + _ma_check_print_error(param, + "Got error %d when trying to recreate indexfile", + my_errno); + goto end; + } + *org_info=maria_open(filename,O_RDWR, + (param->testflag & T_WAIT_FOREVER) ? HA_OPEN_WAIT_IF_LOCKED : + (param->testflag & T_DESCRIPT) ? HA_OPEN_IGNORE_IF_LOCKED : + HA_OPEN_ABORT_IF_LOCKED); + if (!*org_info) + { + _ma_check_print_error(param, + "Got error %d when trying to open re-created indexfile", + my_errno); + goto end; + } + /* We are modifing */ + (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA; + VOID(_ma_readinfo(*org_info,F_WRLCK,0)); + (*org_info)->state->records=info.state->records; + if (share.state.create_time) + (*org_info)->s->state.create_time=share.state.create_time; + (*org_info)->s->state.unique=(*org_info)->this_unique= + share.state.unique; + (*org_info)->state->checksum=info.state->checksum; + (*org_info)->state->del=info.state->del; + (*org_info)->s->state.dellink=share.state.dellink; + (*org_info)->state->empty=info.state->empty; + (*org_info)->state->data_file_length=info.state->data_file_length; + if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT | + UPDATE_OPEN_COUNT)) + goto end; + error=0; +end: + my_afree((gptr) uniquedef); + my_afree((gptr) keyinfo); + my_afree((gptr) columndef); + my_afree((gptr) keysegs); + DBUG_RETURN(error); +} + + + /* write suffix to data file if neaded */ + +int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile) +{ + MARIA_HA *info=sort_info->info; + + if (info->s->options & HA_OPTION_COMPRESS_RECORD && fix_datafile) + { + char buff[MEMMAP_EXTRA_MARGIN]; + bzero(buff,sizeof(buff)); + if (my_b_write(&info->rec_cache,buff,sizeof(buff))) + { + _ma_check_print_error(sort_info->param, + "%d when writing to datafile",my_errno); + return 1; + } + sort_info->param->read_cache.end_of_file+=sizeof(buff); + } + return 0; +} + + +/* Update state and maria_chk time of indexfile */ + +int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update) +{ + MARIA_SHARE *share=info->s; + + if (update & UPDATE_OPEN_COUNT) + { + share->state.open_count=0; + share->global_changed=0; + } + if (update & UPDATE_STAT) + { + uint i, key_parts= mi_uint2korr(share->state.header.key_parts); + share->state.rec_per_key_rows=info->state->records; + share->state.changed&= ~STATE_NOT_ANALYZED; + if (info->state->records) + { + for (i=0; istate.rec_per_key_part[i]=param->rec_per_key_part[i])) + share->state.changed|= STATE_NOT_ANALYZED; + } + } + } + if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC)) + { + if (update & UPDATE_TIME) + { + share->state.check_time= (long) time((time_t*) 0); + if (!share->state.create_time) + share->state.create_time=share->state.check_time; + } + /* + When tables are locked we haven't synched the share state and the + real state for a while so we better do it here before synching + the share state to disk. Only when table is write locked is it + necessary to perform this synch. + */ + if (info->lock_type == F_WRLCK) + share->state.state= *info->state; + if (_ma_state_info_write(share->kfile.file, &share->state, 1 + 2)) + goto err; + share->changed=0; + } + { /* Force update of status */ + int error; + uint r_locks=share->r_locks,w_locks=share->w_locks; + share->r_locks= share->w_locks= share->tot_locks= 0; + error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK); + share->r_locks=r_locks; + share->w_locks=w_locks; + share->tot_locks=r_locks+w_locks; + if (!error) + return 0; + } +err: + _ma_check_print_error(param,"%d when updating keyfile",my_errno); + return 1; +} + + /* + Update auto increment value for a table + When setting the 'repair_only' flag we only want to change the + old auto_increment value if its wrong (smaller than some given key). + The reason is that we shouldn't change the auto_increment value + for a table without good reason when only doing a repair; If the + user have inserted and deleted rows, the auto_increment value + may be bigger than the biggest current row and this is ok. + + If repair_only is not set, we will update the flag to the value in + param->auto_increment is bigger than the biggest key. + */ + +void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info, + my_bool repair_only) +{ + byte *record; + DBUG_ENTER("update_auto_increment_key"); + + if (!info->s->base.auto_key || + ! maria_is_key_active(info->s->state.key_map, info->s->base.auto_key - 1)) + { + if (!(param->testflag & T_VERY_SILENT)) + _ma_check_print_info(param, + "Table: %s doesn't have an auto increment key\n", + param->isam_file_name); + DBUG_VOID_RETURN; + } + if (!(param->testflag & T_SILENT) && + !(param->testflag & T_REP)) + printf("Updating MARIA file: %s\n", param->isam_file_name); + /* + We have to use an allocated buffer instead of info->rec_buff as + _ma_put_key_in_record() may use info->rec_buff + */ + if (!(record= (byte*) my_malloc((uint) info->s->base.pack_reclength, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for extra record"); + DBUG_VOID_RETURN; + } + + maria_extra(info,HA_EXTRA_KEYREAD,0); + if (maria_rlast(info, record, info->s->base.auto_key-1)) + { + if (my_errno != HA_ERR_END_OF_FILE) + { + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + my_free((char*) record, MYF(0)); + _ma_check_print_error(param,"%d when reading last record",my_errno); + DBUG_VOID_RETURN; + } + if (!repair_only) + info->s->state.auto_increment=param->auto_increment_value; + } + else + { + ulonglong auto_increment= ma_retrieve_auto_increment(info, record); + set_if_bigger(info->s->state.auto_increment,auto_increment); + if (!repair_only) + set_if_bigger(info->s->state.auto_increment, param->auto_increment_value); + } + maria_extra(info,HA_EXTRA_NO_KEYREAD,0); + my_free((char*) record, MYF(0)); + maria_update_state_info(param, info, UPDATE_AUTO_INC); + DBUG_VOID_RETURN; +} + + +/* + Update statistics for each part of an index + + SYNOPSIS + maria_update_key_parts() + keyinfo IN Index information (only key->keysegs used) + rec_per_key_part OUT Store statistics here + unique IN Array of (#distinct tuples) + notnull_tuples IN Array of (#tuples), or NULL + records Number of records in the table + + DESCRIPTION + This function is called produce index statistics values from unique and + notnull_tuples arrays after these arrays were produced with sequential + index scan (the scan is done in two places: chk_index() and + sort_key_write()). + + This function handles all 3 index statistics collection methods. + + Unique is an array: + unique[0]= (#different values of {keypart1}) - 1 + unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1 + ... + + For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too: + notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL) + notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all + keypart{i} are not NULL) + ... + For all other statistics collection methods notnull_tuples==NULL. + + Output is an array: + rec_per_key_part[k] = + = E(#records in the table such that keypart_1=c_1 AND ... AND + keypart_k=c_k for arbitrary constants c_1 ... c_k) + + = {assuming that values have uniform distribution and index contains all + tuples from the domain (or that {c_1, ..., c_k} tuple is choosen from + index tuples} + + = #tuples-in-the-index / #distinct-tuples-in-the-index. + + The #tuples-in-the-index and #distinct-tuples-in-the-index have different + meaning depending on which statistics collection method is used: + + MI_STATS_METHOD_* how are nulls compared? which tuples are counted? + NULLS_EQUAL NULL == NULL all tuples in table + NULLS_NOT_EQUAL NULL != NULL all tuples in table + IGNORE_NULLS n/a tuples that don't have NULLs +*/ + +void maria_update_key_parts(MARIA_KEYDEF *keyinfo, ulong *rec_per_key_part, + ulonglong *unique, ulonglong *notnull, + ulonglong records) +{ + ulonglong count=0,tmp, unique_tuples; + ulonglong tuples= records; + uint parts; + for (parts=0 ; parts < keyinfo->keysegs ; parts++) + { + count+=unique[parts]; + unique_tuples= count + 1; + if (notnull) + { + tuples= notnull[parts]; + /* + #(unique_tuples not counting tuples with NULLs) = + #(unique_tuples counting tuples with NULLs as different) - + #(tuples with NULLs) + */ + unique_tuples -= (records - notnull[parts]); + } + + if (unique_tuples == 0) + tmp= 1; + else if (count == 0) + tmp= tuples; /* 1 unique tuple */ + else + tmp= (tuples + unique_tuples/2) / unique_tuples; + + /* + for some weird keys (e.g. FULLTEXT) tmp can be <1 here. + let's ensure it is not + */ + set_if_bigger(tmp,1); + if (tmp >= (ulonglong) ~(ulong) 0) + tmp=(ulonglong) ~(ulong) 0; + + *rec_per_key_part=(ulong) tmp; + rec_per_key_part++; + } +} + + +static ha_checksum maria_byte_checksum(const byte *buf, uint length) +{ + ha_checksum crc; + const byte *end=buf+length; + for (crc=0; buf != end; buf++) + crc=((crc << 1) + *((uchar*) buf)) + + test(crc & (((ha_checksum) 1) << (8*sizeof(ha_checksum)-1))); + return crc; +} + +static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows) +{ + uint key_maxlength=key->maxlength; + if (key->flag & HA_FULLTEXT) + { + uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT* + key->seg->charset->mbmaxlen; + key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN; + } + return (key->flag & HA_SPATIAL) || + (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) && + ((ulonglong) rows * key_maxlength > + (ulonglong) maria_max_temp_length)); +} + +/* + Deactivate all not unique index that can be recreated fast + These include packed keys on which sorting will use more temporary + space than the max allowed file length or for which the unpacked keys + will take much more space than packed keys. + Note that 'rows' may be zero for the case when we don't know how many + rows we will put into the file. + */ + +void maria_disable_non_unique_index(MARIA_HA *info, ha_rows rows) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + uint i; + + DBUG_ASSERT(info->state->records == 0 && + (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES)); + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) && + ! maria_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1) + { + maria_clear_key_active(share->state.key_map, i); + info->update|= HA_STATE_CHANGED; + } + } +} + + +/* + Return TRUE if we can use repair by sorting + One can set the force argument to force to use sorting + even if the temporary file would be quite big! +*/ + +my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, + ulonglong key_map, my_bool force) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + uint i; + + /* + maria_repair_by_sort only works if we have at least one key. If we don't + have any keys, we should use the normal repair. + */ + if (! maria_is_any_key_active(key_map)) + return FALSE; /* Can't use sort */ + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!force && maria_too_big_key_for_sort(key,rows)) + return FALSE; + } + return TRUE; +} + + +static void +set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share) +{ + if ((sort_info->new_data_file_type=share->data_file_type) == + COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK) + { + MARIA_SHARE tmp; + sort_info->new_data_file_type= share->state.header.org_data_file_type; + /* Set delete_function for sort_delete_record() */ + memcpy((char*) &tmp, share, sizeof(*share)); + tmp.options= ~HA_OPTION_COMPRESS_RECORD; + _ma_setup_functions(&tmp); + share->delete_record=tmp.delete_record; + } +} + +static void restore_data_file_type(MARIA_SHARE *share) +{ + share->options&= ~HA_OPTION_COMPRESS_RECORD; + mi_int2store(share->state.header.options,share->options); + share->state.header.data_file_type= + share->state.header.org_data_file_type; + share->data_file_type= share->state.header.data_file_type= + share->pack.header_length= 0; +} + + +/** + @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn + + REPAIR/OPTIMIZE have replaced the data/index file with a new file + and so, in this scenario: + @verbatim + CHECKPOINT - REDO_INSERT - COMMIT - ... - REPAIR - ... - crash + @endverbatim + we do not want Recovery to apply the REDO_INSERT to the table, as it would + then possibly wrongly extend the table. By updating create_rename_lsn at + the end of REPAIR, we know that REDO_INSERT will be skipped. + + @param param description of the REPAIR operation + @param info table + + @return Operation status + @retval 0 ok + @retval 1 error (disk problem) +*/ + +int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + /* Only called from ha_maria.cc, not maria_check, so translog is inited */ + if (share->base.transactional && !share->temporary) + { + /* For now this record is only informative */ + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + uchar log_data[LSN_STORE_SIZE]; + compile_time_assert(LSN_STORE_SIZE >= (FILEID_STORE_SIZE + 4)); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= FILEID_STORE_SIZE + 4; + /* + testflag gives an idea of what REPAIR did (in particular T_QUICK + or not: did it touch the data file or not?). + */ + int4store(log_data + FILEID_STORE_SIZE, param->testflag); + if (unlikely(translog_write_record(&share->state.create_rename_lsn, + LOGREC_REDO_REPAIR_TABLE, + &dummy_transaction_object, share, + log_array[TRANSLOG_INTERNAL_PARTS + + 0].length, + sizeof(log_array)/sizeof(log_array[0]), + log_array, log_data))) + return 1; + /* + But this piece is really needed, to have the new table's content durable + and to not apply old REDOs to the new table. The table's existence was + made durable earlier (MY_SYNC_DIR passed to maria_change_to_newfile()). + */ + lsn_store(log_data, share->state.create_rename_lsn); + DBUG_ASSERT(info->dfile.file >= 0); + DBUG_ASSERT(share->kfile.file >= 0); + return (my_pwrite(share->kfile.file, log_data, sizeof(log_data), + sizeof(share->state.header) + 2, MYF(MY_NABP)) || + _ma_sync_table_files(info)); + } + return 0; +} diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c new file mode 100644 index 00000000000..ed5520f66bf --- /dev/null +++ b/storage/maria/ma_checkpoint.c @@ -0,0 +1,428 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3071 Maria checkpoint + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the implementation of this module */ + +/* + Summary: + - there are asynchronous checkpoints (a writer to the log notices that it's + been a long time since we last checkpoint-ed, so posts a request for a + background thread to do a checkpoint; does not care about the success of the + checkpoint). Then the checkpoint is done by the checkpoint thread, at an + unspecified moment ("later") (==soon, of course). + - there are synchronous checkpoints: a thread requests a checkpoint to + happen now and wants to know when it finishes and if it succeeded; then the + checkpoint is done by that same thread. +*/ + +#include "page_cache.h" +#include "least_recently_dirtied.h" +#include "transaction.h" +#include "share.h" +#include "log.h" + +#define LSN_IMPOSSIBLE ((LSN)0) /* could also be called LSN_ERROR */ +#define LSN_MAX ((LSN)ULONGLONG_MAX) + +/* + this transaction is used for any system work (purge, checkpoint writing + etc), that is, background threads. It will not be declared/initialized here + in the final version. +*/ +st_transaction system_trans= {0 /* long trans id */, 0 /* short trans id */,0,...}; + +/* those three are protected by the log's mutex */ +/* + The maximum rec_lsn in the LRD when last checkpoint was run, serves for the + MEDIUM checkpoint. +*/ +LSN max_rec_lsn_at_last_checkpoint= 0; +/* last submitted checkpoint request; cleared when starts */ +CHECKPOINT_LEVEL next_asynchronous_checkpoint_to_do= NONE; +CHECKPOINT_LEVEL checkpoint_in_progress= NONE; + +static inline ulonglong read_non_atomic(ulonglong volatile *x); + +/* + Used by MySQL client threads requesting a checkpoint (like "ALTER MARIA + ENGINE DO CHECKPOINT"), and probably by maria_panic(), and at the end of the + UNDO recovery phase. +*/ +my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level) +{ + my_bool result; + DBUG_ENTER("execute_synchronous_checkpoint"); + DBUG_ASSERT(level > NONE); + + lock(log_mutex); + while (checkpoint_in_progress != NONE) + wait_on_checkpoint_done_cond(); + + result= execute_checkpoint(level); + DBUG_RETURN(result); +} + +/* + If no checkpoint is running, and there is a pending asynchronous checkpoint + request, executes it. + Is safe if multiple threads call it, though in first version only one will. + It's intended to be used by a thread which regularly calls this function; + this is why, if there is a request, it does not wait in a loop for + synchronous checkpoints to be finished, but just exits (because the thread + may want to do something useful meanwhile (flushing dirty pages for example) + instead of waiting). +*/ +my_bool execute_asynchronous_checkpoint_if_any() +{ + my_bool result; + CHECKPOINT_LEVEL level; + DBUG_ENTER("execute_asynchronous_checkpoint"); + + /* first check without mutex, ok to see old data */ + if (likely((next_asynchronous_checkpoint_to_do == NONE) || + (checkpoint_in_progress != NONE))) + DBUG_RETURN(FALSE); + + lock(log_mutex); + if (likely((next_asynchronous_checkpoint_to_do == NONE) || + (checkpoint_in_progress != NONE))) + { + unlock(log_mutex); + DBUG_RETURN(FALSE); + } + + result= execute_checkpoint(next_asynchronous_checkpoint_to_do); + DBUG_RETURN(result); +} + + +/* + Does the actual checkpointing. Called by + execute_synchronous_checkpoint() and + execute_asynchronous_checkpoint_if_any(). +*/ +my_bool execute_checkpoint(CHECKPOINT_LEVEL level) +{ + my_bool result; + DBUG_ENTER("execute_checkpoint"); + + safemutex_assert_owner(log_mutex); + if (next_asynchronous_checkpoint_to_do <= level) + next_asynchronous_checkpoint_to_do= NONE; + checkpoint_in_progress= level; + + if (unlikely(level > INDIRECT)) + { + LSN copy_of_max_rec_lsn_at_last_checkpoint= + max_rec_lsn_at_last_checkpoint; + /* much I/O work to do, release log mutex */ + unlock(log_mutex); + + switch (level) + { + case FULL: + /* flush all pages up to the current end of the LRD */ + flush_all_LRD_to_lsn(LSN_MAX); + /* this will go full speed (normal scheduling, no sleep) */ + break; + case MEDIUM: + /* + flush all pages which were already dirty at last checkpoint: + ensures that recovery will never start from before the next-to-last + checkpoint (two-checkpoint rule). + */ + flush_all_LRD_to_lsn(copy_of_max_rec_lsn_at_last_checkpoint); + /* this will go full speed (normal scheduling, no sleep) */ + break; + } + lock(log_mutex); + } + + result= execute_checkpoint_indirect(); + checkpoint_in_progress= NONE; + unlock(log_mutex); + broadcast(checkpoint_done_cond); + DBUG_RETURN(result); +} + + +/* + Does an indirect checpoint (collects data from data structures, writes into + a checkpoint log record). + Starts and ends while having log's mutex (released in the middle). +*/ +my_bool execute_checkpoint_indirect() +{ + int error= 0, i; + /* checkpoint record data: */ + LSN checkpoint_start_lsn; + char checkpoint_start_lsn_char[8]; + LEX_STRING strings[6]= + {checkpoint_start_lsn_char, 8}, {0,0}, {0,0}, {0,0}, {0,0}, {0,0} }; + char *ptr; + LSN checkpoint_lsn; + LSN candidate_max_rec_lsn_at_last_checkpoint; + DBUG_ENTER("execute_checkpoint_indirect"); + + DBUG_ASSERT(sizeof(byte *) <= 8); + DBUG_ASSERT(sizeof(LSN) <= 8); + + safemutex_assert_owner(log_mutex); + + /* STEP 1: record current end-of-log LSN */ + checkpoint_start_lsn= log_read_end_lsn(); + if (LSN_IMPOSSIBLE == checkpoint_start_lsn) /* error */ + DBUG_RETURN(TRUE); + unlock(log_mutex); + + DBUG_PRINT("info",("checkpoint_start_lsn %lu", checkpoint_start_lsn)); + int8store(strings[0].str, checkpoint_start_lsn); + + /* STEP 2: fetch information about dirty pages */ + + if (pagecache_collect_changed_blocks_with_LSN(pagecache, &strings[1], + &candidate_max_rec_lsn_at_last_checkpoint)) + goto err; + + /* STEP 3: fetch information about transactions */ + if (trnman_collect_transactions(&strings[2], &strings[3])) + goto err; + + /* STEP 4: fetch information about table files */ + + { + /* This global mutex is in fact THR_LOCK_maria (see ma_open()) */ + lock(global_share_list_mutex); + strings[4].length= 8+(8+8)*share_list->count; + if (NULL == (strings[4].str= my_malloc(strings[4].length))) + goto err; + ptr= string3.str; + /* + Note that maria_open_list is a list of MARIA_HA*, while we would prefer + a list of MARIA_SHARE* here (we are interested in the short id, + unique file name, members of MARIA_SHARE*, and in file descriptors, + which will in the end be in MARIA_SHARE*). + */ + for (iterate on the maria_open_list) + { + /* latch each MARIA_SHARE, one by one, like this: */ + pthread_mutex_lock(&share->intern_lock); + /* + TODO: + we need to prevent the share from going away while we later flush and + force it without holding THR_LOCK_maria. For example if the share is + free()d by maria_close() we'll have a problem. Or if the share's file + descriptor is closed by maria_close() we will not be able to my_sync() + it. + */ + pthread_mutex_unlock(&share->intern_lock); + store the share pointer into a private array; + } + unlock(global_share_list_mutex); + + /* work on copy */ + int8store(ptr, elements_in_array); + ptr+= 8; + for (el in array) + { + int8store(ptr, array[...].short_id); + ptr+= 8; + memcpy(ptr, array[...].unique_file_name[_length], ...); + ptr+= ...; + /* maybe we need to lock share->intern_lock here */ + /* + these two are long ops (involving disk I/O) that's why we copied the + list, to not keep the list locked for long: + */ + flush_bitmap_pages(el); + /* TODO: and also autoinc counter, logical file end, free page list */ + + /* + fsyncs the fd, that's the loooong operation (e.g. max 150 fsync per + second, so if you have touched 1000 files it's 7 seconds). + */ + force_file(el); + } + } + + /* LAST STEP: now write the checkpoint log record */ + + checkpoint_lsn= log_write_record(LOGREC_CHECKPOINT, + &system_trans, strings); + + /* + Do nothing between the log write and the control file write, for the + "repair control file" tool to be possible one day. + */ + + if (LSN_IMPOSSIBLE == checkpoint_lsn) + goto err; + + if (0 != control_file_write_and_force(checkpoint_lsn, NULL)) + goto err; + + /* + Note that we should not alter memory structures until we have successfully + written the checkpoint record and control file. + Btw, a log write failure is serious: + - if we know how many bytes we managed to write, we should try to write + more, keeping the log's mutex (MY_FULL_IO) + - if we don't know, this log record is corrupted and we have no way to + "de-corrupt" it, so it will stay corrupted, and as the log is sequential, + any log record written after it will not be reachable (for example if we + would write UNDOs and crash, we would not be able to read the log and so + not be able to rollback), so we should stop the engine now (holding the + log's mutex) and do a recovery. + */ + goto end; + +err: + print_error_to_error_log(the_error_message); + candidate_max_rec_lsn_at_last_checkpoint= LSN_IMPOSSIBLE; + +end: + + for (i= 1; i<6; i++) + my_free(strings[i].str, MYF(MY_ALLOW_ZERO_PTR)); + + /* + this portion cannot be done as a hook in write_log_record() for the + LOGREC_CHECKPOINT type because: + - at that moment we still have not written to the control file so cannot + mark the request as done; this could be solved by writing to the control + file in the hook but that would be an I/O under the log's mutex, bad. + - it would not be nice organisation of code (I tried it :). + */ + if (candidate_max_rec_lsn_at_last_checkpoint != LSN_IMPOSSIBLE) + { + /* checkpoint succeeded */ + /* + TODO: compute log's low water mark (how to do that with our fuzzy + ARIES-like reads of data structures? TODO think about it :). + */ + lock(log_mutex); + /* That LSN is used for the "two-checkpoint rule" (MEDIUM checkpoints) */ + maximum_rec_lsn_last_checkpoint= candidate_max_rec_lsn_at_last_checkpoint; + DBUG_RETURN(FALSE); + } + lock(log_mutex); + DBUG_RETURN(TRUE); + /* + keep mutex locked upon exit because callers will want to clear + mutex-protected status variables + */ +} + + + +/* + Here's what should be put in log_write_record() in the log handler: +*/ +log_write_record(...) +{ + ...; + lock(log_mutex); + ...; + write_to_log(length); + written_since_last_checkpoint+= length; + if (written_since_last_checkpoint > + MAX_LOG_BYTES_WRITTEN_BETWEEN_CHECKPOINTS) + { + /* + ask one system thread (the "LRD background flusher and checkpointer + thread" WL#3261) to do a checkpoint + */ + request_asynchronous_checkpoint(INDIRECT); + /* prevent similar redundant requests */ + written_since_last_checkpoint= (my_off_t)0; + } + ...; + unlock(log_mutex); + ...; +} + +/* + Requests a checkpoint from the background thread, *asynchronously* + (requestor does not wait for completion, and does not even later check the + result). + In real life it will be called by log_write_record(). +*/ +void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); +{ + safemutex_assert_owner(log_mutex); + + DBUG_ASSERT(level > NONE); + if ((next_asynchronous_checkpoint_to_do < level) && + (checkpoint_in_progress < level)) + { + /* no equal or stronger running or to run, we post request */ + /* + We just don't broacast a cond, the checkpoint thread + (see ma_least_recently_dirtied.c) will notice our request in max a few + seconds. + */ + next_asynchronous_checkpoint_to_do= level; /* post request */ + } + + /* + If there was an error, only an error + message to the error log will say it; normal, for a checkpoint triggered + by a log write, we probably don't want the client's log write to throw an + error, as the log write succeeded and a checkpoint failure is not + critical: the failure in this case is more for the DBA to know than for + the end user. + */ +} + + +/* + If a 64-bit variable transitions from both halves being zero to both halves + being non-zero, and never changes after that (like the transaction's + first_undo_lsn), this function can be used to do a read of it (without + mutex, without atomic load) which always produces a correct (though maybe + slightly old) value (even on 32-bit CPUs). + The prototype will change with Sanja's new LSN type. +*/ +static inline ulonglong read_non_atomic(ulonglong volatile *x) +{ +#if ( SIZEOF_CHARP >= 8 ) + /* 64-bit CPU (right?), 64-bit reads are atomic */ + return *x; +#else + /* + 32-bit CPU, 64-bit reads may give a mixed of old half and new half (old + low bits and new high bits, or the contrary). + As the variable we read transitions from both halves being zero to both + halves being non-zero, and never changes then, we can detect atomicity + problems: + */ + ulonglong y; + for (;;) /* loop until no atomicity problems */ + { + y= *x; + if (likely(((0 == y) || + ((0 != (y >> 32)) && (0 != (y << 32))))) + return y; + /* Worth seeing it! */ + DBUG_PRINT("info",("atomicity problem")); + } +#endif +} diff --git a/storage/maria/ma_checkpoint.h b/storage/maria/ma_checkpoint.h new file mode 100644 index 00000000000..1ce2ccb7012 --- /dev/null +++ b/storage/maria/ma_checkpoint.h @@ -0,0 +1,34 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3071 Maria checkpoint + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +typedef enum enum_checkpoint_level { + NONE=-1, + INDIRECT, /* just write dirty_pages, transactions table and sync files */ + MEDIUM, /* also flush all dirty pages which were already dirty at prev checkpoint*/ + FULL /* also flush all dirty pages */ +} CHECKPOINT_LEVEL; + +void request_asynchronous_checkpoint(CHECKPOINT_LEVEL level); +my_bool execute_synchronous_checkpoint(CHECKPOINT_LEVEL level); +my_bool execute_asynchronous_checkpoint_if_any(); +/* that's all that's needed in the interface */ diff --git a/storage/maria/ma_checksum.c b/storage/maria/ma_checksum.c new file mode 100644 index 00000000000..95555aa3129 --- /dev/null +++ b/storage/maria/ma_checksum.c @@ -0,0 +1,69 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Calculate a checksum for a row */ + +#include "maria_def.h" + +ha_checksum _ma_checksum(MARIA_HA *info, const byte *record) +{ + ha_checksum crc=0; + MARIA_COLUMNDEF *column= info->s->columndef; + MARIA_COLUMNDEF *column_end= column+ info->s->base.fields; + + if (info->s->base.null_bytes) + crc= my_checksum(crc, record, info->s->base.null_bytes); + + for ( ; column != column_end ; column++) + { + const byte *pos= record + column->offset; + ulong length; + + switch (column->type) { + case FIELD_BLOB: + { + uint blob_size_length= column->length- portable_sizeof_char_ptr; + length= _ma_calc_blob_length(blob_size_length, pos); + if (length) + { + memcpy((char*) &pos, pos + blob_size_length, sizeof(char*)); + crc= my_checksum(crc, pos, length); + } + continue; + } + case FIELD_VARCHAR: + { + uint pack_length= HA_VARCHAR_PACKLENGTH(column->length-1); + if (pack_length == 1) + length= (ulong) *(uchar*) pos; + else + length= uint2korr(pos); + pos+= pack_length; + break; + } + default: + length= column->length; + break; + } + crc= my_checksum(crc, pos, length); + } + return crc; +} + + +ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *pos) +{ + return my_checksum(0, pos, info->s->base.reclength); +} diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c new file mode 100644 index 00000000000..34c1bfb4d6d --- /dev/null +++ b/storage/maria/ma_close.c @@ -0,0 +1,128 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* close a isam-database */ +/* + TODO: + We need to have a separate mutex on the closed file to allow other threads + to open other files during the time we flush the cache and close this file +*/ + +#include "maria_def.h" + +int maria_close(register MARIA_HA *info) +{ + int error=0,flag; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_close"); + DBUG_PRINT("enter",("base: 0x%lx reopen: %u locks: %u", + (long) info, (uint) share->reopen, + (uint) share->tot_locks)); + + pthread_mutex_lock(&THR_LOCK_maria); + if (info->lock_type == F_EXTRA_LCK) + info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */ + + if (share->reopen == 1 && share->kfile.file >= 0) + _ma_decrement_open_count(info); + + if (info->lock_type != F_UNLCK) + { + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + } + pthread_mutex_lock(&share->intern_lock); + + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + share->r_locks--; + share->tot_locks--; + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + if (end_io_cache(&info->rec_cache)) + error=my_errno; + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + } + flag= !--share->reopen; + maria_open_list=list_delete(maria_open_list,&info->open_list); + pthread_mutex_unlock(&share->intern_lock); + + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + (*share->end)(info); + + if (flag) + { + if (share->kfile.file >= 0) + { + if ((*share->once_end)(share)) + error= my_errno; + if (flush_pagecache_blocks(share->pagecache, &share->kfile, + (share->temporary ? + FLUSH_IGNORE_CHANGED : + FLUSH_RELEASE))) + error= my_errno; + /* + File must be synced as it is going out of the maria_open_list and so + becoming unknown to Checkpoint. + */ + if (my_sync(share->kfile.file, MYF(MY_WME))) + error= my_errno; + /* + If we are crashed, we can safely flush the current state as it will + not change the crashed state. + We can NOT write the state in other cases as other threads + may be using the file at this point + */ + if (share->mode != O_RDONLY && maria_is_crashed(info)) + _ma_state_info_write(share->kfile.file, &share->state, 1); + if (my_close(share->kfile.file, MYF(0))) + error= my_errno; + } +#ifdef HAVE_MMAP + if (share->file_map) + _ma_unmap_file(info); +#endif +#ifdef THREAD + thr_lock_delete(&share->lock); + VOID(pthread_mutex_destroy(&share->intern_lock)); + { + int i,keys; + keys = share->state.header.keys; + VOID(rwlock_destroy(&share->mmap_lock)); + for(i=0; ikey_root_lock[i])); + } + } +#endif + my_free((gptr) info->s,MYF(0)); + } + pthread_mutex_unlock(&THR_LOCK_maria); + if (info->ftparser_param) + { + my_free((gptr)info->ftparser_param, MYF(0)); + info->ftparser_param= 0; + } + if (info->dfile.file >= 0 && my_close(info->dfile.file, MYF(0))) + error = my_errno; + + my_free((gptr) info,MYF(0)); + + if (error) + { + DBUG_RETURN(my_errno=error); + } + DBUG_RETURN(0); +} /* maria_close */ diff --git a/storage/maria/ma_commit.c b/storage/maria/ma_commit.c new file mode 100644 index 00000000000..88aaee0509f --- /dev/null +++ b/storage/maria/ma_commit.c @@ -0,0 +1,71 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include "trnman.h" + +/** + @brief writes a COMMIT record to log and commits transaction in memory + + @param trn transaction + + @return Operation status + @retval 0 ok + @retval 1 error (disk error or out of memory) +*/ + +int ma_commit(TRN *trn) +{ + if (trn->undo_lsn == 0) /* no work done, rollback (cheaper than commit) */ + return trnman_rollback_trn(trn); + /* + - if COMMIT record is written before trnman_commit_trn(): + if Checkpoint comes in the middle it will see trn is not committed, + then if crash, Recovery might roll back trn (if min(rec_lsn) is after + COMMIT record) and this is not an issue as + * transaction's updates were not made visible to other transactions + * "commit ok" was not sent to client + Alternatively, Recovery might commit trn (if min(rec_lsn) is before COMMIT + record), which is ok too. All in all it means that "trn committed" is not + 100% equal to "COMMIT record written". + - if COMMIT record is written after trnman_commit_trn(): + if crash happens between the two, trn will be rolled back which is an + issue (transaction's updates were made visible to other transactions). + So we need to go the first way. + */ + /** + @todo RECOVERY share's state is written to disk only in + maria_lock_database(), so COMMIT record is not the last record of the + transaction! It is probably an issue. Recovery of the state is a problem + not yet solved. + */ + LSN commit_lsn; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS]; + /* + We do not store "thd->transaction.xid_state.xid" for now, it will be + needed only when we support XA. + */ + return + translog_write_record(&commit_lsn, LOGREC_COMMIT, + trn, NULL, 0, + sizeof(log_array)/sizeof(log_array[0]), + log_array, NULL) || + translog_flush(commit_lsn) || trnman_commit_trn(trn); + /* + Note: if trnman_commit_trn() fails above, we have already + written the COMMIT record, so Checkpoint and Recovery will see the + transaction as committed. + */ +} diff --git a/storage/maria/ma_commit.h b/storage/maria/ma_commit.h new file mode 100644 index 00000000000..2c57c73fd7a --- /dev/null +++ b/storage/maria/ma_commit.h @@ -0,0 +1,18 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +C_MODE_START +int ma_commit(TRN *trn); +C_MODE_END diff --git a/storage/maria/ma_control_file.c b/storage/maria/ma_control_file.c new file mode 100644 index 00000000000..db5440dc873 --- /dev/null +++ b/storage/maria/ma_control_file.c @@ -0,0 +1,322 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3234 Maria control file + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +#include "maria_def.h" + +/* Here is the implementation of this module */ + +/* + a control file contains 3 objects: magic string, LSN of last checkpoint, + number of last log. +*/ + +/* total size should be < sector size for atomic write operation */ +#define CONTROL_FILE_MAGIC_STRING "\xfe\xfe\xc\1MACF" +#define CONTROL_FILE_MAGIC_STRING_OFFSET 0 +#define CONTROL_FILE_MAGIC_STRING_SIZE (sizeof(CONTROL_FILE_MAGIC_STRING)-1) +#define CONTROL_FILE_CHECKSUM_OFFSET (CONTROL_FILE_MAGIC_STRING_OFFSET + CONTROL_FILE_MAGIC_STRING_SIZE) +#define CONTROL_FILE_CHECKSUM_SIZE 4 +#define CONTROL_FILE_LSN_OFFSET (CONTROL_FILE_CHECKSUM_OFFSET + CONTROL_FILE_CHECKSUM_SIZE) +#define CONTROL_FILE_LSN_SIZE LSN_STORE_SIZE +#define CONTROL_FILE_FILENO_OFFSET (CONTROL_FILE_LSN_OFFSET + CONTROL_FILE_LSN_SIZE) +#define CONTROL_FILE_FILENO_SIZE 4 +#define CONTROL_FILE_SIZE (CONTROL_FILE_FILENO_OFFSET + CONTROL_FILE_FILENO_SIZE) + +/* + This module owns these two vars. + uint32 is always atomically updated, but LSN is 8 bytes, we will need + provisions to ensure that it's updated atomically in + ma_control_file_write_and_force(). Probably the log mutex could be + used. TODO. +*/ +LSN last_checkpoint_lsn; +uint32 last_logno; + +/** + @brief If log's lock should be asserted when writing to control file. + + Can be re-used by any function which needs to be thread-safe except when + it is called at startup. +*/ +my_bool maria_multi_threaded= FALSE; + +/* + Control file is less then 512 bytes (a disk sector), + to be as atomic as possible +*/ +static int control_file_fd= -1; + +/* + Initialize control file subsystem + + SYNOPSIS + ma_control_file_create_or_open() + + Looks for the control file. If absent, it's a fresh start, creates file. + If present, reads it to find out last checkpoint's LSN and last log, updates + the last_checkpoint_lsn and last_logno global variables. + Called at engine's start. + + The format of the control file is: + 4 bytes: magic string + 4 bytes: checksum of the following bytes + 4 bytes: number of log where last checkpoint is + 4 bytes: offset in log where last checkpoint is + 4 bytes: number of last log + + RETURN + 0 - OK + 1 - Error (in which case the file is left closed) +*/ +CONTROL_FILE_ERROR ma_control_file_create_or_open() +{ + char buffer[CONTROL_FILE_SIZE]; + char name[FN_REFLEN]; + MY_STAT stat_buff; + my_bool create_file; + int open_flags= O_BINARY | /*O_DIRECT |*/ O_RDWR; + int error= CONTROL_FILE_UNKNOWN_ERROR; + DBUG_ENTER("ma_control_file_create_or_open"); + + /* + If you change sizes in the #defines, you at least have to change the + "*store" and "*korr" calls in this file, and can even create backward + compatibility problems. Beware! + */ + DBUG_ASSERT(CONTROL_FILE_LSN_SIZE == (3+4)); + DBUG_ASSERT(CONTROL_FILE_FILENO_SIZE == 4); + + if (control_file_fd >= 0) /* already open */ + DBUG_RETURN(0); + + if (fn_format(name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) == NullS) + DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); + + create_file= test(my_access(name,F_OK)); + + if (create_file) + { + if ((control_file_fd= my_create(name, 0, + open_flags, MYF(MY_SYNC_DIR))) < 0) + DBUG_RETURN(CONTROL_FILE_UNKNOWN_ERROR); + + /* + To be safer we should make sure that there are no logs or data/index + files around (indeed it could be that the control file alone was deleted + or not restored, and we should not go on with life at this point). + + TODO: For now we trust (this is alpha version), but for beta if would + be great to verify. + + We could have a tool which can rebuild the control file, by reading the + directory of logs, finding the newest log, reading it to find last + checkpoint... Slow but can save your db. For this to be possible, we + must always write to the control file right after writing the checkpoint + log record, and do nothing in between (i.e. the checkpoint must be + usable as soon as it has been written to the log). + */ + + /* init the file with these "undefined" values */ + DBUG_RETURN(ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, + CONTROL_FILE_IMPOSSIBLE_FILENO, + CONTROL_FILE_UPDATE_ALL)); + } + + /* Otherwise, file exists */ + + if ((control_file_fd= my_open(name, open_flags, MYF(MY_WME))) < 0) + goto err; + + if (my_stat(name, &stat_buff, MYF(MY_WME)) == NULL) + goto err; + + if ((uint)stat_buff.st_size < CONTROL_FILE_SIZE) + { + /* + Given that normally we write only a sector and it's atomic, the only + possibility for a file to be of too short size is if we crashed at the + very first startup, between file creation and file write. Quite unlikely + (and can be made even more unlikely by doing this: create a temp file, + write it, and then rename it to be the control file). + What's more likely is if someone forgot to restore the control file, + just did a "touch control" to try to get Maria to start, or if the + disk/filesystem has a problem. + So let's be rigid. + */ + /* + TODO: store a message "too small file" somewhere, so that it goes to + MySQL's error log at startup. + */ + error= CONTROL_FILE_TOO_SMALL; + goto err; + } + + if ((uint)stat_buff.st_size > CONTROL_FILE_SIZE) + { + /* TODO: store "too big file" message */ + error= CONTROL_FILE_TOO_BIG; + goto err; + } + + if (my_read(control_file_fd, buffer, CONTROL_FILE_SIZE, + MYF(MY_FNABP | MY_WME))) + goto err; + if (memcmp(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, + CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE)) + { + /* TODO: store message "bad magic string" somewhere */ + error= CONTROL_FILE_BAD_MAGIC_STRING; + goto err; + } + if (my_checksum(0, buffer + CONTROL_FILE_LSN_OFFSET, + CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET) != + uint4korr(buffer + CONTROL_FILE_CHECKSUM_OFFSET)) + { + /* TODO: store message "checksum mismatch" somewhere */ + error= CONTROL_FILE_BAD_CHECKSUM; + goto err; + } + last_checkpoint_lsn= lsn_korr(buffer + CONTROL_FILE_LSN_OFFSET); + last_logno= uint4korr(buffer + CONTROL_FILE_FILENO_OFFSET); + + DBUG_RETURN(0); +err: + ma_control_file_end(); + DBUG_RETURN(error); +} + + +/* + Write information durably to the control file; stores this information into + the last_checkpoint_lsn and last_logno global variables. + Called when we have created a new log (after syncing this log's creation) + and when we have written a checkpoint (after syncing this log record). + Variables last_checkpoint_lsn and last_logno must be protected by caller + using log's lock, unless this function is called at startup. + + SYNOPSIS + ma_control_file_write_and_force() + checkpoint_lsn LSN of last checkpoint + logno last log file number + objs_to_write which of the arguments should be used as new values + (for example, CONTROL_FILE_UPDATE_ONLY_LSN will not + write the logno argument to the control file and will + not update the last_logno global variable); can be: + CONTROL_FILE_UPDATE_ALL + CONTROL_FILE_UPDATE_ONLY_LSN + CONTROL_FILE_UPDATE_ONLY_LOGNO. + + NOTE + We always want to do one single my_pwrite() here to be as atomic as + possible. + + RETURN + 0 - OK + 1 - Error +*/ + +int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, + uint objs_to_write) +{ + char buffer[CONTROL_FILE_SIZE]; + my_bool update_checkpoint_lsn= FALSE, update_logno= FALSE; + DBUG_ENTER("ma_control_file_write_and_force"); + + DBUG_ASSERT(control_file_fd >= 0); /* must be open */ +#ifndef DBUG_OFF + if (maria_multi_threaded) + translog_lock_assert_owner(); +#endif + + memcpy(buffer + CONTROL_FILE_MAGIC_STRING_OFFSET, + CONTROL_FILE_MAGIC_STRING, CONTROL_FILE_MAGIC_STRING_SIZE); + + if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LSN) + update_checkpoint_lsn= TRUE; + else if (objs_to_write == CONTROL_FILE_UPDATE_ONLY_LOGNO) + update_logno= TRUE; + else if (objs_to_write == CONTROL_FILE_UPDATE_ALL) + update_checkpoint_lsn= update_logno= TRUE; + else /* incorrect value of objs_to_write */ + DBUG_ASSERT(0); + + if (update_checkpoint_lsn) + lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, checkpoint_lsn); + else /* store old value == change nothing */ + lsn_store(buffer + CONTROL_FILE_LSN_OFFSET, last_checkpoint_lsn); + + if (update_logno) + int4store(buffer + CONTROL_FILE_FILENO_OFFSET, logno); + else + int4store(buffer + CONTROL_FILE_FILENO_OFFSET, last_logno); + + { + uint32 sum= (uint32) + my_checksum(0, buffer + CONTROL_FILE_LSN_OFFSET, + CONTROL_FILE_SIZE - CONTROL_FILE_LSN_OFFSET); + int4store(buffer + CONTROL_FILE_CHECKSUM_OFFSET, sum); + } + + if (my_pwrite(control_file_fd, buffer, sizeof(buffer), + 0, MYF(MY_FNABP | MY_WME)) || + my_sync(control_file_fd, MYF(MY_WME))) + DBUG_RETURN(1); + + if (update_checkpoint_lsn) + last_checkpoint_lsn= checkpoint_lsn; + if (update_logno) + last_logno= logno; + + DBUG_RETURN(0); +} + + +/* + Free resources taken by control file subsystem + + SYNOPSIS + ma_control_file_end() +*/ + +int ma_control_file_end() +{ + int close_error; + DBUG_ENTER("ma_control_file_end"); + + if (control_file_fd < 0) /* already closed */ + DBUG_RETURN(0); + + close_error= my_close(control_file_fd, MYF(MY_WME)); + /* + As my_close() frees structures even if close() fails, we do the same, + i.e. we mark the file as closed in all cases. + */ + control_file_fd= -1; + /* + As this module owns these variables, closing the module forbids access to + them (just a safety): + */ + last_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + last_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + + DBUG_RETURN(close_error); +} diff --git a/storage/maria/ma_control_file.h b/storage/maria/ma_control_file.h new file mode 100644 index 00000000000..c974838684b --- /dev/null +++ b/storage/maria/ma_control_file.h @@ -0,0 +1,85 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3234 Maria control file + First version written by Guilhem Bichot on 2006-04-27. +*/ + +#define CONTROL_FILE_BASE_NAME "maria_control" +/* + indicate absence of the log file number; first log is always number 1, 0 is + impossible. +*/ +#define CONTROL_FILE_IMPOSSIBLE_FILENO 0 +/* logs always have a header */ +#define CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET 0 +/* indicate absence of LSN. */ +#define CONTROL_FILE_IMPOSSIBLE_LSN ((LSN)0) + +/* Here is the interface of this module */ + +/* + LSN of the last checkoint + (if last_checkpoint_lsn == CONTROL_FILE_IMPOSSIBLE_LSN + then there was never a checkpoint) +*/ +extern LSN last_checkpoint_lsn; +/* + Last log number (if last_logno == + CONTROL_FILE_IMPOSSIBLE_FILENO then there is no log file yet) +*/ +extern uint32 last_logno; + +extern my_bool maria_multi_threaded; + +typedef enum enum_control_file_error { + CONTROL_FILE_OK= 0, + CONTROL_FILE_TOO_SMALL, + CONTROL_FILE_TOO_BIG, + CONTROL_FILE_BAD_MAGIC_STRING, + CONTROL_FILE_BAD_CHECKSUM, + CONTROL_FILE_UNKNOWN_ERROR /* any other error */ +} CONTROL_FILE_ERROR; + +#define CONTROL_FILE_UPDATE_ALL 0 +#define CONTROL_FILE_UPDATE_ONLY_LSN 1 +#define CONTROL_FILE_UPDATE_ONLY_LOGNO 2 + +#ifdef __cplusplus +extern "C" { +#endif + +/* + Looks for the control file. If absent, it's a fresh start, create file. + If present, read it to find out last checkpoint's LSN and last log. + Called at engine's start. +*/ +CONTROL_FILE_ERROR ma_control_file_create_or_open(); +/* + Write information durably to the control file. + Called when we have created a new log (after syncing this log's creation) + and when we have written a checkpoint (after syncing this log record). +*/ +int ma_control_file_write_and_force(const LSN checkpoint_lsn, uint32 logno, + uint objs_to_write); + + +/* Free resources taken by control file subsystem */ +int ma_control_file_end(); + +#ifdef __cplusplus +} +#endif diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c new file mode 100644 index 00000000000..53e15deb74b --- /dev/null +++ b/storage/maria/ma_create.c @@ -0,0 +1,1159 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Create a MARIA table */ + +#include "ma_ftdefs.h" +#include "ma_sp_defs.h" +#include +#include "ma_blockrec.h" +#include "trnman_public.h" + +#if defined(MSDOS) || defined(__WIN__) +#ifdef __WIN__ +#include +#else +#include /* Prototype for getpid */ +#endif +#endif +#include + +static int compare_columns(MARIA_COLUMNDEF **a, MARIA_COLUMNDEF **b); + +/* + Old options is used when recreating database, from maria_chk +*/ + +int maria_create(const char *name, enum data_file_type datafile_type, + uint keys,MARIA_KEYDEF *keydefs, + uint columns, MARIA_COLUMNDEF *columndef, + uint uniques, MARIA_UNIQUEDEF *uniquedefs, + MARIA_CREATE_INFO *ci,uint flags) +{ + register uint i,j; + File dfile,file; + int errpos,save_errno, create_mode= O_RDWR | O_TRUNC, res; + myf create_flag; + uint length,max_key_length,packed,pack_bytes,pointer,real_length_diff, + key_length,info_length,key_segs,options,min_key_length_skip, + base_pos,long_varchar_count,varchar_length, + unique_key_parts,fulltext_keys,offset, not_block_record_extra_length; + uint max_field_lengths, extra_header_size; + ulong reclength, real_reclength,min_pack_length; + char filename[FN_REFLEN], dlinkname[FN_REFLEN], *dlinkname_ptr= NULL, + klinkname[FN_REFLEN], *klinkname_ptr= NULL; + ulong pack_reclength; + ulonglong tot_length,max_rows, tmp; + enum en_fieldtype type; + enum data_file_type org_datafile_type= datafile_type; + MARIA_SHARE share; + MARIA_KEYDEF *keydef,tmp_keydef; + MARIA_UNIQUEDEF *uniquedef; + HA_KEYSEG *keyseg,tmp_keyseg; + MARIA_COLUMNDEF *column, *end_column; + ulong *rec_per_key_part; + my_off_t key_root[HA_MAX_POSSIBLE_KEY], kfile_size_before_extension; + MARIA_CREATE_INFO tmp_create_info; + my_bool tmp_table= FALSE; /* cache for presence of HA_OPTION_TMP_TABLE */ + my_bool forced_packed; + myf sync_dir= 0; + uchar *log_data= NULL; + DBUG_ENTER("maria_create"); + DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", + keys, columns, uniques, flags)); + + DBUG_ASSERT(maria_block_size && maria_block_size % IO_SIZE == 0); + LINT_INIT(dfile); + LINT_INIT(file); + + if (!ci) + { + bzero((char*) &tmp_create_info,sizeof(tmp_create_info)); + ci=&tmp_create_info; + } + + if (keys + uniques > MARIA_MAX_KEY || columns == 0) + { + DBUG_RETURN(my_errno=HA_WRONG_CREATE_OPTION); + } + errpos=0; + options=0; + bzero((byte*) &share,sizeof(share)); + + if (flags & HA_DONT_TOUCH_DATA) + { + org_datafile_type= ci->org_data_file_type; + if (!(ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD)) + options=ci->old_options & + (HA_OPTION_COMPRESS_RECORD | HA_OPTION_PACK_RECORD | + HA_OPTION_READ_ONLY_DATA | HA_OPTION_CHECKSUM | + HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); + else + { + /* Uncompressing rows */ + options=ci->old_options & + (HA_OPTION_CHECKSUM | HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE); + } + } + + if (ci->reloc_rows > ci->max_rows) + ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */ + + if (!(rec_per_key_part= + (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(my_errno); + + /* Start by checking fields and field-types used */ + + varchar_length=long_varchar_count=packed= not_block_record_extra_length= + pack_reclength= max_field_lengths= 0; + reclength= min_pack_length= ci->null_bytes; + forced_packed= 0; + + for (column= columndef, end_column= column + columns ; + column != end_column ; + column++) + { + /* Fill in not used struct parts */ + column->offset= reclength; + column->empty_pos= 0; + column->empty_bit= 0; + column->fill_length= column->length; + + reclength+= column->length; + type= column->type; + if (type == FIELD_SKIP_PRESPACE && datafile_type == BLOCK_RECORD) + type= FIELD_NORMAL; /* SKIP_PRESPACE not supported */ + + if (type != FIELD_NORMAL && type != FIELD_CHECK) + { + column->empty_pos= packed/8; + column->empty_bit= (1 << (packed & 7)); + if (type == FIELD_BLOB) + { + forced_packed= 1; + packed++; + share.base.blobs++; + if (pack_reclength != INT_MAX32) + { + if (column->length == 4+portable_sizeof_char_ptr) + pack_reclength= INT_MAX32; + else + { + /* Add max possible blob length */ + pack_reclength+= (1 << ((column->length- + portable_sizeof_char_ptr)*8)); + } + } + max_field_lengths+= (column->length - portable_sizeof_char_ptr); + } + else if (type == FIELD_SKIP_PRESPACE || + type == FIELD_SKIP_ENDSPACE) + { + forced_packed= 1; + max_field_lengths+= column->length > 255 ? 2 : 1; + not_block_record_extra_length++; + packed++; + } + else if (type == FIELD_VARCHAR) + { + varchar_length+= column->length-1; /* Used for min_pack_length */ + pack_reclength++; + not_block_record_extra_length++; + max_field_lengths++; + packed++; + column->fill_length= 1; + /* We must test for 257 as length includes pack-length */ + if (test(column->length >= 257)) + { + long_varchar_count++; + max_field_lengths++; + column->fill_length= 2; + } + } + else if (type == FIELD_SKIP_ZERO) + packed++; + else + { + if (!column->null_bit) + min_pack_length+= column->length; + else + not_block_record_extra_length+= column->length; + column->empty_pos= 0; + column->empty_bit= 0; + } + } + else /* FIELD_NORMAL */ + { + if (!column->null_bit) + { + min_pack_length+= column->length; + share.base.fixed_not_null_fields++; + share.base.fixed_not_null_fields_length+= column->length; + } + else + not_block_record_extra_length+= column->length; + } + } + + if (datafile_type == STATIC_RECORD && forced_packed) + { + /* Can't use fixed length records, revert to block records */ + datafile_type= BLOCK_RECORD; + } + + if (datafile_type == DYNAMIC_RECORD) + options|= HA_OPTION_PACK_RECORD; /* Must use packed records */ + + if (datafile_type == STATIC_RECORD) + { + /* We can't use checksum with static length rows */ + flags&= ~HA_CREATE_CHECKSUM; + options&= ~HA_OPTION_CHECKSUM; + min_pack_length+= varchar_length; + packed= 0; + } + if (datafile_type != BLOCK_RECORD) + min_pack_length+= not_block_record_extra_length; + + if ((packed & 7) == 1) + { + /* + Not optimal packing, try to remove a 1 byte length zero-field as + this will get same record length, but smaller pack overhead + */ + while (column != columndef) + { + column--; + if (column->type == (int) FIELD_SKIP_ZERO && column->length == 1) + { + column->type=(int) FIELD_NORMAL; + column->empty_pos= 0; + column->empty_bit= 0; + packed--; + min_pack_length++; + break; + } + } + } + + if (flags & HA_CREATE_TMP_TABLE) + { + options|= HA_OPTION_TMP_TABLE; + tmp_table= TRUE; + create_mode|= O_EXCL | O_NOFOLLOW; + /* "CREATE TEMPORARY" tables are not crash-safe (dropped at restart) */ + ci->transactional= FALSE; + } + share.base.null_bytes= ci->null_bytes; + share.base.original_null_bytes= ci->null_bytes; + share.base.transactional= ci->transactional; + share.base.max_field_lengths= max_field_lengths; + share.base.field_offsets= 0; /* for future */ + + if (pack_reclength != INT_MAX32) + pack_reclength+= max_field_lengths + long_varchar_count; + + if (flags & HA_CREATE_CHECKSUM || (options & HA_OPTION_CHECKSUM)) + { + options|= HA_OPTION_CHECKSUM; + min_pack_length++; + pack_reclength++; + } + if (flags & HA_CREATE_DELAY_KEY_WRITE) + options|= HA_OPTION_DELAY_KEY_WRITE; + if (flags & HA_CREATE_RELIES_ON_SQL_LAYER) + options|= HA_OPTION_RELIES_ON_SQL_LAYER; + + pack_bytes= (packed + 7) / 8; + if (pack_reclength != INT_MAX32) + pack_reclength+= reclength+pack_bytes + + test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD)); + min_pack_length+= pack_bytes; + /* Calculate min possible row length for rows-in-block */ + extra_header_size= MAX_FIXED_HEADER_SIZE; + if (ci->transactional) + extra_header_size= TRANS_MAX_FIXED_HEADER_SIZE; + share.base.min_row_length= (extra_header_size + share.base.null_bytes + + pack_bytes); + if (!ci->data_file_length && ci->max_rows) + { + if (pack_reclength == INT_MAX32 || + (~(ulonglong) 0)/ci->max_rows < (ulonglong) pack_reclength) + ci->data_file_length= ~(ulonglong) 0; + else + ci->data_file_length=(ulonglong) ci->max_rows*pack_reclength; + } + else if (!ci->max_rows) + { + if (datafile_type == BLOCK_RECORD) + { + uint rows_per_page= ((maria_block_size - PAGE_OVERHEAD_SIZE) / + (min_pack_length + extra_header_size + + DIR_ENTRY_SIZE)); + ulonglong data_file_length= ci->data_file_length; + if (!data_file_length) + data_file_length= ((((ulonglong) 1 << ((BLOCK_RECORD_POINTER_SIZE-1) * + 8)) -1)); + if (rows_per_page > 0) + { + set_if_smaller(rows_per_page, MAX_ROWS_PER_PAGE); + ci->max_rows= data_file_length / maria_block_size * rows_per_page; + } + else + ci->max_rows= data_file_length / (min_pack_length + + extra_header_size + + DIR_ENTRY_SIZE); + } + else + ci->max_rows=(ha_rows) (ci->data_file_length/(min_pack_length + + ((options & + HA_OPTION_PACK_RECORD) ? + 3 : 0))); + } + max_rows= (ulonglong) ci->max_rows; + if (datafile_type == BLOCK_RECORD) + { + /* The + 1 is for record position withing page */ + pointer= maria_get_pointer_length((ci->data_file_length / + maria_block_size), 3) + 1; + set_if_smaller(pointer, BLOCK_RECORD_POINTER_SIZE); + + if (!max_rows) + max_rows= (((((ulonglong) 1 << ((pointer-1)*8)) -1) * maria_block_size) / + min_pack_length); + } + else + { + if (datafile_type != STATIC_RECORD) + pointer= maria_get_pointer_length(ci->data_file_length, + maria_data_pointer_size); + else + pointer= maria_get_pointer_length(ci->max_rows, maria_data_pointer_size); + if (!max_rows) + max_rows= ((((ulonglong) 1 << (pointer*8)) -1) / min_pack_length); + } + + real_reclength=reclength; + if (datafile_type == STATIC_RECORD) + { + if (reclength <= pointer) + reclength=pointer+1; /* reserve place for delete link */ + } + else + reclength+= long_varchar_count; /* We need space for varchar! */ + + max_key_length=0; tot_length=0 ; key_segs=0; + fulltext_keys=0; + share.state.rec_per_key_part=rec_per_key_part; + share.state.key_root=key_root; + share.state.key_del= HA_OFFSET_ERROR; + if (uniques) + max_key_length= MARIA_UNIQUE_HASH_LENGTH + pointer; + + for (i=0, keydef=keydefs ; i < keys ; i++ , keydef++) + { + share.state.key_root[i]= HA_OFFSET_ERROR; + min_key_length_skip=length=real_length_diff=0; + key_length=pointer; + if (keydef->flag & HA_SPATIAL) + { +#ifdef HAVE_SPATIAL + /* BAR TODO to support 3D and more dimensions in the future */ + uint sp_segs=SPDIMS*2; + keydef->flag=HA_SPATIAL; + + if (flags & HA_DONT_TOUCH_DATA) + { + /* + Called by maria_chk - i.e. table structure was taken from + MYI file and SPATIAL key *does have* additional sp_segs keysegs. + keydef->seg here points right at the GEOMETRY segment, + so we only need to decrease keydef->keysegs. + (see maria_recreate_table() in _ma_check.c) + */ + keydef->keysegs-=sp_segs-1; + } + + for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; + j++, keyseg++) + { + if (keyseg->type != HA_KEYTYPE_BINARY && + keyseg->type != HA_KEYTYPE_VARBINARY1 && + keyseg->type != HA_KEYTYPE_VARBINARY2) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err_no_lock; + } + } + keydef->keysegs+=sp_segs; + key_length+=SPLEN*sp_segs; + length++; /* At least one length byte */ + min_key_length_skip+=SPLEN*2*SPDIMS; +#else + my_errno= HA_ERR_UNSUPPORTED; + goto err_no_lock; +#endif /*HAVE_SPATIAL*/ + } + else if (keydef->flag & HA_FULLTEXT) + { + keydef->flag=HA_FULLTEXT | HA_PACK_KEY | HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + + for (j=0, keyseg=keydef->seg ; (int) j < keydef->keysegs ; + j++, keyseg++) + { + if (keyseg->type != HA_KEYTYPE_TEXT && + keyseg->type != HA_KEYTYPE_VARTEXT1 && + keyseg->type != HA_KEYTYPE_VARTEXT2) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err_no_lock; + } + if (!(keyseg->flag & HA_BLOB_PART) && + (keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARTEXT2)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1)? + 1 : 2); + } + } + + fulltext_keys++; + key_length+= HA_FT_MAXBYTELEN+HA_FT_WLEN; + length++; /* At least one length byte */ + min_key_length_skip+=HA_FT_MAXBYTELEN; + real_length_diff=HA_FT_MAXBYTELEN-FT_MAX_WORD_LEN_FOR_SORT; + } + else + { + /* Test if prefix compression */ + if (keydef->flag & HA_PACK_KEY) + { + /* Can't use space_compression on number keys */ + if ((keydef->seg[0].flag & HA_SPACE_PACK) && + keydef->seg[0].type == (int) HA_KEYTYPE_NUM) + keydef->seg[0].flag&= ~HA_SPACE_PACK; + + /* Only use HA_PACK_KEY when first segment is a variable length key */ + if (!(keydef->seg[0].flag & (HA_SPACE_PACK | HA_BLOB_PART | + HA_VAR_LENGTH_PART))) + { + /* pack relative to previous key */ + keydef->flag&= ~HA_PACK_KEY; + keydef->flag|= HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY; + } + else + { + keydef->seg[0].flag|=HA_PACK_KEY; /* for easyer intern test */ + keydef->flag|=HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + } + } + if (keydef->flag & HA_BINARY_PACK_KEY) + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + + if (keydef->flag & HA_AUTO_KEY && ci->with_auto_increment) + share.base.auto_key=i+1; + for (j=0, keyseg=keydef->seg ; j < keydef->keysegs ; j++, keyseg++) + { + /* numbers are stored with high by first to make compression easier */ + switch (keyseg->type) { + case HA_KEYTYPE_SHORT_INT: + case HA_KEYTYPE_LONG_INT: + case HA_KEYTYPE_FLOAT: + case HA_KEYTYPE_DOUBLE: + case HA_KEYTYPE_USHORT_INT: + case HA_KEYTYPE_ULONG_INT: + case HA_KEYTYPE_LONGLONG: + case HA_KEYTYPE_ULONGLONG: + case HA_KEYTYPE_INT24: + case HA_KEYTYPE_UINT24: + case HA_KEYTYPE_INT8: + keyseg->flag|= HA_SWAP_KEY; + break; + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + /* Make a flag that this is a VARCHAR */ + keyseg->flag|= HA_VAR_LENGTH_PART; + /* Store in bit_start number of bytes used to pack the length */ + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? + 1 : 2); + } + break; + default: + break; + } + if (keyseg->flag & HA_SPACE_PACK) + { + DBUG_ASSERT(!(keyseg->flag & HA_VAR_LENGTH_PART)); + keydef->flag |= HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY; + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + length++; /* At least one length byte */ + min_key_length_skip+=keyseg->length; + if (keyseg->length >= 255) + { /* prefix may be 3 bytes */ + min_key_length_skip+=2; + length+=2; + } + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + DBUG_ASSERT(!test_all_bits(keyseg->flag, + (HA_VAR_LENGTH_PART | HA_BLOB_PART))); + keydef->flag|=HA_VAR_LENGTH_KEY; + length++; /* At least one length byte */ + options|=HA_OPTION_PACK_KEYS; /* Using packed keys */ + min_key_length_skip+=keyseg->length; + if (keyseg->length >= 255) + { /* prefix may be 3 bytes */ + min_key_length_skip+=2; + length+=2; + } + } + key_length+= keyseg->length; + if (keyseg->null_bit) + { + key_length++; + options|=HA_OPTION_PACK_KEYS; + keyseg->flag|=HA_NULL_PART; + keydef->flag|=HA_VAR_LENGTH_KEY | HA_NULL_PART_KEY; + } + } + } /* if HA_FULLTEXT */ + key_segs+=keydef->keysegs; + if (keydef->keysegs > HA_MAX_KEY_SEG) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err_no_lock; + } + /* + key_segs may be 0 in the case when we only want to be able to + add on row into the table. This can happen with some DISTINCT queries + in MySQL + */ + if ((keydef->flag & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME && + key_segs) + share.state.rec_per_key_part[key_segs-1]=1L; + length+=key_length; + /* + A key can't be longer than than half a index block (as we have + to be able to put at least 2 keys on an index block for the key + algorithms to work). + */ + if (length > maria_max_key_length()) + { + my_errno=HA_WRONG_CREATE_OPTION; + goto err_no_lock; + } + keydef->block_length= maria_block_size; + keydef->keylength= (uint16) key_length; + keydef->minlength= (uint16) (length-min_key_length_skip); + keydef->maxlength= (uint16) length; + + if (length > max_key_length) + max_key_length= length; + tot_length+= ((max_rows/(ulong) (((uint) maria_block_size-5)/ + (length*2))) * + maria_block_size); + } + + unique_key_parts=0; + offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH; + for (i=0, uniquedef=uniquedefs ; i < uniques ; i++ , uniquedef++) + { + uniquedef->key=keys+i; + unique_key_parts+=uniquedef->keysegs; + share.state.key_root[keys+i]= HA_OFFSET_ERROR; + tot_length+= (max_rows/(ulong) (((uint) maria_block_size-5)/ + ((MARIA_UNIQUE_HASH_LENGTH + pointer)*2)))* + (ulong) maria_block_size; + } + keys+=uniques; /* Each unique has 1 key */ + key_segs+=uniques; /* Each unique has 1 key seg */ + + base_pos=(MARIA_STATE_INFO_SIZE + keys * MARIA_STATE_KEY_SIZE + + key_segs * MARIA_STATE_KEYSEG_SIZE); + info_length= base_pos+(uint) (MARIA_BASE_INFO_SIZE+ + keys * MARIA_KEYDEF_SIZE+ + uniques * MARIA_UNIQUEDEF_SIZE + + (key_segs + unique_key_parts)*HA_KEYSEG_SIZE+ + columns*MARIA_COLUMNDEF_SIZE); + + DBUG_PRINT("info", ("info_length: %u", info_length)); + /* There are only 16 bits for the total header length. */ + if (info_length > 65535) + { + my_printf_error(0, "Maria table '%s' has too many columns and/or " + "indexes and/or unique constraints.", + MYF(0), name + dirname_length(name)); + my_errno= HA_WRONG_CREATE_OPTION; + goto err_no_lock; + } + + bmove(share.state.header.file_version,(byte*) maria_file_magic,4); + ci->old_options=options| (ci->old_options & HA_OPTION_TEMP_COMPRESS_RECORD ? + HA_OPTION_COMPRESS_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD: 0); + mi_int2store(share.state.header.options,ci->old_options); + mi_int2store(share.state.header.header_length,info_length); + mi_int2store(share.state.header.state_info_length,MARIA_STATE_INFO_SIZE); + mi_int2store(share.state.header.base_info_length,MARIA_BASE_INFO_SIZE); + mi_int2store(share.state.header.base_pos,base_pos); + share.state.header.data_file_type= datafile_type; + share.state.header.org_data_file_type= org_datafile_type; + share.state.header.language= (ci->language ? + ci->language : default_charset_info->number); + + share.state.dellink = HA_OFFSET_ERROR; + share.state.first_bitmap_with_space= 0; + share.state.create_rename_lsn= 0; + share.state.process= (ulong) getpid(); + share.state.unique= (ulong) 0; + share.state.update_count=(ulong) 0; + share.state.version= (ulong) time((time_t*) 0); + share.state.sortkey= (ushort) ~0; + share.state.auto_increment=ci->auto_increment; + share.options=options; + share.base.rec_reflength=pointer; + share.base.block_size= maria_block_size; + + /* Get estimate for index file length (this may be wrong for FT keys) */ + tmp= (tot_length + maria_block_size * keys * + MARIA_INDEX_BLOCK_MARGIN) / maria_block_size; + /* + use maximum of key_file_length we calculated and key_file_length value we + got from MYI file header (see also mariapack.c:save_state) + */ + share.base.key_reflength= + maria_get_pointer_length(max(ci->key_file_length,tmp),3); + share.base.keys= share.state.header.keys= keys; + share.state.header.uniques= uniques; + share.state.header.fulltext_keys= fulltext_keys; + mi_int2store(share.state.header.key_parts,key_segs); + mi_int2store(share.state.header.unique_key_parts,unique_key_parts); + + maria_set_all_keys_active(share.state.key_map, keys); + + share.base.keystart = share.state.state.key_file_length= + MY_ALIGN(info_length, maria_block_size); + share.base.max_key_block_length= maria_block_size; + share.base.max_key_length=ALIGN_SIZE(max_key_length+4); + share.base.records=ci->max_rows; + share.base.reloc= ci->reloc_rows; + share.base.reclength=real_reclength; + share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM); + share.base.max_pack_length=pack_reclength; + share.base.min_pack_length=min_pack_length; + share.base.pack_bytes= pack_bytes; + share.base.fields= columns; + share.base.pack_fields= packed; +#ifdef USE_RAID + share.base.raid_type=ci->raid_type; + share.base.raid_chunks=ci->raid_chunks; + share.base.raid_chunksize=ci->raid_chunksize; +#endif + + /* max_data_file_length and max_key_file_length are recalculated on open */ + if (tmp_table) + share.base.max_data_file_length= (my_off_t) ci->data_file_length; + else if (ci->transactional && translog_inited) + { + /* + we have checked translog_inited above, because maria_chk may call us + (via maria_recreate_table()) and it does not have a log. + */ + sync_dir= MY_SYNC_DIR; + } + + if (datafile_type == BLOCK_RECORD) + share.base.min_block_length= share.base.min_row_length; + else + { + share.base.min_block_length= + (share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH && + ! share.base.blobs) ? + max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) : + MARIA_EXTEND_BLOCK_LENGTH; + } + if (! (flags & HA_DONT_TOUCH_DATA)) + share.state.create_time= (long) time((time_t*) 0); + + pthread_mutex_lock(&THR_LOCK_maria); + + if (ci->index_file_name) + { + char *iext= strrchr(ci->index_file_name, '.'); + int have_iext= iext && !strcmp(iext, MARIA_NAME_IEXT); + if (tmp_table) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->index_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->index_file_name, MARIA_NAME_IEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->index_file_name, "", MARIA_NAME_IEXT, + MY_UNPACK_FILENAME | (have_iext ? MY_REPLACE_EXT : + MY_APPEND_EXT)); + } + fn_format(klinkname, name, "", MARIA_NAME_IEXT, + MY_UNPACK_FILENAME|MY_APPEND_EXT); + klinkname_ptr= klinkname; + /* + Don't create the table if the link or file exists to ensure that one + doesn't accidently destroy another table. + Don't sync dir now if the data file has the same path. + */ + create_flag= + (ci->data_file_name && + !strcmp(ci->index_file_name, ci->data_file_name)) ? 0 : sync_dir; + } + else + { + fn_format(filename, name, "", MARIA_NAME_IEXT, + (MY_UNPACK_FILENAME | + (flags & HA_DONT_TOUCH_DATA) ? MY_RETURN_REAL_PATH : 0) | + MY_APPEND_EXT); + /* + Replace the current file. + Don't sync dir now if the data file has the same path. + */ + create_flag= MY_DELETE_OLD | (!ci->data_file_name ? 0 : sync_dir); + } + + /* + If a MRG_MARIA table is in use, the mapped MARIA tables are open, + but no entry is made in the table cache for them. + A TRUNCATE command checks for the table in the cache only and could + be fooled to believe, the table is not open. + Pull the emergency brake in this situation. (Bug #8306) + */ + if (_ma_test_if_reopen(filename)) + { + my_printf_error(0, "MARIA table '%s' is in use " + "(most likely by a MERGE table). Try FLUSH TABLES.", + MYF(0), name + dirname_length(name)); + goto err; + } + + if ((file= my_create_with_symlink(klinkname_ptr, filename, 0, create_mode, + MYF(MY_WME|create_flag))) < 0) + goto err; + errpos=1; + + if (!(flags & HA_DONT_TOUCH_DATA)) + { + if (ci->data_file_name) + { + char *dext= strrchr(ci->data_file_name, '.'); + int have_dext= dext && !strcmp(dext, MARIA_NAME_DEXT); + + if (tmp_table) + { + char *path; + /* chop off the table name, tempory tables use generated name */ + if ((path= strrchr(ci->data_file_name, FN_LIBCHAR))) + *path= '\0'; + fn_format(filename, name, ci->data_file_name, MARIA_NAME_DEXT, + MY_REPLACE_DIR | MY_UNPACK_FILENAME | MY_APPEND_EXT); + } + else + { + fn_format(filename, ci->data_file_name, "", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | + (have_dext ? MY_REPLACE_EXT : MY_APPEND_EXT)); + } + fn_format(dlinkname, name, "",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + dlinkname_ptr= dlinkname; + create_flag=0; + } + else + { + fn_format(filename,name,"", MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT); + create_flag=MY_DELETE_OLD; + } + if ((dfile= + my_create_with_symlink(dlinkname_ptr, filename, 0, create_mode, + MYF(MY_WME | create_flag | sync_dir))) < 0) + goto err; + errpos=3; + + share.data_file_type= datafile_type; + if (_ma_initialize_data_file(dfile, &share)) + goto err; + } + DBUG_PRINT("info", ("write state info and base info")); + if (_ma_state_info_write(file, &share.state, 2) || + _ma_base_info_write(file, &share.base)) + goto err; + DBUG_PRINT("info", ("base_pos: %d base_info_size: %d", + base_pos, MARIA_BASE_INFO_SIZE)); + DBUG_ASSERT(my_tell(file,MYF(0)) == base_pos+ MARIA_BASE_INFO_SIZE); + + /* Write key and keyseg definitions */ + DBUG_PRINT("info", ("write key and keyseg definitions")); + for (i=0 ; i < share.base.keys - uniques; i++) + { + uint sp_segs=(keydefs[i].flag & HA_SPATIAL) ? 2*SPDIMS : 0; + + if (_ma_keydef_write(file, &keydefs[i])) + goto err; + for (j=0 ; j < keydefs[i].keysegs-sp_segs ; j++) + if (_ma_keyseg_write(file, &keydefs[i].seg[j])) + goto err; +#ifdef HAVE_SPATIAL + for (j=0 ; j < sp_segs ; j++) + { + HA_KEYSEG sseg; + sseg.type=SPTYPE; + sseg.language= 7; /* Binary */ + sseg.null_bit=0; + sseg.bit_start=0; + sseg.bit_end=0; + sseg.bit_length= 0; + sseg.bit_pos= 0; + sseg.length=SPLEN; + sseg.null_pos=0; + sseg.start=j*SPLEN; + sseg.flag= HA_SWAP_KEY; + if (_ma_keyseg_write(file, &sseg)) + goto err; + } +#endif + } + /* Create extra keys for unique definitions */ + offset=reclength-uniques*MARIA_UNIQUE_HASH_LENGTH; + bzero((char*) &tmp_keydef,sizeof(tmp_keydef)); + bzero((char*) &tmp_keyseg,sizeof(tmp_keyseg)); + for (i=0; i < uniques ; i++) + { + tmp_keydef.keysegs=1; + tmp_keydef.flag= HA_UNIQUE_CHECK; + tmp_keydef.block_length= (uint16) maria_block_size; + tmp_keydef.keylength= MARIA_UNIQUE_HASH_LENGTH + pointer; + tmp_keydef.minlength=tmp_keydef.maxlength=tmp_keydef.keylength; + tmp_keyseg.type= MARIA_UNIQUE_HASH_TYPE; + tmp_keyseg.length= MARIA_UNIQUE_HASH_LENGTH; + tmp_keyseg.start= offset; + offset+= MARIA_UNIQUE_HASH_LENGTH; + if (_ma_keydef_write(file,&tmp_keydef) || + _ma_keyseg_write(file,(&tmp_keyseg))) + goto err; + } + + /* Save unique definition */ + DBUG_PRINT("info", ("write unique definitions")); + for (i=0 ; i < share.state.header.uniques ; i++) + { + HA_KEYSEG *keyseg_end; + keyseg= uniquedefs[i].seg; + if (_ma_uniquedef_write(file, &uniquedefs[i])) + goto err; + for (keyseg= uniquedefs[i].seg, keyseg_end= keyseg+ uniquedefs[i].keysegs; + keyseg < keyseg_end; + keyseg++) + { + switch (keyseg->type) { + case HA_KEYTYPE_VARTEXT1: + case HA_KEYTYPE_VARTEXT2: + case HA_KEYTYPE_VARBINARY1: + case HA_KEYTYPE_VARBINARY2: + if (!(keyseg->flag & HA_BLOB_PART)) + { + keyseg->flag|= HA_VAR_LENGTH_PART; + keyseg->bit_start= ((keyseg->type == HA_KEYTYPE_VARTEXT1 || + keyseg->type == HA_KEYTYPE_VARBINARY1) ? + 1 : 2); + } + break; + default: + DBUG_ASSERT((keyseg->flag & HA_VAR_LENGTH_PART) == 0); + break; + } + if (_ma_keyseg_write(file, keyseg)) + goto err; + } + } + DBUG_PRINT("info", ("write field definitions")); + if (datafile_type == BLOCK_RECORD) + { + /* Store columns in a more efficent order */ + MARIA_COLUMNDEF **col_order, **pos; + if (!(col_order= (MARIA_COLUMNDEF**) my_malloc(share.base.fields * + sizeof(MARIA_COLUMNDEF*), + MYF(MY_WME)))) + goto err; + for (column= columndef, pos= col_order ; + column != end_column ; + column++, pos++) + *pos= column; + qsort(col_order, share.base.fields, sizeof(*col_order), + (qsort_cmp) compare_columns); + for (i=0 ; i < share.base.fields ; i++) + { + if (_ma_columndef_write(file, col_order[i])) + { + my_free((gptr) col_order, MYF(0)); + goto err; + } + } + my_free((gptr) col_order, MYF(0)); + } + else + { + for (i=0 ; i < share.base.fields ; i++) + if (_ma_columndef_write(file, &columndef[i])) + goto err; + } + + if ((kfile_size_before_extension= my_tell(file,MYF(0))) == MY_FILEPOS_ERROR) + goto err; +#ifndef DBUG_OFF + if (kfile_size_before_extension != info_length) + DBUG_PRINT("warning",("info_length: %u != used_length: %u", + info_length, (uint)kfile_size_before_extension)); +#endif + + if (sync_dir) + { + /* + we log the first bytes and then the size to which we extend; this is + not log 1 KB of mostly zeroes if this is a small table. + */ + char empty_string[]= ""; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + uint total_rec_length= 0; + uint i; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1 + 2 + + kfile_size_before_extension; + /* we are needing maybe 64 kB, so don't use the stack */ + log_data= my_malloc(log_array[TRANSLOG_INTERNAL_PARTS + 0].length, MYF(0)); + if ((log_data == NULL) || + my_pread(file, 1 + 2 + log_data, kfile_size_before_extension, + 0, MYF(MY_NABP))) + goto err_no_lock; + /* + remember if the data file was created or not, to know if Recovery can + do it or not, in the future + */ + log_data[0]= test(flags & HA_DONT_TOUCH_DATA); + int2store(log_data + 1, kfile_size_before_extension); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + /* symlink description is also needed for re-creation by Recovery: */ + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= + dlinkname_ptr ? dlinkname : empty_string; + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 1].str); + log_array[TRANSLOG_INTERNAL_PARTS + 2].str= + klinkname_ptr ? klinkname : empty_string; + log_array[TRANSLOG_INTERNAL_PARTS + 2].length= + strlen(log_array[TRANSLOG_INTERNAL_PARTS + 2].str); + for (i= TRANSLOG_INTERNAL_PARTS; + i < (sizeof(log_array)/sizeof(log_array[0])); i++) + total_rec_length+= log_array[i].length; + /* + For this record to be of any use for Recovery, we need the upper + MySQL layer to be crash-safe, which it is not now (that would require + work using the ddl_log of sql/sql_table.cc); when it is, we should + reconsider the moment of writing this log record (before or after op, + under THR_LOCK_maria or not...), how to use it in Recovery, and force + the log. For now this record is just informative. + Note that in case of TRUNCATE TABLE we also come here. + When in CREATE/TRUNCATE (or DROP or RENAME or REPAIR) we have not called + external_lock(), so have no TRN. It does not matter, as all these + operations are non-transactional and sync their files. + */ + if (unlikely(translog_write_record(&share.state.create_rename_lsn, + LOGREC_REDO_CREATE_TABLE, + &dummy_transaction_object, NULL, + total_rec_length, + sizeof(log_array)/sizeof(log_array[0]), + log_array, NULL))) + goto err_no_lock; + /* + store LSN into file, needed for Recovery to not be confused if a + DROP+CREATE happened (applying REDOs to the wrong table). + If such direct my_pwrite() to a fixed offset is too "hackish", I can + call ma_state_info_write() again but it will be less efficient. + */ + lsn_store(log_data, share.state.create_rename_lsn); + if (my_pwrite(file, log_data, LSN_STORE_SIZE, + sizeof(share.state.header) + 2, MYF(MY_NABP))) + goto err_no_lock; + my_free(log_data, MYF(0)); + } + + /* Enlarge files */ + DBUG_PRINT("info", ("enlarge to keystart: %lu", + (ulong) share.base.keystart)); + if (my_chsize(file,(ulong) share.base.keystart,0,MYF(0))) + goto err; + + if (sync_dir && my_sync(file, MYF(0))) + goto err; + + if (! (flags & HA_DONT_TOUCH_DATA)) + { +#ifdef USE_RELOC + if (my_chsize(dfile,share.base.min_pack_length*ci->reloc_rows,0,MYF(0))) + goto err; +#endif + errpos=2; + if ((sync_dir && my_sync(dfile, MYF(0))) || my_close(dfile,MYF(0))) + goto err; + } + pthread_mutex_unlock(&THR_LOCK_maria); + res= 0; + my_free((char*) rec_per_key_part,MYF(0)); + errpos=0; + if (my_close(file,MYF(0))) + res= my_errno; + DBUG_RETURN(res); + +err: + pthread_mutex_unlock(&THR_LOCK_maria); + +err_no_lock: + save_errno=my_errno; + switch (errpos) { + case 3: + VOID(my_close(dfile,MYF(0))); + /* fall through */ + case 2: + if (! (flags & HA_DONT_TOUCH_DATA)) + my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_DEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT), + sync_dir); + /* fall through */ + case 1: + VOID(my_close(file,MYF(0))); + if (! (flags & HA_DONT_TOUCH_DATA)) + my_delete_with_symlink(fn_format(filename,name,"",MARIA_NAME_IEXT, + MY_UNPACK_FILENAME | MY_APPEND_EXT), + sync_dir); + } + my_free(log_data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((char*) rec_per_key_part, MYF(0)); + DBUG_RETURN(my_errno=save_errno); /* return the fatal errno */ +} + + +uint maria_get_pointer_length(ulonglong file_length, uint def) +{ + DBUG_ASSERT(def >= 2 && def <= 7); + if (file_length) /* If not default */ + { +#ifdef NOT_YET_READY_FOR_8_BYTE_POINTERS + if (file_length >= (ULL(1) << 56)) + def=8; + else +#endif + if (file_length >= (ULL(1) << 48)) + def=7; + else if (file_length >= (ULL(1) << 40)) + def=6; + else if (file_length >= (ULL(1) << 32)) + def=5; + else if (file_length >= (ULL(1) << 24)) + def=4; + else if (file_length >= (ULL(1) << 16)) + def=3; + else + def=2; + } + return def; +} + + +/* + Sort columns for records-in-block + + IMPLEMENTATION + Sort columns in following order: + + Fixed size, not null columns + Fixed length, null fields + Variable length fields (CHAR, VARCHAR) + Blobs + + For same kind of fields, keep fields in original order +*/ + +static inline int sign(longlong a) +{ + return a < 0 ? -1 : (a > 0 ? 1 : 0); +} + + +static int compare_columns(MARIA_COLUMNDEF **a_ptr, MARIA_COLUMNDEF **b_ptr) +{ + MARIA_COLUMNDEF *a= *a_ptr, *b= *b_ptr; + enum en_fieldtype a_type, b_type; + + a_type= ((a->type == FIELD_NORMAL || a->type == FIELD_CHECK) ? + FIELD_NORMAL : a->type); + b_type= ((b->type == FIELD_NORMAL || b->type == FIELD_CHECK) ? + FIELD_NORMAL : b->type); + + if (a_type == FIELD_NORMAL && !a->null_bit) + { + if (b_type != FIELD_NORMAL || b->null_bit) + return -1; + return sign((long) (a->offset - b->offset)); + } + if (b_type == FIELD_NORMAL && !b->null_bit) + return 1; + if (a_type == b_type) + return sign((long) (a->offset - b->offset)); + if (a_type == FIELD_NORMAL) + return -1; + if (b_type == FIELD_NORMAL) + return 1; + if (a_type == FIELD_BLOB) + return 1; + if (b_type == FIELD_BLOB) + return -1; + return sign((long) (a->offset - b->offset)); +} + + +/* Initialize data file */ + +int _ma_initialize_data_file(File dfile, MARIA_SHARE *share) +{ + if (share->data_file_type == BLOCK_RECORD) + { + if (my_chsize(dfile, share->base.block_size, 0, MYF(MY_WME))) + return 1; + share->state.state.data_file_length= share->base.block_size; + _ma_bitmap_delete_all(share); + } + return 0; +} diff --git a/storage/maria/ma_dbug.c b/storage/maria/ma_dbug.c new file mode 100644 index 00000000000..150385607b6 --- /dev/null +++ b/storage/maria/ma_dbug.c @@ -0,0 +1,193 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Support rutiner with are using with dbug */ + +#include "maria_def.h" + + /* Print a key in user understandable format */ + +void _ma_print_key(FILE *stream, register HA_KEYSEG *keyseg, + const byte *key, uint length) +{ + int flag; + short int s_1; + long int l_1; + float f_1; + double d_1; + const byte *end; + const byte *key_end= key + length; + + VOID(fputs("Key: \"",stream)); + flag=0; + for (; keyseg->type && key < key_end ;keyseg++) + { + if (flag++) + VOID(putc('-',stream)); + end= key+ keyseg->length; + if (keyseg->flag & HA_NULL_PART) + { + /* A NULL value is encoded by a 1-byte flag. Zero means NULL. */ + if (! *(key++)) + { + fprintf(stream,"NULL"); + continue; + } + end++; + } + + switch (keyseg->type) { + case HA_KEYTYPE_BINARY: + if (!(keyseg->flag & HA_SPACE_PACK) && keyseg->length == 1) + { /* packed binary digit */ + VOID(fprintf(stream,"%d",(uint) *key++)); + break; + } + /* fall through */ + case HA_KEYTYPE_TEXT: + case HA_KEYTYPE_NUM: + if (keyseg->flag & HA_SPACE_PACK) + { + VOID(fprintf(stream,"%.*s",(int) *key,key+1)); + key+= (int) *key+1; + } + else + { + VOID(fprintf(stream,"%.*s",(int) keyseg->length,key)); + key=end; + } + break; + case HA_KEYTYPE_INT8: + VOID(fprintf(stream,"%d",(int) *((signed char*) key))); + key=end; + break; + case HA_KEYTYPE_SHORT_INT: + s_1= mi_sint2korr(key); + VOID(fprintf(stream,"%d",(int) s_1)); + key=end; + break; + case HA_KEYTYPE_USHORT_INT: + { + ushort u_1; + u_1= mi_uint2korr(key); + VOID(fprintf(stream,"%u",(uint) u_1)); + key=end; + break; + } + case HA_KEYTYPE_LONG_INT: + l_1=mi_sint4korr(key); + VOID(fprintf(stream,"%ld",l_1)); + key=end; + break; + case HA_KEYTYPE_ULONG_INT: + l_1=mi_sint4korr(key); + VOID(fprintf(stream,"%lu",(ulong) l_1)); + key=end; + break; + case HA_KEYTYPE_INT24: + VOID(fprintf(stream,"%ld",(long) mi_sint3korr(key))); + key=end; + break; + case HA_KEYTYPE_UINT24: + VOID(fprintf(stream,"%lu",(ulong) mi_uint3korr(key))); + key=end; + break; + case HA_KEYTYPE_FLOAT: + mi_float4get(f_1,key); + VOID(fprintf(stream,"%g",(double) f_1)); + key=end; + break; + case HA_KEYTYPE_DOUBLE: + mi_float8get(d_1,key); + VOID(fprintf(stream,"%g",d_1)); + key=end; + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + { + char buff[21]; + longlong2str(mi_sint8korr(key),buff,-10); + VOID(fprintf(stream,"%s",buff)); + key=end; + break; + } + case HA_KEYTYPE_ULONGLONG: + { + char buff[21]; + longlong2str(mi_sint8korr(key),buff,10); + VOID(fprintf(stream,"%s",buff)); + key=end; + break; + } + case HA_KEYTYPE_BIT: + { + uint i; + fputs("0x",stream); + for (i=0 ; i < keyseg->length ; i++) + fprintf(stream, "%02x", (uint) *key++); + key= end; + break; + } + +#endif + case HA_KEYTYPE_VARTEXT1: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARTEXT2: /* VARCHAR and TEXT */ + case HA_KEYTYPE_VARBINARY1: /* VARBINARY and BLOB */ + case HA_KEYTYPE_VARBINARY2: /* VARBINARY and BLOB */ + { + uint tmp_length; + get_key_length(tmp_length,key); + /* + The following command sometimes gives a warning from valgrind. + Not yet sure if the bug is in valgrind, glibc or mysqld + */ + VOID(fprintf(stream,"%.*s",(int) tmp_length,key)); + key+=tmp_length; + break; + } + default: break; /* This never happens */ + } + } + VOID(fputs("\"\n",stream)); + return; +} /* print_key */ + + +#ifdef EXTRA_DEBUG + +my_bool _ma_check_table_is_closed(const char *name, const char *where) +{ + char filename[FN_REFLEN]; + LIST *pos; + DBUG_ENTER("_ma_check_table_is_closed"); + + (void) fn_format(filename,name,"",MARIA_NAME_IEXT,4+16+32); + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info=(MARIA_HA*) pos->data; + MARIA_SHARE *share=info->s; + if (!strcmp(share->unique_file_name,filename)) + { + if (share->last_version) + { + fprintf(stderr,"Warning: Table: %s is open on %s\n", name,where); + DBUG_PRINT("warning",("Table: %s is open on %s", name,where)); + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} +#endif /* EXTRA_DEBUG */ diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c new file mode 100644 index 00000000000..54c6b7aaefc --- /dev/null +++ b/storage/maria/ma_delete.c @@ -0,0 +1,891 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Remove a row from a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +static int d_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo,uint comp_flag, + byte *key,uint key_length,my_off_t page,byte *anc_buff); +static int del(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key,byte *anc_buff, + my_off_t leaf_page,byte *leaf_buff,byte *keypos, + my_off_t next_block,byte *ret_key); +static int underflow(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *anc_buff, + my_off_t leaf_page,byte *leaf_buff,byte *keypos); +static uint remove_key(MARIA_KEYDEF *keyinfo,uint nod_flag,byte *keypos, + byte *lastkey,byte *page_end, + my_off_t *next_block); +static int _ma_ck_real_delete(register MARIA_HA *info,MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, my_off_t *root); + + +int maria_delete(MARIA_HA *info,const byte *record) +{ + uint i; + byte *old_key; + int save_errno; + char lastpos[8]; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_delete"); + + /* Test if record is in datafile */ + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + DBUG_EXECUTE_IF("my_error_test_undefined_error", + maria_print_error(info->s, INT_MAX); + DBUG_RETURN(my_errno= INT_MAX);); + if (!(info->update & HA_STATE_AKTIV)) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No database read */ + } + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + if ((*share->compare_record)(info,record)) + goto err; /* Error on read-check */ + + if (_ma_mark_file_changed(info)) + goto err; + + /* Remove all keys from the index file */ + + old_key= info->lastkey2; + for (i=0 ; i < share->base.keys ; i++ ) + { + if (maria_is_key_active(info->s->state.key_map, i)) + { + info->s->keyinfo[i].version++; + if (info->s->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_del(info,i,(char*) old_key,record,info->cur_row.lastpos)) + goto err; + } + else + { + if (info->s->keyinfo[i].ck_delete(info,i,old_key, + _ma_make_key(info,i,old_key,record,info->cur_row.lastpos))) + goto err; + } + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + } + } + + if ((*share->delete_record)(info, record)) + goto err; /* Remove record from database */ + + /* + We can't use the row based checksum as this doesn't have enough + precision. + */ + if (info->s->calc_checksum) + { + info->cur_row.checksum= (*info->s->calc_checksum)(info,record); + info->state->checksum-= info->cur_row.checksum; + } + + info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED; + info->state->records--; + share->state.changed|= STATE_NOT_OPTIMIZED_ROWS; + + mi_sizestore(lastpos, info->cur_row.lastpos); + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (delete)", info->s->open_file_name)); + (*info->invalidator)(info->s->open_file_name); + info->invalidator=0; + } + DBUG_RETURN(0); + +err: + save_errno=my_errno; + mi_sizestore(lastpos, info->cur_row.lastpos); + if (save_errno != HA_ERR_RECORD_CHANGED) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* mark table crashed */ + } + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + info->update|=HA_STATE_WRITTEN; /* Buffer changed */ + allow_break(); /* Allow SIGHUP & SIGINT */ + my_errno=save_errno; + if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + } + + DBUG_RETURN(my_errno); +} /* maria_delete */ + + + /* Remove a key from the btree index */ + +int _ma_ck_delete(register MARIA_HA *info, uint keynr, byte *key, + uint key_length) +{ + return _ma_ck_real_delete(info, info->s->keyinfo+keynr, key, key_length, + &info->s->state.key_root[keynr]); +} /* _ma_ck_delete */ + + +static int _ma_ck_real_delete(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, my_off_t *root) +{ + int error; + uint nod_flag; + my_off_t old_root; + byte *root_buff; + DBUG_ENTER("_ma_ck_real_delete"); + + if ((old_root=*root) == HA_OFFSET_ERROR) + { + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno=HA_ERR_CRASHED); + } + if (!(root_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + { + DBUG_PRINT("error",("Couldn't allocate memory")); + DBUG_RETURN(my_errno=ENOMEM); + } + DBUG_PRINT("info",("root_page: %ld", (long) old_root)); + if (!_ma_fetch_keypage(info,keyinfo,old_root,DFLT_INIT_HITS,root_buff,0)) + { + error= -1; + goto err; + } + if ((error=d_search(info,keyinfo, + (keyinfo->flag & HA_FULLTEXT ? SEARCH_FIND | SEARCH_UPDATE + : SEARCH_SAME), + key,key_length,old_root,root_buff)) >0) + { + if (error == 2) + { + DBUG_PRINT("test",("Enlarging of root when deleting")); + error= _ma_enlarge_root(info,keyinfo,key,root); + } + else /* error == 1 */ + { + if (maria_data_on_page(root_buff) <= (nod_flag=_ma_test_if_nod(root_buff))+3) + { + error=0; + if (nod_flag) + *root= _ma_kpos(nod_flag,root_buff+2+nod_flag); + else + *root=HA_OFFSET_ERROR; + if (_ma_dispose(info,keyinfo,old_root,DFLT_INIT_HITS)) + error= -1; + } + else + error= _ma_write_keypage(info,keyinfo,old_root, + DFLT_INIT_HITS,root_buff); + } + } +err: + my_afree((gptr) root_buff); + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} /* _ma_ck_real_delete */ + + + /* + ** Remove key below key root + ** Return values: + ** 1 if there are less buffers; In this case anc_buff is not saved + ** 2 if there are more buffers + ** -1 on errors + */ + +static int d_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uint comp_flag, byte *key, uint key_length, + my_off_t page, byte *anc_buff) +{ + int flag,ret_value,save_flag; + uint length,nod_flag,search_key_length; + my_bool last_key; + byte *leaf_buff,*keypos; + my_off_t leaf_page,next_block; + byte lastkey[HA_MAX_KEY_BUFF]; + DBUG_ENTER("d_search"); + DBUG_DUMP("page",anc_buff,maria_data_on_page(anc_buff)); + + search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key, search_key_length, + comp_flag, &keypos, lastkey, &last_key); + if (flag == MARIA_FOUND_WRONG_KEY) + { + DBUG_PRINT("error",("Found wrong key")); + DBUG_RETURN(-1); + } + nod_flag=_ma_test_if_nod(anc_buff); + + if (!flag && keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, lastkey); + subkeys=ft_sintXkorr(lastkey+off); + DBUG_ASSERT(info->ft1_to_ft2==0 || subkeys >=0); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + if (info->ft1_to_ft2) + { + /* we're in ft1->ft2 conversion mode. Saving key data */ + insert_dynamic(info->ft1_to_ft2, (char*) (lastkey+off)); + } + else + { + /* we need exact match only if not in ft1->ft2 conversion mode */ + flag=(*keyinfo->bin_search)(info,keyinfo,anc_buff,key,USE_WHOLE_KEY, + comp_flag, &keypos, lastkey, &last_key); + } + /* fall through to normal delete */ + } + else + { + /* popular word. two-level tree. going down */ + uint tmp_key_length; + my_off_t root; + byte *kpos=keypos; + + if (!(tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&kpos,lastkey))) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + root= _ma_dpos(info,nod_flag,kpos); + if (subkeys == -1) + { + /* the last entry in sub-tree */ + if (_ma_dispose(info, keyinfo, root,DFLT_INIT_HITS)) + DBUG_RETURN(-1); + /* fall through to normal delete */ + } + else + { + keyinfo=&info->s->ft2_keyinfo; + kpos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ + get_key_full_length_rdonly(off, key); + key+=off; + ret_value= _ma_ck_real_delete(info, &info->s->ft2_keyinfo, + key, HA_FT_WLEN, &root); + _ma_dpointer(info, kpos+HA_FT_WLEN, root); + subkeys++; + ft_intXstore(kpos, subkeys); + if (!ret_value) + ret_value= _ma_write_keypage(info,keyinfo,page, + DFLT_INIT_HITS,anc_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); + DBUG_RETURN(ret_value); + } + } + } + leaf_buff=0; + LINT_INIT(leaf_page); + if (nod_flag) + { + leaf_page= _ma_kpos(nod_flag,keypos); + if (!(leaf_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + { + DBUG_PRINT("error",("Couldn't allocate memory")); + my_errno=ENOMEM; + DBUG_PRINT("exit",("Return: %d",-1)); + DBUG_RETURN(-1); + } + if (!_ma_fetch_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff,0)) + goto err; + } + + if (flag != 0) + { + if (!nod_flag) + { + DBUG_PRINT("error",("Didn't find key")); + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; /* This should newer happend */ + goto err; + } + save_flag=0; + ret_value=d_search(info,keyinfo,comp_flag,key,key_length, + leaf_page,leaf_buff); + } + else + { /* Found key */ + uint tmp; + length= maria_data_on_page(anc_buff); + if (!(tmp= remove_key(keyinfo,nod_flag,keypos,lastkey,anc_buff+length, + &next_block))) + goto err; + + length-= tmp; + + maria_putint(anc_buff,length,nod_flag); + if (!nod_flag) + { /* On leaf page */ + if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff)) + { + DBUG_PRINT("exit",("Return: %d",-1)); + DBUG_RETURN(-1); + } + /* Page will be update later if we return 1 */ + DBUG_RETURN(test(length <= (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH : + (uint) keyinfo->underflow_block_length))); + } + save_flag=1; + ret_value=del(info,keyinfo,key,anc_buff,leaf_page,leaf_buff,keypos, + next_block,lastkey); + } + if (ret_value >0) + { + save_flag=1; + if (ret_value == 1) + ret_value= underflow(info,keyinfo,anc_buff,leaf_page,leaf_buff,keypos); + else + { /* This happens only with packed keys */ + DBUG_PRINT("test",("Enlarging of key when deleting")); + if (!_ma_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length)) + goto err; + ret_value= _ma_insert(info,keyinfo,key,anc_buff,keypos,lastkey, + (byte*) 0,(byte*) 0,(my_off_t) 0,(my_bool) 0); + } + } + if (ret_value == 0 && maria_data_on_page(anc_buff) > keyinfo->block_length) + { + save_flag=1; + ret_value= _ma_split_page(info,keyinfo,key,anc_buff,lastkey,0) | 2; + } + if (save_flag && ret_value != 1) + ret_value|= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,anc_buff); + else + { + DBUG_DUMP("page",anc_buff,maria_data_on_page(anc_buff)); + } + my_afree(leaf_buff); + DBUG_PRINT("exit",("Return: %d",ret_value)); + DBUG_RETURN(ret_value); + +err: + my_afree(leaf_buff); + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1); +} /* d_search */ + + + /* Remove a key that has a page-reference */ + +static int del(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, byte *anc_buff, my_off_t leaf_page, + byte *leaf_buff, + byte *keypos, /* Pos to where deleted key was */ + my_off_t next_block, + byte *ret_key) /* key before keypos in anc_buff */ +{ + int ret_value,length; + uint a_length,nod_flag,tmp; + my_off_t next_page; + byte keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; + MARIA_SHARE *share=info->s; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("del"); + DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx", (long) leaf_page, + (ulong) keypos)); + DBUG_DUMP("leaf_buff",leaf_buff,maria_data_on_page(leaf_buff)); + + endpos= leaf_buff+ maria_data_on_page(leaf_buff); + if (!(key_start= _ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, + &tmp))) + DBUG_RETURN(-1); + + if ((nod_flag=_ma_test_if_nod(leaf_buff))) + { + next_page= _ma_kpos(nod_flag,endpos); + if (!(next_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + DBUG_RETURN(-1); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) + ret_value= -1; + else + { + DBUG_DUMP("next_page",next_buff,maria_data_on_page(next_buff)); + if ((ret_value=del(info,keyinfo,key,anc_buff,next_page,next_buff, + keypos,next_block,ret_key)) >0) + { + endpos=leaf_buff+maria_data_on_page(leaf_buff); + if (ret_value == 1) + { + ret_value=underflow(info,keyinfo,leaf_buff,next_page, + next_buff,endpos); + if (ret_value == 0 && maria_data_on_page(leaf_buff) > keyinfo->block_length) + { + ret_value= _ma_split_page(info,keyinfo,key,leaf_buff,ret_key,0) | 2; + } + } + else + { + DBUG_PRINT("test",("Inserting of key when deleting")); + if (!_ma_get_last_key(info,keyinfo,leaf_buff,keybuff,endpos, + &tmp)) + goto err; + ret_value= _ma_insert(info,keyinfo,key,leaf_buff,endpos,keybuff, + (byte*) 0,(byte*) 0,(my_off_t) 0,0); + } + } + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + } + my_afree(next_buff); + DBUG_RETURN(ret_value); + } + + /* Remove last key from leaf page */ + + maria_putint(leaf_buff,key_start-leaf_buff,nod_flag); + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + + /* Place last key in ancestor page on deleted key position */ + + a_length= maria_data_on_page(anc_buff); + endpos=anc_buff+a_length; + if (keypos != anc_buff+2+share->base.key_reflength && + !_ma_get_last_key(info,keyinfo,anc_buff,ret_key,keypos,&tmp)) + goto err; + prev_key=(keypos == anc_buff+2+share->base.key_reflength ? + 0 : ret_key); + length=(*keyinfo->pack_key)(keyinfo,share->base.key_reflength, + keypos == endpos ? (byte*) 0 : keypos, + prev_key, prev_key, + keybuff,&s_temp); + if (length > 0) + bmove_upp(endpos+length,endpos,(uint) (endpos-keypos)); + else + bmove(keypos,keypos-length, (int) (endpos-keypos)+length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + /* Save pointer to next leaf */ + if (!(*keyinfo->get_key)(keyinfo,share->base.key_reflength,&keypos,ret_key)) + goto err; + _ma_kpointer(info,keypos - share->base.key_reflength,next_block); + maria_putint(anc_buff,a_length+length,share->base.key_reflength); + + DBUG_RETURN( maria_data_on_page(leaf_buff) <= + (info->quick_mode ? MARIA_MIN_KEYBLOCK_LENGTH : + (uint) keyinfo->underflow_block_length)); +err: + DBUG_RETURN(-1); +} /* del */ + + + /* Balances adjacent pages if underflow occours */ + +static int underflow(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *anc_buff, + my_off_t leaf_page,/* Ancestor page and underflow page */ + byte *leaf_buff, + byte *keypos) /* Position to pos after key */ +{ + int t_length; + uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag, + key_reflength,key_length; + my_off_t next_page; + byte anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF]; + byte *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key; + byte *after_key; + MARIA_KEY_PARAM s_temp; + MARIA_SHARE *share=info->s; + DBUG_ENTER("underflow"); + DBUG_PRINT("enter",("leaf_page: %ld keypos: 0x%lx",(long) leaf_page, + (ulong) keypos)); + DBUG_DUMP("anc_buff",anc_buff,maria_data_on_page(anc_buff)); + DBUG_DUMP("leaf_buff",leaf_buff,maria_data_on_page(leaf_buff)); + + buff=info->buff; + info->keyread_buff_used=1; + next_keypos=keypos; + nod_flag=_ma_test_if_nod(leaf_buff); + p_length=nod_flag+2; + anc_length= maria_data_on_page(anc_buff); + leaf_length= maria_data_on_page(leaf_buff); + key_reflength=share->base.key_reflength; + if (info->s->keyinfo+info->lastinx == keyinfo) + info->page_changed=1; + + if ((keypos < anc_buff+anc_length && (info->state->records & 1)) || + keypos == anc_buff+2+key_reflength) + { /* Use page right of anc-page */ + DBUG_PRINT("test",("use right page")); + + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { + if (!(next_keypos= _ma_get_key(info, keyinfo, + anc_buff, buff, keypos, &length))) + goto err; + } + else + { + /* Got to end of found key */ + buff[0]=buff[1]=0; /* Avoid length error check if packed key */ + if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos, + buff)) + goto err; + } + next_page= _ma_kpos(key_reflength,next_keypos); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) + goto err; + buff_length= maria_data_on_page(buff); + DBUG_DUMP("next",buff,buff_length); + + /* find keys to make a big key-page */ + bmove(next_keypos-key_reflength, buff+2, key_reflength); + if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_key,next_keypos,&length) + || !_ma_get_last_key(info,keyinfo,leaf_buff,leaf_key, + leaf_buff+leaf_length,&length)) + goto err; + + /* merge pages and put parting key from anc_buff between */ + prev_key=(leaf_length == p_length ? (byte*) 0 : leaf_key); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,buff+p_length, + prev_key, prev_key, + anc_key, &s_temp); + length=buff_length-p_length; + endpos=buff+length+leaf_length+t_length; + /* buff will always be larger than before !*/ + bmove_upp(endpos, buff+buff_length,length); + memcpy(buff, leaf_buff,(size_t) leaf_length); + (*keyinfo->store_key)(keyinfo,buff+leaf_length,&s_temp); + buff_length=(uint) (endpos-buff); + maria_putint(buff,buff_length,nod_flag); + + /* remove key from anc_buff */ + + if (!(s_length=remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) + goto err; + + anc_length-=s_length; + maria_putint(anc_buff,anc_length,key_reflength); + + if (buff_length <= keyinfo->block_length) + { /* Keys in one page */ + memcpy(leaf_buff,buff,(size_t) buff_length); + if (_ma_dispose(info,keyinfo,next_page,DFLT_INIT_HITS)) + goto err; + } + else + { /* Page is full */ + endpos=anc_buff+anc_length; + DBUG_PRINT("test",("anc_buff: 0x%lx endpos: 0x%lx", + (long) anc_buff, (long) endpos)); + if (keypos != anc_buff+2+key_reflength && + !_ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length)) + goto err; + if (!(half_pos= _ma_find_half_pos(nod_flag, keyinfo, buff, leaf_key, + &key_length, &after_key))) + goto err; + length=(uint) (half_pos-buff); + memcpy(leaf_buff,buff,(size_t) length); + maria_putint(leaf_buff,length,nod_flag); + + /* Correct new keypointer to leaf_page */ + half_pos=after_key; + _ma_kpointer(info,leaf_key+key_length,next_page); + /* Save key in anc_buff */ + prev_key=(keypos == anc_buff+2+key_reflength ? (byte*) 0 : anc_key), + t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, + (keypos == endpos ? (byte*) 0 : + keypos), + prev_key, prev_key, + leaf_key, &s_temp); + if (t_length >= 0) + bmove_upp(endpos+t_length, endpos, (uint) (endpos-keypos)); + else + bmove(keypos,keypos-t_length,(uint) (endpos-keypos)+t_length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + maria_putint(anc_buff,(anc_length+=t_length),key_reflength); + + /* Store key first in new page */ + if (nod_flag) + bmove(buff+2,half_pos-nod_flag,(size_t) nod_flag); + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key)) + goto err; + t_length=(int) (*keyinfo->pack_key)(keyinfo, nod_flag, (byte*) 0, + (byte*) 0, (byte*) 0, + leaf_key, &s_temp); + /* t_length will always be > 0 for a new page !*/ + length=(uint) ((buff+maria_data_on_page(buff))-half_pos); + bmove(buff+p_length+t_length, half_pos, (size_t) length); + (*keyinfo->store_key)(keyinfo,buff+p_length,&s_temp); + maria_putint(buff,length+t_length+p_length,nod_flag); + + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) + goto err; + } + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + DBUG_RETURN(anc_length <= ((info->quick_mode ? MARIA_MIN_BLOCK_LENGTH : + (uint) keyinfo->underflow_block_length))); + } + + DBUG_PRINT("test",("use left page")); + + keypos= _ma_get_last_key(info,keyinfo,anc_buff,anc_key,keypos,&length); + if (!keypos) + goto err; + next_page= _ma_kpos(key_reflength,keypos); + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff,0)) + goto err; + buff_length= maria_data_on_page(buff); + endpos=buff+buff_length; + DBUG_DUMP("prev",buff,buff_length); + + /* find keys to make a big key-page */ + bmove(next_keypos - key_reflength, leaf_buff+2, key_reflength); + next_keypos=keypos; + if (!(*keyinfo->get_key)(keyinfo,key_reflength,&next_keypos, + anc_key)) + goto err; + if (!_ma_get_last_key(info,keyinfo,buff,leaf_key,endpos,&length)) + goto err; + + /* merge pages and put parting key from anc_buff between */ + prev_key=(leaf_length == p_length ? (byte*) 0 : leaf_key); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (leaf_length == p_length ? + (byte*) 0 : leaf_buff+p_length), + prev_key, prev_key, + anc_key, &s_temp); + if (t_length >= 0) + bmove(endpos+t_length, leaf_buff+p_length, + (size_t) (leaf_length-p_length)); + else /* We gained space */ + bmove(endpos,leaf_buff+((int) p_length-t_length), + (size_t) (leaf_length-p_length+t_length)); + + (*keyinfo->store_key)(keyinfo,endpos,&s_temp); + buff_length=buff_length+leaf_length-p_length+t_length; + maria_putint(buff,buff_length,nod_flag); + + /* remove key from anc_buff */ + if (!(s_length= remove_key(keyinfo,key_reflength,keypos,anc_key, + anc_buff+anc_length,(my_off_t *) 0))) + goto err; + + anc_length-=s_length; + maria_putint(anc_buff,anc_length,key_reflength); + + if (buff_length <= keyinfo->block_length) + { /* Keys in one page */ + if (_ma_dispose(info,keyinfo,leaf_page,DFLT_INIT_HITS)) + goto err; + } + else + { /* Page is full */ + if (keypos == anc_buff+2+key_reflength) + anc_pos=0; /* First key */ + else if (!_ma_get_last_key(info,keyinfo,anc_buff,anc_pos=anc_key,keypos, + &length)) + goto err; + endpos= _ma_find_half_pos(nod_flag,keyinfo,buff,leaf_key, + &key_length, &half_pos); + if (!endpos) + goto err; + _ma_kpointer(info,leaf_key+key_length,leaf_page); + /* Save key in anc_buff */ + DBUG_DUMP("anc_buff",anc_buff,anc_length); + DBUG_DUMP("key_to_anc",leaf_key,key_length); + + temp_pos=anc_buff+anc_length; + t_length=(*keyinfo->pack_key)(keyinfo,key_reflength, + keypos == temp_pos ? (byte*) 0 + : keypos, + anc_pos, anc_pos, + leaf_key,&s_temp); + if (t_length > 0) + bmove_upp(temp_pos+t_length, temp_pos, (uint) (temp_pos-keypos)); + else + bmove(keypos,keypos-t_length,(uint) (temp_pos-keypos)+t_length); + (*keyinfo->store_key)(keyinfo,keypos,&s_temp); + maria_putint(anc_buff,(anc_length+=t_length),key_reflength); + + /* Store first key on new page */ + if (nod_flag) + bmove(leaf_buff+2,half_pos-nod_flag,(size_t) nod_flag); + if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&half_pos,leaf_key))) + goto err; + DBUG_DUMP("key_to_leaf",leaf_key,length); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (byte*) 0, + (byte*) 0, (byte*) 0, leaf_key, &s_temp); + length=(uint) ((buff+buff_length)-half_pos); + DBUG_PRINT("info",("t_length: %d length: %d",t_length,(int) length)); + bmove(leaf_buff+p_length+t_length,half_pos, + (size_t) length); + (*keyinfo->store_key)(keyinfo,leaf_buff+p_length,&s_temp); + maria_putint(leaf_buff,length+t_length+p_length,nod_flag); + if (_ma_write_keypage(info,keyinfo,leaf_page,DFLT_INIT_HITS,leaf_buff)) + goto err; + maria_putint(buff,endpos-buff,nod_flag); + } + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,buff)) + goto err; + DBUG_RETURN(anc_length <= (uint) keyinfo->block_length/2); + +err: + DBUG_RETURN(-1); +} /* underflow */ + + + /* + remove a key from packed buffert + The current code doesn't handle the case that the next key may be + packed better against the previous key if there is a case difference + returns how many chars was removed or 0 on error + */ + +static uint remove_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *keypos, /* Where key starts */ + byte *lastkey, /* key to be removed */ + byte *page_end, /* End of page */ + my_off_t *next_block) /* ptr to next block */ +{ + int s_length; + byte *start; + DBUG_ENTER("remove_key"); + DBUG_PRINT("enter",("keypos: 0x%lx page_end: 0x%lx",(long) keypos, (long) page_end)); + + start=keypos; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + s_length=(int) (keyinfo->keylength+nod_flag); + if (next_block && nod_flag) + *next_block= _ma_kpos(nod_flag,keypos+s_length); + } + else + { /* Let keypos point at next key */ + /* Calculate length of key */ + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey)) + DBUG_RETURN(0); /* Error */ + + if (next_block && nod_flag) + *next_block= _ma_kpos(nod_flag,keypos); + s_length=(int) (keypos-start); + if (keypos != page_end) + { + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { + byte *old_key=start; + uint next_length,prev_length,prev_pack_length; + get_key_length(next_length,keypos); + get_key_pack_length(prev_length,prev_pack_length,old_key); + if (next_length > prev_length) + { + /* We have to copy data from the current key to the next key */ + bmove_upp((char*) keypos,(char*) (lastkey+next_length), + (next_length-prev_length)); + keypos-=(next_length-prev_length)+prev_pack_length; + store_key_length(keypos,prev_length); + s_length=(int) (keypos-start); + } + } + else + { + /* Check if a variable length first key part */ + if ((keyinfo->seg->flag & HA_PACK_KEY) && *keypos & 128) + { + /* Next key is packed against the current one */ + uint next_length,prev_length,prev_pack_length,lastkey_length, + rest_length; + if (keyinfo->seg[0].length >= 127) + { + if (!(prev_length=mi_uint2korr(start) & 32767)) + goto end; + next_length=mi_uint2korr(keypos) & 32767; + keypos+=2; + prev_pack_length=2; + } + else + { + if (!(prev_length= *start & 127)) + goto end; /* Same key as previous*/ + next_length= *keypos & 127; + keypos++; + prev_pack_length=1; + } + if (!(*start & 128)) + prev_length=0; /* prev key not packed */ + if (keyinfo->seg[0].flag & HA_NULL_PART) + lastkey++; /* Skip null marker */ + get_key_length(lastkey_length,lastkey); + if (!next_length) /* Same key after */ + { + next_length=lastkey_length; + rest_length=0; + } + else + get_key_length(rest_length,keypos); + + if (next_length >= prev_length) + { /* Key after is based on deleted key */ + uint pack_length,tmp; + bmove_upp((char*) keypos,(char*) (lastkey+next_length), + tmp=(next_length-prev_length)); + rest_length+=tmp; + pack_length= prev_length ? get_pack_length(rest_length): 0; + keypos-=tmp+pack_length+prev_pack_length; + s_length=(int) (keypos-start); + if (prev_length) /* Pack against prev key */ + { + *keypos++= start[0]; + if (prev_pack_length == 2) + *keypos++= start[1]; + store_key_length(keypos,rest_length); + } + else + { + /* Next key is not packed anymore */ + if (keyinfo->seg[0].flag & HA_NULL_PART) + { + rest_length++; /* Mark not null */ + } + if (prev_pack_length == 2) + { + mi_int2store(keypos,rest_length); + } + else + *keypos= rest_length; + } + } + } + } + } + } + end: + bmove(start, start+s_length, (uint) (page_end-start-s_length)); + DBUG_RETURN((uint) s_length); +} /* remove_key */ diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c new file mode 100644 index 00000000000..7286f540aa1 --- /dev/null +++ b/storage/maria/ma_delete_all.c @@ -0,0 +1,134 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Remove all rows from a MARIA table */ +/* This clears the status information and truncates files */ + +#include "maria_def.h" +#include "trnman_public.h" + +/** + @brief deletes all rows from a table + + @param info Maria handler + + @return Operation status + @retval 0 ok + @retval 1 error +*/ + +int maria_delete_all_rows(MARIA_HA *info) +{ + uint i; + MARIA_SHARE *share=info->s; + MARIA_STATE_INFO *state=&share->state; + my_bool log_record; + DBUG_ENTER("maria_delete_all_rows"); + + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + /** + @todo LOCK take X-lock on table here. + When we have versioning, if some other thread is looking at this table, + we cannot shrink the file like this. + */ + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + log_record= share->base.transactional && !share->temporary; + if (_ma_mark_file_changed(info)) + goto err; + + info->state->records=info->state->del=state->split=0; + state->changed= 0; /* File is optimized */ + state->dellink = HA_OFFSET_ERROR; + state->sortkey= (ushort) ~0; + info->state->key_file_length=share->base.keystart; + info->state->data_file_length=0; + info->state->empty=info->state->key_empty=0; + info->state->checksum=0; + + state->key_del= HA_OFFSET_ERROR; + for (i=0 ; i < share->base.keys ; i++) + state->key_root[i]= HA_OFFSET_ERROR; + + /* + If we are using delayed keys or if the user has done changes to the tables + since it was locked then there may be key blocks in the key cache + */ + flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_IGNORE_CHANGED); + if (my_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) || + my_chsize(share->kfile.file, share->base.keystart, 0, MYF(MY_WME)) ) + goto err; + + if (_ma_initialize_data_file(info->dfile.file, share)) + goto err; + + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); +#ifdef HAVE_MMAP + /* Resize mmaped area */ + rw_wrlock(&info->s->mmap_lock); + _ma_remap_file(info, (my_off_t)0); + rw_unlock(&info->s->mmap_lock); +#endif + if (log_record) + { + /* For now this record is only informative */ + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + uchar log_data[LSN_STORE_SIZE]; + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= FILEID_STORE_SIZE; + if (unlikely(translog_write_record(&share->state.create_rename_lsn, + LOGREC_REDO_DELETE_ALL, + info->trn, share, 0, + sizeof(log_array)/sizeof(log_array[0]), + log_array, log_data))) + goto err; + /* + store LSN into file. It is an optimization so that all old REDOs for + this table are ignored (scenario: checkpoint, INSERT1s, DELETE ALL; + INSERT2s, crash: then Recovery can skip INSERT1s). It also allows us to + ignore the present record at Recovery. + Note that storing the LSN could not be done by _ma_writeinfo() above as + the table is locked at this moment. So we need to do it by ourselves. + */ + lsn_store(log_data, share->state.create_rename_lsn); + if (my_pwrite(share->kfile.file, log_data, sizeof(log_data), + sizeof(share->state.header) + 2, MYF(MY_NABP)) || + _ma_sync_table_files(info)) + goto err; + /** + @todo RECOVERY Until we take into account the log record above + for log-low-water-mark calculation and use it in Recovery, we need + to sync above. + */ + } + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(0); + +err: + { + int save_errno=my_errno; + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + info->update|=HA_STATE_WRITTEN; /* Buffer changed */ + /** @todo RECOVERY until we use the log record above we have to sync */ + if (log_record &&_ma_sync_table_files(info) && !save_errno) + save_errno= my_errno; + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(my_errno=save_errno); + } +} /* maria_delete */ diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c new file mode 100644 index 00000000000..990714043bf --- /dev/null +++ b/storage/maria/ma_delete_table.c @@ -0,0 +1,109 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "ma_fulltext.h" +#include "trnman_public.h" + +/** + @brief drops (deletes) a table + + @param name table's name + + @return Operation status + @retval 0 ok + @retval 1 error +*/ + +int maria_delete_table(const char *name) +{ + char from[FN_REFLEN]; +#ifdef USE_RAID + uint raid_type=0,raid_chunks=0; +#endif + MARIA_HA *info; + myf sync_dir; + DBUG_ENTER("maria_delete_table"); + +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(name,"delete"); +#endif + /** @todo LOCK take X-lock on table */ + /* + We need to know if this table is transactional. + When built with RAID support, we also need to determine if this table + makes use of the raid feature. If yes, we need to remove all raid + chunks. This is done with my_raid_delete(). Unfortunately it is + necessary to open the table just to check this. We use + 'open_for_repair' to be able to open even a crashed table. If even + this open fails, we assume no raid configuration for this table + and try to remove the normal data file only. This may however + leave the raid chunks behind. + */ + if (!(info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR))) + { +#ifdef USE_RAID + raid_type= 0; +#endif + sync_dir= 0; + } + else + { +#ifdef USE_RAID + raid_type= info->s->base.raid_type; + raid_chunks= info->s->base.raid_chunks; +#endif + sync_dir= (info->s->base.transactional && !info->s->temporary) ? + MY_SYNC_DIR : 0; + maria_close(info); + } +#ifdef USE_RAID +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(name,"delete"); +#endif +#endif /* USE_RAID */ + + if (sync_dir) + { + /* + For this log record to be of any use for Recovery, we need the upper + MySQL layer to be crash-safe in DDLs; when it is we should reconsider + the moment of writing this log record, how to use it in Recovery, and + force the log. For now this record is only informative. + */ + LSN lsn; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char *)name; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= strlen(name); + if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DROP_TABLE, + &dummy_transaction_object, NULL, + log_array[TRANSLOG_INTERNAL_PARTS + + 0].length, + sizeof(log_array)/sizeof(log_array[0]), + log_array, NULL))) + DBUG_RETURN(1); + } + + fn_format(from,name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + if (my_delete_with_symlink(from, MYF(MY_WME | sync_dir))) + DBUG_RETURN(my_errno); + fn_format(from,name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); +#ifdef USE_RAID + if (raid_type) + DBUG_RETURN(my_raid_delete(from, raid_chunks, MYF(MY_WME | sync_dir)) ? + my_errno : 0); +#endif + DBUG_RETURN(my_delete_with_symlink(from, MYF(MY_WME | sync_dir)) ? + my_errno : 0); +} diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c new file mode 100644 index 00000000000..ebf84032106 --- /dev/null +++ b/storage/maria/ma_dynrec.c @@ -0,0 +1,1967 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Functions to handle space-packed-records and blobs + + A row may be stored in one or more linked blocks. + The block size is between MARIA_MIN_BLOCK_LENGTH and MARIA_MAX_BLOCK_LENGTH. + Each block is aligned on MARIA_DYN_ALIGN_SIZE. + The reson for the max block size is to not have too many different types + of blocks. For the differnet block types, look at _ma_get_block_info() +*/ + +#include "maria_def.h" + +static my_bool write_dynamic_record(MARIA_HA *info,const byte *record, + ulong reclength); +static int _ma_find_writepos(MARIA_HA *info,ulong reclength,my_off_t *filepos, + ulong *length); +static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + byte *record, ulong reclength); +static my_bool delete_dynamic_record(MARIA_HA *info,MARIA_RECORD_POS filepos, + uint second_read); +static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length); + +#ifdef THREAD +/* Play it safe; We have a small stack when using threads */ +#undef my_alloca +#undef my_afree +#define my_alloca(A) my_malloc((A),MYF(0)) +#define my_afree(A) my_free((A),MYF(0)) +#endif + + /* Interface function from MARIA_HA */ + +#ifdef HAVE_MMAP + +/* + Create mmaped area for MARIA handler + + SYNOPSIS + _ma_dynmap_file() + info MARIA handler + + RETURN + 0 ok + 1 error. +*/ + +my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size) +{ + DBUG_ENTER("_ma_dynmap_file"); + if (size > (my_off_t) (~((size_t) 0)) - MEMMAP_EXTRA_MARGIN) + { + DBUG_PRINT("warning", ("File is too large for mmap")); + DBUG_RETURN(1); + } + /* + Ingo wonders if it is good to use MAP_NORESERVE. From the Linux man page: + MAP_NORESERVE + Do not reserve swap space for this mapping. When swap space is + reserved, one has the guarantee that it is possible to modify the + mapping. When swap space is not reserved one might get SIGSEGV + upon a write if no physical memory is available. + */ + info->s->file_map= (byte*) + my_mmap(0, (size_t)(size + MEMMAP_EXTRA_MARGIN), + info->s->mode==O_RDONLY ? PROT_READ : + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_NORESERVE, + info->dfile.file, 0L); + if (info->s->file_map == (byte*) MAP_FAILED) + { + info->s->file_map= NULL; + DBUG_RETURN(1); + } +#if defined(HAVE_MADVISE) + madvise(info->s->file_map, size, MADV_RANDOM); +#endif + info->s->mmaped_length= size; + DBUG_RETURN(0); +} + + +/* + Resize mmaped area for MARIA handler + + SYNOPSIS + _ma_remap_file() + info MARIA handler + + RETURN +*/ + +void _ma_remap_file(MARIA_HA *info, my_off_t size) +{ + if (info->s->file_map) + { + VOID(my_munmap(info->s->file_map, + (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN)); + _ma_dynmap_file(info, size); + } +} +#endif + + +/* + Read bytes from MySAM handler, using mmap or pread + + SYNOPSIS + _ma_mmap_pread() + info MARIA handler + Buffer Input buffer + Count Count of bytes for read + offset Start position + MyFlags + + RETURN + 0 ok +*/ + +uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + DBUG_PRINT("info", ("maria_read with mmap %d\n", info->dfile.file)); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->mmap_lock); + + /* + The following test may fail in the following cases: + - We failed to remap a memory area (fragmented memory?) + - This thread has done some writes, but not yet extended the + memory mapped area. + */ + + if (info->s->mmaped_length >= offset + Count) + { + memcpy(Buffer, info->s->file_map + offset, Count); + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return 0; + } + else + { + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return my_pread(info->dfile.file, Buffer, Count, offset, MyFlags); + } +} + + + /* wrapper for my_pread in case if mmap isn't used */ + +uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + return my_pread(info->dfile.file, Buffer, Count, offset, MyFlags); +} + + +/* + Write bytes to MySAM handler, using mmap or pwrite + + SYNOPSIS + _ma_mmap_pwrite() + info MARIA handler + Buffer Output buffer + Count Count of bytes for write + offset Start position + MyFlags + + RETURN + 0 ok + !=0 error. In this case return error from pwrite +*/ + +uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + DBUG_PRINT("info", ("maria_write with mmap %d\n", info->dfile.file)); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->mmap_lock); + + /* + The following test may fail in the following cases: + - We failed to remap a memory area (fragmented memory?) + - This thread has done some writes, but not yet extended the + memory mapped area. + */ + + if (info->s->mmaped_length >= offset + Count) + { + memcpy(info->s->file_map + offset, Buffer, Count); + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return 0; + } + else + { + info->s->nonmmaped_inserts++; + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + return my_pwrite(info->dfile.file, Buffer, Count, offset, MyFlags); + } + +} + + + /* wrapper for my_pwrite in case if mmap isn't used */ + +uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags) +{ + return my_pwrite(info->dfile.file, Buffer, Count, offset, MyFlags); +} + + +my_bool _ma_write_dynamic_record(MARIA_HA *info, const byte *record) +{ + ulong reclength= _ma_rec_pack(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, + record); + return (write_dynamic_record(info,info->rec_buff + MARIA_REC_BUFF_OFFSET, + reclength)); +} + +my_bool _ma_update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *oldrec __attribute__ ((unused)), + const byte *record) +{ + uint length= _ma_rec_pack(info, info->rec_buff + MARIA_REC_BUFF_OFFSET, + record); + return (update_dynamic_record(info, pos, + info->rec_buff + MARIA_REC_BUFF_OFFSET, + length)); +} + + +my_bool _ma_write_blob_record(MARIA_HA *info, const byte *record) +{ + byte *rec_buff; + int error; + ulong reclength,reclength2,extra; + + extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER+1); + reclength= (info->s->base.pack_reclength + + _ma_calc_total_blob_length(info,record)+ extra); + if (!(rec_buff=(byte*) my_alloca(reclength))) + { + my_errno= HA_ERR_OUT_OF_MEM; /* purecov: inspected */ + return(1); + } + reclength2= _ma_rec_pack(info, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + record); + DBUG_PRINT("info",("reclength: %lu reclength2: %lu", + reclength, reclength2)); + DBUG_ASSERT(reclength2 <= reclength); + error= write_dynamic_record(info, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength2); + my_afree(rec_buff); + return(error != 0); +} + + +my_bool _ma_update_blob_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *oldrec __attribute__ ((unused)), + const byte *record) +{ + byte *rec_buff; + int error; + ulong reclength,extra; + + extra= (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+ + MARIA_DYN_DELETE_BLOCK_HEADER); + reclength= (info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,record)+ extra); +#ifdef NOT_USED /* We now support big rows */ + if (reclength > MARIA_DYN_MAX_ROW_LENGTH) + { + my_errno=HA_ERR_TO_BIG_ROW; + return 1; + } +#endif + if (!(rec_buff=(byte*) my_alloca(reclength))) + { + my_errno= HA_ERR_OUT_OF_MEM; /* purecov: inspected */ + return(1); + } + reclength= _ma_rec_pack(info,rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + record); + error=update_dynamic_record(info,pos, + rec_buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER), + reclength); + my_afree(rec_buff); + return(error != 0); +} + + +my_bool _ma_delete_dynamic_record(MARIA_HA *info, + const byte *record __attribute__ ((unused))) +{ + return delete_dynamic_record(info, info->cur_row.lastpos, 0); +} + + + /* Write record to data-file */ + +static my_bool write_dynamic_record(MARIA_HA *info, const byte *record, + ulong reclength) +{ + int flag; + ulong length; + my_off_t filepos; + DBUG_ENTER("write_dynamic_record"); + + flag=0; + do + { + if (_ma_find_writepos(info,reclength,&filepos,&length)) + goto err; + if (_ma_write_part_record(info,filepos,length, + (info->append_insert_at_end ? + HA_OFFSET_ERROR : info->s->state.dellink), + (byte**) &record,&reclength,&flag)) + goto err; + } while (reclength); + + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + + /* Get a block for data ; The given data-area must be used !! */ + +static int _ma_find_writepos(MARIA_HA *info, + ulong reclength, /* record length */ + my_off_t *filepos, /* Return file pos */ + ulong *length) /* length of block at filepos */ +{ + MARIA_BLOCK_INFO block_info; + ulong tmp; + DBUG_ENTER("_ma_find_writepos"); + + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) + { + /* Deleted blocks exists; Get last used block */ + *filepos=info->s->state.dellink; + block_info.second_read=0; + info->rec_cache.seek_not_done=1; + if (!(_ma_get_block_info(&block_info, info->dfile.file, + info->s->state.dellink) & + BLOCK_DELETED)) + { + DBUG_PRINT("error",("Delete link crashed")); + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(-1); + } + info->s->state.dellink=block_info.next_filepos; + info->state->del--; + info->state->empty-= block_info.block_len; + *length= block_info.block_len; + } + else + { + /* No deleted blocks; Allocate a new block */ + *filepos=info->state->data_file_length; + if ((tmp=reclength+3 + test(reclength >= (65520-3))) < + info->s->base.min_block_length) + tmp= info->s->base.min_block_length; + else + tmp= ((tmp+MARIA_DYN_ALIGN_SIZE-1) & + (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1))); + if (info->state->data_file_length > + (info->s->base.max_data_file_length - tmp)) + { + my_errno=HA_ERR_RECORD_FILE_FULL; + DBUG_RETURN(-1); + } + if (tmp > MARIA_MAX_BLOCK_LENGTH) + tmp=MARIA_MAX_BLOCK_LENGTH; + *length= tmp; + info->state->data_file_length+= tmp; + info->s->state.split++; + info->update|=HA_STATE_WRITE_AT_END; + } + DBUG_RETURN(0); +} /* _ma_find_writepos */ + + + +/* + Unlink a deleted block from the deleted list. + This block will be combined with the preceding or next block to form + a big block. +*/ + +static bool unlink_deleted_block(MARIA_HA *info, MARIA_BLOCK_INFO *block_info) +{ + DBUG_ENTER("unlink_deleted_block"); + if (block_info->filepos == info->s->state.dellink) + { + /* First deleted block; We can just use this ! */ + info->s->state.dellink=block_info->next_filepos; + } + else + { + MARIA_BLOCK_INFO tmp; + tmp.second_read=0; + /* Unlink block from the previous block */ + if (!(_ma_get_block_info(&tmp, info->dfile.file, block_info->prev_filepos) + & BLOCK_DELETED)) + DBUG_RETURN(1); /* Something is wrong */ + mi_sizestore(tmp.header+4,block_info->next_filepos); + if (info->s->file_write(info,(char*) tmp.header+4,8, + block_info->prev_filepos+4, MYF(MY_NABP))) + DBUG_RETURN(1); + /* Unlink block from next block */ + if (block_info->next_filepos != HA_OFFSET_ERROR) + { + if (!(_ma_get_block_info(&tmp, info->dfile.file, + block_info->next_filepos) + & BLOCK_DELETED)) + DBUG_RETURN(1); /* Something is wrong */ + mi_sizestore(tmp.header+12,block_info->prev_filepos); + if (info->s->file_write(info,(char*) tmp.header+12,8, + block_info->next_filepos+12, + MYF(MY_NABP))) + DBUG_RETURN(1); + } + } + /* We now have one less deleted block */ + info->state->del--; + info->state->empty-= block_info->block_len; + info->s->state.split--; + + /* + If this was a block that we where accessing through table scan + (maria_rrnd() or maria_scan(), then ensure that we skip over this block + when doing next maria_rrnd() or maria_scan(). + */ + if (info->cur_row.nextpos == block_info->filepos) + info->cur_row.nextpos+= block_info->block_len; + DBUG_RETURN(0); +} + + +/* + Add a backward link to delete block + + SYNOPSIS + update_backward_delete_link() + info MARIA handler + delete_block Position to delete block to update. + If this is 'HA_OFFSET_ERROR', nothing will be done + filepos Position to block that 'delete_block' should point to + + RETURN + 0 ok + 1 error. In this case my_error is set. +*/ + +static my_bool update_backward_delete_link(MARIA_HA *info, + my_off_t delete_block, + MARIA_RECORD_POS filepos) +{ + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("update_backward_delete_link"); + + if (delete_block != HA_OFFSET_ERROR) + { + block_info.second_read=0; + if (_ma_get_block_info(&block_info, info->dfile.file, delete_block) + & BLOCK_DELETED) + { + char buff[8]; + mi_sizestore(buff,filepos); + if (info->s->file_write(info,buff, 8, delete_block+12, MYF(MY_NABP))) + DBUG_RETURN(1); /* Error on write */ + } + else + { + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); /* Wrong delete link */ + } + } + DBUG_RETURN(0); +} + +/* Delete datarecord from database */ +/* info->rec_cache.seek_not_done is updated in cmp_record */ + +static my_bool delete_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + uint second_read) +{ + uint length,b_type; + MARIA_BLOCK_INFO block_info,del_block; + int error; + my_bool remove_next_block; + DBUG_ENTER("delete_dynamic_record"); + + /* First add a link from the last block to the new one */ + error= update_backward_delete_link(info, info->s->state.dellink, filepos); + + block_info.second_read=second_read; + do + { + /* Remove block at 'filepos' */ + if ((b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR) || + (length=(uint) (block_info.filepos-filepos) +block_info.block_len) < + MARIA_MIN_BLOCK_LENGTH) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(1); + } + /* Check if next block is a delete block */ + del_block.second_read=0; + remove_next_block=0; + if (_ma_get_block_info(&del_block, info->dfile.file, filepos + length) & + BLOCK_DELETED && del_block.block_len+length < + MARIA_DYN_MAX_BLOCK_LENGTH) + { + /* We can't remove this yet as this block may be the head block */ + remove_next_block=1; + length+=del_block.block_len; + } + + block_info.header[0]=0; + mi_int3store(block_info.header+1,length); + mi_sizestore(block_info.header+4,info->s->state.dellink); + if (b_type & BLOCK_LAST) + bfill(block_info.header+12,8,255); + else + mi_sizestore(block_info.header+12,block_info.next_filepos); + if (info->s->file_write(info,(byte*) block_info.header,20,filepos, + MYF(MY_NABP))) + DBUG_RETURN(1); + info->s->state.dellink = filepos; + info->state->del++; + info->state->empty+=length; + filepos=block_info.next_filepos; + + /* Now it's safe to unlink the deleted block directly after this one */ + if (remove_next_block && unlink_deleted_block(info,&del_block)) + error=1; + } while (!(b_type & BLOCK_LAST)); + + DBUG_RETURN(error); +} + + + /* Write a block to datafile */ + +int _ma_write_part_record(MARIA_HA *info, + my_off_t filepos, /* points at empty block */ + ulong length, /* length of block */ + my_off_t next_filepos,/* Next empty block */ + byte **record, /* pointer to record ptr */ + ulong *reclength, /* length of *record */ + int *flag) /* *flag == 0 if header */ +{ + ulong head_length,res_length,extra_length,long_block,del_length; + byte *pos,*record_end; + my_off_t next_delete_block; + uchar temp[MARIA_SPLIT_LENGTH+MARIA_DYN_DELETE_BLOCK_HEADER]; + DBUG_ENTER("_ma_write_part_record"); + + next_delete_block=HA_OFFSET_ERROR; + + res_length=extra_length=0; + if (length > *reclength + MARIA_SPLIT_LENGTH) + { /* Splitt big block */ + res_length=MY_ALIGN(length- *reclength - MARIA_EXTEND_BLOCK_LENGTH, + MARIA_DYN_ALIGN_SIZE); + length-= res_length; /* Use this for first part */ + } + long_block= (length < 65520L && *reclength < 65520L) ? 0 : 1; + if (length == *reclength+ 3 + long_block) + { + /* Block is exactly of the right length */ + temp[0]=(uchar) (1+ *flag)+(uchar) long_block; /* Flag is 0 or 6 */ + if (long_block) + { + mi_int3store(temp+1,*reclength); + head_length=4; + } + else + { + mi_int2store(temp+1,*reclength); + head_length=3; + } + } + else if (length-long_block < *reclength+4) + { /* To short block */ + if (next_filepos == HA_OFFSET_ERROR) + next_filepos= (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end ? + info->s->state.dellink : info->state->data_file_length); + if (*flag == 0) /* First block */ + { + if (*reclength > MARIA_MAX_BLOCK_LENGTH) + { + head_length= 16; + temp[0]=13; + mi_int4store(temp+1,*reclength); + mi_int3store(temp+5,length-head_length); + mi_sizestore((byte*) temp+8,next_filepos); + } + else + { + head_length=5+8+long_block*2; + temp[0]=5+(uchar) long_block; + if (long_block) + { + mi_int3store(temp+1,*reclength); + mi_int3store(temp+4,length-head_length); + mi_sizestore((byte*) temp+7,next_filepos); + } + else + { + mi_int2store(temp+1,*reclength); + mi_int2store(temp+3,length-head_length); + mi_sizestore((byte*) temp+5,next_filepos); + } + } + } + else + { + head_length=3+8+long_block; + temp[0]=11+(uchar) long_block; + if (long_block) + { + mi_int3store(temp+1,length-head_length); + mi_sizestore((byte*) temp+4,next_filepos); + } + else + { + mi_int2store(temp+1,length-head_length); + mi_sizestore((byte*) temp+3,next_filepos); + } + } + } + else + { /* Block with empty info last */ + head_length=4+long_block; + extra_length= length- *reclength-head_length; + temp[0]= (uchar) (3+ *flag)+(uchar) long_block; /* 3,4 or 9,10 */ + if (long_block) + { + mi_int3store(temp+1,*reclength); + temp[4]= (uchar) (extra_length); + } + else + { + mi_int2store(temp+1,*reclength); + temp[3]= (uchar) (extra_length); + } + length= *reclength+head_length; /* Write only what is needed */ + } + DBUG_DUMP("header",(byte*) temp,head_length); + + /* Make a long block for one write */ + record_end= *record+length-head_length; + del_length=(res_length ? MARIA_DYN_DELETE_BLOCK_HEADER : 0); + bmove((byte*) (*record-head_length),(byte*) temp,head_length); + memcpy(temp,record_end,(size_t) (extra_length+del_length)); + bzero((byte*) record_end,extra_length); + + if (res_length) + { + /* Check first if we can join this block with the next one */ + MARIA_BLOCK_INFO del_block; + my_off_t next_block=filepos+length+extra_length+res_length; + + del_block.second_read=0; + if (next_block < info->state->data_file_length && + info->s->state.dellink != HA_OFFSET_ERROR) + { + if ((_ma_get_block_info(&del_block, info->dfile.file, next_block) + & BLOCK_DELETED) && + res_length + del_block.block_len < MARIA_DYN_MAX_BLOCK_LENGTH) + { + if (unlink_deleted_block(info,&del_block)) + goto err; + res_length+=del_block.block_len; + } + } + + /* Create a delete link of the last part of the block */ + pos=record_end+extra_length; + pos[0]= '\0'; + mi_int3store(pos+1,res_length); + mi_sizestore(pos+4,info->s->state.dellink); + bfill(pos+12,8,255); /* End link */ + next_delete_block=info->s->state.dellink; + info->s->state.dellink= filepos+length+extra_length; + info->state->del++; + info->state->empty+=res_length; + info->s->state.split++; + } + if (info->opt_flag & WRITE_CACHE_USED && + info->update & HA_STATE_WRITE_AT_END) + { + if (info->update & HA_STATE_EXTEND_BLOCK) + { + info->update&= ~HA_STATE_EXTEND_BLOCK; + if (my_block_write(&info->rec_cache,(byte*) *record-head_length, + length+extra_length+del_length,filepos)) + goto err; + } + else if (my_b_write(&info->rec_cache,(byte*) *record-head_length, + length+extra_length+del_length)) + goto err; + } + else + { + info->rec_cache.seek_not_done=1; + if (info->s->file_write(info,(byte*) *record-head_length, + length+extra_length+ + del_length,filepos,info->s->write_flag)) + goto err; + } + memcpy(record_end,temp,(size_t) (extra_length+del_length)); + *record=record_end; + *reclength-=(length-head_length); + *flag=6; + + if (del_length) + { + /* link the next delete block to this */ + if (update_backward_delete_link(info, next_delete_block, + info->s->state.dellink)) + goto err; + } + + DBUG_RETURN(0); +err: + DBUG_PRINT("exit",("errno: %d",my_errno)); + DBUG_RETURN(1); +} /* _ma_write_part_record */ + + + /* update record from datafile */ + +static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos, + byte *record, ulong reclength) +{ + int flag; + uint error; + ulong length; + MARIA_BLOCK_INFO block_info; + DBUG_ENTER("update_dynamic_record"); + + flag=block_info.second_read=0; + while (reclength > 0) + { + if (filepos != info->s->state.dellink) + { + block_info.next_filepos= HA_OFFSET_ERROR; + if ((error= _ma_get_block_info(&block_info, info->dfile.file, filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + DBUG_PRINT("error",("Got wrong block info")); + if (!(error & BLOCK_FATAL_ERROR)) + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } + length=(ulong) (block_info.filepos-filepos) + block_info.block_len; + if (length < reclength) + { + uint tmp=MY_ALIGN(reclength - length + 3 + + test(reclength >= 65520L),MARIA_DYN_ALIGN_SIZE); + /* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */ + tmp= min(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length; + /* Check if we can extend this block */ + if (block_info.filepos + block_info.block_len == + info->state->data_file_length && + info->state->data_file_length < + info->s->base.max_data_file_length-tmp) + { + /* extend file */ + DBUG_PRINT("info",("Extending file with %d bytes",tmp)); + if (info->cur_row.nextpos == info->state->data_file_length) + info->cur_row.nextpos+= tmp; + info->state->data_file_length+= tmp; + info->update|= HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK; + length+=tmp; + } + else if (length < MARIA_MAX_BLOCK_LENGTH - MARIA_MIN_BLOCK_LENGTH) + { + /* + Check if next block is a deleted block + Above we have MARIA_MIN_BLOCK_LENGTH to avoid the problem where + the next block is so small it can't be splited which could + casue problems + */ + + MARIA_BLOCK_INFO del_block; + del_block.second_read=0; + if (_ma_get_block_info(&del_block, info->dfile.file, + block_info.filepos + block_info.block_len) & + BLOCK_DELETED) + { + /* Use; Unlink it and extend the current block */ + DBUG_PRINT("info",("Extending current block")); + if (unlink_deleted_block(info,&del_block)) + goto err; + if ((length+=del_block.block_len) > MARIA_MAX_BLOCK_LENGTH) + { + /* + New block was too big, link overflow part back to + delete list + */ + my_off_t next_pos; + ulong rest_length= length-MARIA_MAX_BLOCK_LENGTH; + set_if_bigger(rest_length, MARIA_MIN_BLOCK_LENGTH); + next_pos= del_block.filepos+ del_block.block_len - rest_length; + + if (update_backward_delete_link(info, info->s->state.dellink, + next_pos)) + DBUG_RETURN(1); + + /* create delete link for data that didn't fit into the page */ + del_block.header[0]=0; + mi_int3store(del_block.header+1, rest_length); + mi_sizestore(del_block.header+4,info->s->state.dellink); + bfill(del_block.header+12,8,255); + if (info->s->file_write(info,(byte*) del_block.header, 20, + next_pos, MYF(MY_NABP))) + DBUG_RETURN(1); + info->s->state.dellink= next_pos; + info->s->state.split++; + info->state->del++; + info->state->empty+= rest_length; + length-= rest_length; + } + } + } + } + } + else + { + if (_ma_find_writepos(info,reclength,&filepos,&length)) + goto err; + } + if (_ma_write_part_record(info,filepos,length,block_info.next_filepos, + &record,&reclength,&flag)) + goto err; + if ((filepos=block_info.next_filepos) == HA_OFFSET_ERROR) + { + /* Start writing data on deleted blocks */ + filepos=info->s->state.dellink; + } + } + + if (block_info.next_filepos != HA_OFFSET_ERROR) + if (delete_dynamic_record(info,block_info.next_filepos,1)) + goto err; + DBUG_RETURN(0); +err: + DBUG_RETURN(1); +} + + + /* Pack a record. Return new reclength */ + +uint _ma_rec_pack(MARIA_HA *info, register byte *to, register const byte *from) +{ + uint length,new_length,flag,bit,i; + char *pos,*end,*startpos,*packpos; + enum en_fieldtype type; + reg3 MARIA_COLUMNDEF *column; + MARIA_BLOB *blob; + DBUG_ENTER("_ma_rec_pack"); + + flag= 0; + bit= 1; + startpos= packpos=to; + to+= info->s->base.pack_bytes; + blob= info->blobs; + column= info->s->columndef; + if (info->s->base.null_bytes) + { + memcpy(to, from, info->s->base.null_bytes); + from+= info->s->base.null_bytes; + to+= info->s->base.null_bytes; + } + + for (i=info->s->base.fields ; i-- > 0; from+= length, column++) + { + length=(uint) column->length; + if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL) + { + if (type == FIELD_BLOB) + { + if (!blob->length) + flag|=bit; + else + { + char *temp_pos; + size_t tmp_length=length-portable_sizeof_char_ptr; + memcpy((byte*) to,from,tmp_length); + memcpy_fixed(&temp_pos,from+tmp_length,sizeof(char*)); + memcpy(to+tmp_length,temp_pos,(size_t) blob->length); + to+=tmp_length+blob->length; + } + blob++; + } + else if (type == FIELD_SKIP_ZERO) + { + if (memcmp((byte*) from, maria_zero_string, length) == 0) + flag|=bit; + else + { + memcpy((byte*) to,from,(size_t) length); to+=length; + } + } + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + pos= (byte*) from; end= (byte*) from + length; + if (type == FIELD_SKIP_ENDSPACE) + { /* Pack trailing spaces */ + while (end > from && *(end-1) == ' ') + end--; + } + else + { /* Pack pref-spaces */ + while (pos < end && *pos == ' ') + pos++; + } + new_length=(uint) (end-pos); + if (new_length +1 + test(column->length > 255 && new_length > 127) + < length) + { + if (column->length > 255 && new_length > 127) + { + to[0]=(char) ((new_length & 127)+128); + to[1]=(char) (new_length >> 7); + to+=2; + } + else + *to++= (char) new_length; + memcpy((byte*) to,pos,(size_t) new_length); to+=new_length; + flag|=bit; + } + else + { + memcpy(to,from,(size_t) length); to+=length; + } + } + else if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(column->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) from; + *to++= *from; + } + else + { + tmp_length= uint2korr(from); + store_key_length_inc(to,tmp_length); + } + memcpy(to, from+pack_length,tmp_length); + to+= tmp_length; + continue; + } + else + { + memcpy(to,from,(size_t) length); to+=length; + continue; /* Normal field */ + } + if ((bit= bit << 1) >= 256) + { + *packpos++ = (char) (uchar) flag; + bit=1; flag=0; + } + } + else + { + memcpy(to,from,(size_t) length); to+=length; + } + } + if (bit != 1) + *packpos= (char) (uchar) flag; + if (info->s->calc_checksum) + *to++= (byte) info->cur_row.checksum; + DBUG_PRINT("exit",("packed length: %d",(int) (to-startpos))); + DBUG_RETURN((uint) (to-startpos)); +} /* _ma_rec_pack */ + + + +/* + Check if a record was correctly packed. Used only by maria_chk + Returns 0 if record is ok. +*/ + +my_bool _ma_rec_check(MARIA_HA *info,const char *record, byte *rec_buff, + ulong packed_length, my_bool with_checksum) +{ + uint length,new_length,flag,bit,i; + char *pos,*end,*packpos,*to; + enum en_fieldtype type; + reg3 MARIA_COLUMNDEF *column; + DBUG_ENTER("_ma_rec_check"); + + packpos=rec_buff; to= rec_buff+info->s->base.pack_bytes; + column= info->s->columndef; + flag= *packpos; bit=1; + record+= info->s->base.null_bytes; + to+= info->s->base.null_bytes; + + for (i=info->s->base.fields ; i-- > 0; record+= length, column++) + { + length=(uint) column->length; + if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL) + { + if (type == FIELD_BLOB) + { + uint blob_length= + _ma_calc_blob_length(length-portable_sizeof_char_ptr,record); + if (!blob_length && !(flag & bit)) + goto err; + if (blob_length) + to+=length - portable_sizeof_char_ptr+ blob_length; + } + else if (type == FIELD_SKIP_ZERO) + { + if (memcmp((byte*) record, maria_zero_string, length) == 0) + { + if (!(flag & bit)) + goto err; + } + else + to+=length; + } + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + pos= (byte*) record; end= (byte*) record + length; + if (type == FIELD_SKIP_ENDSPACE) + { /* Pack trailing spaces */ + while (end > record && *(end-1) == ' ') + end--; + } + else + { /* Pack pre-spaces */ + while (pos < end && *pos == ' ') + pos++; + } + new_length=(uint) (end-pos); + if (new_length +1 + test(column->length > 255 && new_length > 127) + < length) + { + if (!(flag & bit)) + goto err; + if (column->length > 255 && new_length > 127) + { + if (to[0] != (char) ((new_length & 127)+128) || + to[1] != (char) (new_length >> 7)) + goto err; + to+=2; + } + else if (*to++ != (char) new_length) + goto err; + to+=new_length; + } + else + to+=length; + } + else if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(column->length -1); + uint tmp_length; + if (pack_length == 1) + { + tmp_length= (uint) *(uchar*) record; + to+= 1+ tmp_length; + continue; + } + else + { + tmp_length= uint2korr(record); + to+= get_pack_length(tmp_length)+tmp_length; + } + continue; + } + else + { + to+=length; + continue; /* Normal field */ + } + if ((bit= bit << 1) >= 256) + { + flag= *++packpos; + bit=1; + } + } + else + to+= length; + } + if (packed_length != (uint) (to - rec_buff) + test(info->s->calc_checksum) || + (bit != 1 && (flag & ~(bit - 1)))) + goto err; + if (with_checksum && ((uchar) info->cur_row.checksum != (uchar) *to)) + { + DBUG_PRINT("error",("wrong checksum for row")); + goto err; + } + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); +} + + + + /* Unpacks a record */ + /* Returns -1 and my_errno =HA_ERR_RECORD_DELETED if reclength isn't */ + /* right. Returns reclength (>0) if ok */ + +ulong _ma_rec_unpack(register MARIA_HA *info, register byte *to, byte *from, + ulong found_length) +{ + uint flag,bit,length,min_pack_length, column_length; + enum en_fieldtype type; + byte *from_end,*to_end,*packpos; + reg3 MARIA_COLUMNDEF *column, *end_column; + DBUG_ENTER("_ma_rec_unpack"); + + to_end=to + info->s->base.reclength; + from_end=from+found_length; + flag= (uchar) *from; bit=1; packpos=from; + if (found_length < info->s->base.min_pack_length) + goto err; + from+= info->s->base.pack_bytes; + min_pack_length= info->s->base.min_pack_length - info->s->base.pack_bytes; + + if ((length= info->s->base.null_bytes)) + { + memcpy(to, from, length); + from+= length; + to+= length; + min_pack_length-= length; + } + + for (column= info->s->columndef, end_column= column + info->s->base.fields; + column < end_column ; to+= column_length, column++) + { + column_length= column->length; + if ((type = (enum en_fieldtype) column->type) != FIELD_NORMAL && + (type != FIELD_CHECK)) + { + if (type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(column_length-1); + if (pack_length == 1) + { + length= (uint) *(uchar*) from; + if (length > column_length-1) + goto err; + *to= *from++; + } + else + { + get_key_length(length, from); + if (length > column_length-2) + goto err; + int2store(to,length); + } + if (from+length > from_end) + goto err; + memcpy(to+pack_length, from, length); + from+= length; + min_pack_length--; + continue; + } + if (flag & bit) + { + if (type == FIELD_BLOB || type == FIELD_SKIP_ZERO) + bzero((byte*) to,column_length); + else if (type == FIELD_SKIP_ENDSPACE || + type == FIELD_SKIP_PRESPACE) + { + if (column->length > 255 && *from & 128) + { + if (from + 1 >= from_end) + goto err; + length= (*from & 127)+ ((uint) (uchar) *(from+1) << 7); from+=2; + } + else + { + if (from == from_end) + goto err; + length= (uchar) *from++; + } + min_pack_length--; + if (length >= column_length || + min_pack_length + length > (uint) (from_end - from)) + goto err; + if (type == FIELD_SKIP_ENDSPACE) + { + memcpy(to,(byte*) from,(size_t) length); + bfill((byte*) to+length,column_length-length,' '); + } + else + { + bfill((byte*) to,column_length-length,' '); + memcpy(to+column_length-length,(byte*) from,(size_t) length); + } + from+=length; + } + } + else if (type == FIELD_BLOB) + { + uint size_length=column_length- portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(size_length,from); + ulong from_left= (ulong) (from_end - from); + if (from_left < size_length || + from_left - size_length < blob_length || + from_left - size_length - blob_length < min_pack_length) + goto err; + memcpy((byte*) to,(byte*) from,(size_t) size_length); + from+=size_length; + memcpy_fixed((byte*) to+size_length,(byte*) &from,sizeof(char*)); + from+=blob_length; + } + else + { + if (type == FIELD_SKIP_ENDSPACE || type == FIELD_SKIP_PRESPACE) + min_pack_length--; + if (min_pack_length + column_length > (uint) (from_end - from)) + goto err; + memcpy(to,(byte*) from,(size_t) column_length); from+=column_length; + } + if ((bit= bit << 1) >= 256) + { + flag= (uchar) *++packpos; bit=1; + } + } + else + { + if (min_pack_length > (uint) (from_end - from)) + goto err; + min_pack_length-=column_length; + memcpy(to, (byte*) from, (size_t) column_length); + from+=column_length; + } + } + if (info->s->calc_checksum) + info->cur_row.checksum= (uint) (uchar) *from++; + if (to == to_end && from == from_end && (bit == 1 || !(flag & ~(bit-1)))) + DBUG_RETURN(found_length); + +err: + my_errno= HA_ERR_WRONG_IN_RECORD; + DBUG_PRINT("error",("to_end: 0x%lx -> 0x%lx from_end: 0x%lx -> 0x%lx", + (long) to, (long) to_end, (long) from, (long) from_end)); + DBUG_DUMP("from",(byte*) info->rec_buff,info->s->base.min_pack_length); + DBUG_RETURN(MY_FILE_ERROR); +} /* _ma_rec_unpack */ + + + /* Calc length of blob. Update info in blobs->length */ + +ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record) +{ + ulong length; + MARIA_BLOB *blob,*end; + + for (length=0, blob= info->blobs, end=blob+info->s->base.blobs ; + blob != end; + blob++) + { + blob->length= _ma_calc_blob_length(blob->pack_length,record + blob->offset); + length+=blob->length; + } + return length; +} + + +ulong _ma_calc_blob_length(uint length, const byte *pos) +{ + switch (length) { + case 1: + return (uint) (uchar) *pos; + case 2: + return (uint) uint2korr(pos); + case 3: + return uint3korr(pos); + case 4: + return uint4korr(pos); + default: + break; + } + return 0; /* Impossible */ +} + + +void _ma_store_blob_length(byte *pos,uint pack_length,uint length) +{ + switch (pack_length) { + case 1: + *pos= (uchar) length; + break; + case 2: + int2store(pos,length); + break; + case 3: + int3store(pos,length); + break; + case 4: + int4store(pos,length); + default: + break; + } + return; +} + + +/* + Read record from datafile. + + SYNOPSIS + _ma_read_dynamic_record() + info MARIA_HA pointer to table. + filepos From where to read the record. + buf Destination for record. + + NOTE + If a write buffer is active, it needs to be flushed if its contents + intersects with the record to read. We always check if the position + of the first byte of the write buffer is lower than the position + past the last byte to read. In theory this is also true if the write + buffer is completely below the read segment. That is, if there is no + intersection. But this case is unusual. We flush anyway. Only if the + first byte in the write buffer is above the last byte to read, we do + not flush. + + A dynamic record may need several reads. So this check must be done + before every read. Reading a dynamic record starts with reading the + block header. If the record does not fit into the free space of the + header, the block may be longer than the header. In this case a + second read is necessary. These one or two reads repeat for every + part of the record. + + RETURN + 0 OK + 1 Error +*/ +int _ma_read_dynamic_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos) +{ + int block_of_record; + uint b_type; + MARIA_BLOCK_INFO block_info; + File file; + DBUG_ENTER("_ma_read_dynamic_record"); + + if (filepos != HA_OFFSET_ERROR) + { + byte *to; + uint left_length; + + LINT_INIT(to); + LINT_INIT(left_length); + file= info->dfile.file; + block_of_record= 0; /* First block of record is numbered as zero. */ + block_info.second_read= 0; + do + { + /* A corrupted table can have wrong pointers. (Bug# 19835) */ + if (filepos == HA_OFFSET_ERROR) + goto panic; + if (info->opt_flag & WRITE_CACHE_USED && + (info->rec_cache.pos_in_file < filepos + + MARIA_BLOCK_INFO_HEADER_LENGTH) && + flush_io_cache(&info->rec_cache)) + goto err; + info->rec_cache.seek_not_done=1; + if ((b_type= _ma_get_block_info(&block_info, file, filepos)) & + (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED)) + my_errno=HA_ERR_RECORD_DELETED; + goto err; + } + if (block_of_record++ == 0) /* First block */ + { + if (block_info.rec_len > (uint) info->s->base.max_pack_length) + goto panic; + if (info->s->base.blobs) + { + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + goto err; + } + to= info->rec_buff; + left_length=block_info.rec_len; + } + if (left_length < block_info.data_len || ! block_info.data_len) + goto panic; /* Wrong linked record */ + /* copy information that is already read */ + { + uint offset= (uint) (block_info.filepos - filepos); + uint prefetch_len= (sizeof(block_info.header) - offset); + filepos+= sizeof(block_info.header); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy((byte*) to, block_info.header + offset, prefetch_len); + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + /* read rest of record from file */ + if (block_info.data_len) + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < filepos + block_info.data_len && + flush_io_cache(&info->rec_cache)) + goto err; + /* + What a pity that this method is not called 'file_pread' and that + there is no equivalent without seeking. We are at the right + position already. :( + */ + if (info->s->file_read(info, (byte*) to, block_info.data_len, + filepos, MYF(MY_NABP))) + goto panic; + left_length-=block_info.data_len; + to+=block_info.data_len; + } + filepos= block_info.next_filepos; + } while (left_length); + + info->update|= HA_STATE_AKTIV; /* We have a aktive record */ + fast_ma_writeinfo(info); + DBUG_RETURN(_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) != + MY_FILE_ERROR ? 0 : 1); + } + fast_ma_writeinfo(info); + DBUG_RETURN(1); /* Wrong data to read */ + +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; +err: + VOID(_ma_writeinfo(info,0)); + DBUG_RETURN(1); +} + + /* compare unique constraint between stored rows */ + +my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) +{ + byte *old_rec_buff,*old_record; + my_off_t old_rec_buff_size; + my_bool error; + DBUG_ENTER("_ma_cmp_dynamic_unique"); + + if (!(old_record=my_alloca(info->s->base.reclength))) + DBUG_RETURN(1); + + /* Don't let the compare destroy blobs that may be in use */ + old_rec_buff= info->rec_buff; + old_rec_buff_size= info->rec_buff_size; + + if (info->s->base.blobs) + info->rec_buff= 0; + error= _ma_read_dynamic_record(info, old_record, pos) != 0; + if (!error) + error=_ma_unique_comp(def, record, old_record, def->null_are_equal) != 0; + if (info->s->base.blobs) + { + my_free(info->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + info->rec_buff= old_rec_buff; + info->rec_buff_size= old_rec_buff_size; + } + my_afree(old_record); + DBUG_RETURN(error); +} + + + /* Compare of record on disk with packed record in memory */ + +my_bool _ma_cmp_dynamic_record(register MARIA_HA *info, + register const byte *record) +{ + uint flag, reclength, b_type,cmp_length; + my_off_t filepos; + byte *buffer; + MARIA_BLOCK_INFO block_info; + my_bool error= 1; + DBUG_ENTER("_ma_cmp_dynamic_record"); + + /* We are going to do changes; dont let anybody disturb */ + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (info->opt_flag & WRITE_CACHE_USED) + { + info->update&= ~(HA_STATE_WRITE_AT_END | HA_STATE_EXTEND_BLOCK); + if (flush_io_cache(&info->rec_cache)) + DBUG_RETURN(1); + } + info->rec_cache.seek_not_done=1; + + /* If nobody have touched the database we don't have to test rec */ + + buffer=info->rec_buff; + if ((info->opt_flag & READ_CHECK_USED)) + { /* If check isn't disabled */ + if (info->s->base.blobs) + { + if (!(buffer=(byte*) my_alloca(info->s->base.pack_reclength+ + _ma_calc_total_blob_length(info,record)))) + DBUG_RETURN(1); + } + reclength= _ma_rec_pack(info,buffer,record); + record= buffer; + + filepos= info->cur_row.lastpos; + flag=block_info.second_read=0; + block_info.next_filepos=filepos; + while (reclength > 0) + { + if ((b_type= _ma_get_block_info(&block_info, info->dfile.file, + block_info.next_filepos)) + & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if (b_type & (BLOCK_SYNC_ERROR | BLOCK_DELETED)) + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + if (flag == 0) /* First block */ + { + flag=1; + if (reclength != block_info.rec_len) + { + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + } else if (reclength < block_info.data_len) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } + reclength-= block_info.data_len; + cmp_length= block_info.data_len; + if (!reclength && info->s->calc_checksum) + cmp_length--; /* 'record' may not contain checksum */ + + if (_ma_cmp_buffer(info->dfile.file, record, block_info.filepos, + cmp_length)) + { + my_errno=HA_ERR_RECORD_CHANGED; + goto err; + } + flag=1; + record+=block_info.data_len; + } + } + my_errno=0; + error= 0; +err: + if (buffer != info->rec_buff) + my_afree((gptr) buffer); + DBUG_PRINT("exit", ("result: %d", error)); + DBUG_RETURN(error); +} + + + /* Compare file to buffert */ + +static my_bool _ma_cmp_buffer(File file, const byte *buff, my_off_t filepos, + uint length) +{ + uint next_length; + char temp_buff[IO_SIZE*2]; + DBUG_ENTER("_ma_cmp_buffer"); + + next_length= IO_SIZE*2 - (uint) (filepos & (IO_SIZE-1)); + + while (length > IO_SIZE*2) + { + if (my_pread(file,temp_buff,next_length,filepos, MYF(MY_NABP)) || + memcmp((byte*) buff,temp_buff,next_length)) + goto err; + filepos+=next_length; + buff+=next_length; + length-= next_length; + next_length=IO_SIZE*2; + } + if (my_pread(file,temp_buff,length,filepos,MYF(MY_NABP))) + goto err; + DBUG_RETURN(memcmp((byte*) buff,temp_buff,length) != 0); +err: + DBUG_RETURN(1); +} + + +/* + Read next record from datafile during table scan. + + SYNOPSIS + _ma_read_rnd_dynamic_record() + info MARIA_HA pointer to table. + buf Destination for record. + filepos From where to read the record. + skip_deleted_blocks If to repeat reading until a non-deleted + record is found. + + NOTE + This is identical to _ma_read_dynamic_record(), except the following + cases: + + - If there is no active row at 'filepos', continue scanning for + an active row. (This is becasue the previous + _ma_read_rnd_dynamic_record() call stored the next block position + in filepos, but this position may not be a start block for a row + - We may have READ_CACHING enabled, in which case we use the cache + to read rows. + + For other comments, check _ma_read_dynamic_record() + + RETURN + 0 OK + != 0 Error +*/ + +int _ma_read_rnd_dynamic_record(MARIA_HA *info, + byte *buf, + MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks) +{ + int block_of_record, info_read, save_errno; + uint left_len,b_type; + byte *to; + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_dynamic_record"); + + info_read=0; + LINT_INIT(to); + + if (info->lock_type == F_UNLCK) + { +#ifndef UNSAFE_LOCKING +#else + info->tmp_lock_type=F_RDLCK; +#endif + } + else + info_read=1; /* memory-keyinfoblock is ok */ + + block_of_record= 0; /* First block of record is numbered as zero. */ + block_info.second_read= 0; + left_len=1; + do + { + if (filepos >= info->state->data_file_length) + { + if (!info_read) + { /* Check if changed */ + info_read=1; + info->rec_cache.seek_not_done=1; + if (_ma_state_info_read_dsk(share->kfile.file, &share->state, 1)) + goto panic; + } + if (filepos >= info->state->data_file_length) + { + my_errno= HA_ERR_END_OF_FILE; + goto err; + } + } + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, + sizeof(block_info.header), + (!block_of_record && skip_deleted_blocks ? + READING_NEXT : 0) | READING_HEADER)) + goto panic; + b_type= _ma_get_block_info(&block_info,-1,filepos); + } + else + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < filepos + MARIA_BLOCK_INFO_HEADER_LENGTH && + flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + info->rec_cache.seek_not_done=1; + b_type= _ma_get_block_info(&block_info, info->dfile.file, filepos); + } + + if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR | + BLOCK_FATAL_ERROR)) + { + if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + && skip_deleted_blocks) + { + filepos=block_info.filepos+block_info.block_len; + block_info.second_read=0; + continue; /* Search after next_record */ + } + if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)) + { + my_errno= HA_ERR_RECORD_DELETED; + info->cur_row.lastpos= block_info.filepos; + info->cur_row.nextpos= block_info.filepos+block_info.block_len; + } + goto err; + } + if (block_of_record == 0) /* First block */ + { + if (block_info.rec_len > (uint) share->base.max_pack_length) + goto panic; + info->cur_row.lastpos= filepos; + if (share->base.blobs) + { + if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + block_info.rec_len + + info->s->base.extra_rec_buff_size)) + goto err; + } + to= info->rec_buff; + left_len=block_info.rec_len; + } + if (left_len < block_info.data_len) + goto panic; /* Wrong linked record */ + + /* copy information that is already read */ + { + uint offset=(uint) (block_info.filepos - filepos); + uint tmp_length= (sizeof(block_info.header) - offset); + filepos=block_info.filepos; + + if (tmp_length > block_info.data_len) + tmp_length= block_info.data_len; + if (tmp_length) + { + memcpy((byte*) to, block_info.header+offset,tmp_length); + block_info.data_len-=tmp_length; + left_len-=tmp_length; + to+=tmp_length; + filepos+=tmp_length; + } + } + /* read rest of record from file */ + if (block_info.data_len) + { + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache,(byte*) to,filepos, + block_info.data_len, + (!block_of_record && skip_deleted_blocks) ? + READING_NEXT : 0)) + goto panic; + } + else + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file < + block_info.filepos + block_info.data_len && + flush_io_cache(&info->rec_cache)) + goto err; + /* VOID(my_seek(info->dfile.file, filepos, MY_SEEK_SET, MYF(0))); */ + if (my_read(info->dfile.file, (byte*)to, block_info.data_len, + MYF(MY_NABP))) + { + if (my_errno == -1) + my_errno= HA_ERR_WRONG_IN_RECORD; /* Unexpected end of file */ + goto err; + } + } + } + /* + Increment block-of-record counter. If it was the first block, + remember the position behind the block for the next call. + */ + if (block_of_record++ == 0) + { + info->cur_row.nextpos= block_info.filepos+block_info.block_len; + skip_deleted_blocks=0; + } + left_len-=block_info.data_len; + to+=block_info.data_len; + filepos=block_info.next_filepos; + } while (left_len); + + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + fast_ma_writeinfo(info); + if (_ma_rec_unpack(info,buf,info->rec_buff,block_info.rec_len) != + MY_FILE_ERROR) + DBUG_RETURN(0); + DBUG_RETURN(my_errno); /* Wrong record */ + +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; /* Something is fatal wrong */ +err: + save_errno=my_errno; + VOID(_ma_writeinfo(info,0)); + DBUG_RETURN(my_errno=save_errno); +} + + + /* Read and process header from a dynamic-record-file */ + +uint _ma_get_block_info(MARIA_BLOCK_INFO *info, File file, my_off_t filepos) +{ + uint return_val=0; + uchar *header=info->header; + + if (file >= 0) + { + /* + We do not use my_pread() here because we want to have the file + pointer set to the end of the header after this function. + my_pread() may leave the file pointer untouched. + */ + VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); + if (my_read(file,(char*) header,sizeof(info->header),MYF(0)) != + sizeof(info->header)) + goto err; + } + DBUG_DUMP("header",(byte*) header,MARIA_BLOCK_INFO_HEADER_LENGTH); + if (info->second_read) + { + if (info->header[0] <= 6 || info->header[0] == 13) + return_val=BLOCK_SYNC_ERROR; + } + else + { + if (info->header[0] > 6 && info->header[0] != 13) + return_val=BLOCK_SYNC_ERROR; + } + info->next_filepos= HA_OFFSET_ERROR; /* Dummy if no next block */ + + switch (info->header[0]) { + case 0: + if ((info->block_len=(uint) mi_uint3korr(header+1)) < + MARIA_MIN_BLOCK_LENGTH || + (info->block_len & (MARIA_DYN_ALIGN_SIZE -1))) + goto err; + info->filepos=filepos; + info->next_filepos=mi_sizekorr(header+4); + info->prev_filepos=mi_sizekorr(header+12); +#if SIZEOF_OFF_T == 4 + if ((mi_uint4korr(header+4) != 0 && + (mi_uint4korr(header+4) != (ulong) ~0 || + info->next_filepos != (ulong) ~0)) || + (mi_uint4korr(header+12) != 0 && + (mi_uint4korr(header+12) != (ulong) ~0 || + info->prev_filepos != (ulong) ~0))) + goto err; +#endif + return return_val | BLOCK_DELETED; /* Deleted block */ + + case 1: + info->rec_len=info->data_len=info->block_len=mi_uint2korr(header+1); + info->filepos=filepos+3; + return return_val | BLOCK_FIRST | BLOCK_LAST; + case 2: + info->rec_len=info->data_len=info->block_len=mi_uint3korr(header+1); + info->filepos=filepos+4; + return return_val | BLOCK_FIRST | BLOCK_LAST; + + case 13: + info->rec_len=mi_uint4korr(header+1); + info->block_len=info->data_len=mi_uint3korr(header+5); + info->next_filepos=mi_sizekorr(header+8); + info->second_read=1; + info->filepos=filepos+16; + return return_val | BLOCK_FIRST; + + case 3: + info->rec_len=info->data_len=mi_uint2korr(header+1); + info->block_len=info->rec_len+ (uint) header[3]; + info->filepos=filepos+4; + return return_val | BLOCK_FIRST | BLOCK_LAST; + case 4: + info->rec_len=info->data_len=mi_uint3korr(header+1); + info->block_len=info->rec_len+ (uint) header[4]; + info->filepos=filepos+5; + return return_val | BLOCK_FIRST | BLOCK_LAST; + + case 5: + info->rec_len=mi_uint2korr(header+1); + info->block_len=info->data_len=mi_uint2korr(header+3); + info->next_filepos=mi_sizekorr(header+5); + info->second_read=1; + info->filepos=filepos+13; + return return_val | BLOCK_FIRST; + case 6: + info->rec_len=mi_uint3korr(header+1); + info->block_len=info->data_len=mi_uint3korr(header+4); + info->next_filepos=mi_sizekorr(header+7); + info->second_read=1; + info->filepos=filepos+15; + return return_val | BLOCK_FIRST; + + /* The following blocks are identical to 1-6 without rec_len */ + case 7: + info->data_len=info->block_len=mi_uint2korr(header+1); + info->filepos=filepos+3; + return return_val | BLOCK_LAST; + case 8: + info->data_len=info->block_len=mi_uint3korr(header+1); + info->filepos=filepos+4; + return return_val | BLOCK_LAST; + + case 9: + info->data_len=mi_uint2korr(header+1); + info->block_len=info->data_len+ (uint) header[3]; + info->filepos=filepos+4; + return return_val | BLOCK_LAST; + case 10: + info->data_len=mi_uint3korr(header+1); + info->block_len=info->data_len+ (uint) header[4]; + info->filepos=filepos+5; + return return_val | BLOCK_LAST; + + case 11: + info->data_len=info->block_len=mi_uint2korr(header+1); + info->next_filepos=mi_sizekorr(header+3); + info->second_read=1; + info->filepos=filepos+11; + return return_val; + case 12: + info->data_len=info->block_len=mi_uint3korr(header+1); + info->next_filepos=mi_sizekorr(header+4); + info->second_read=1; + info->filepos=filepos+12; + return return_val; + } + +err: + my_errno=HA_ERR_WRONG_IN_RECORD; /* Garbage */ + return BLOCK_ERROR; +} diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c new file mode 100644 index 00000000000..61eba165412 --- /dev/null +++ b/storage/maria/ma_extra.c @@ -0,0 +1,488 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#ifdef HAVE_SYS_MMAN_H +#include +#endif + +static void maria_extra_keyflag(MARIA_HA *info, + enum ha_extra_function function); + +/** + @brief Set options and buffers to optimize table handling + + @param name table's name + @param info open table + @param function operation + @param extra_arg Pointer to extra argument (normally pointer to + ulong); used when function is one of: + HA_EXTRA_WRITE_CACHE + HA_EXTRA_CACHE + + @return Operation status + @retval 0 ok + @retval !=0 error +*/ + +int maria_extra(MARIA_HA *info, enum ha_extra_function function, + void *extra_arg) +{ + int error=0; + ulong cache_size; + MARIA_SHARE *share=info->s; + my_bool block_records= share->data_file_type == BLOCK_RECORD; + + DBUG_ENTER("maria_extra"); + DBUG_PRINT("enter",("function: %d",(int) function)); + + switch (function) { + case HA_EXTRA_RESET_STATE: /* Reset state (don't free buffers) */ + info->lastinx= 0; /* Use first index as def */ + info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; + info->page_changed=1; + /* Next/prev gives first/last */ + if (info->opt_flag & READ_CACHE_USED) + { + reinit_io_cache(&info->rec_cache,READ_CACHE,0, + (pbool) (info->lock_type != F_UNLCK), + (pbool) test(info->update & HA_STATE_ROW_CHANGED) + ); + } + info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | + HA_STATE_PREV_FOUND); + break; + case HA_EXTRA_CACHE: + if (block_records) + break; /* Not supported */ + + if (info->lock_type == F_UNLCK && + (share->options & HA_OPTION_PACK_RECORD)) + { + error=1; /* Not possibly if not locked */ + my_errno=EACCES; + break; + } + if (info->s->file_map) /* Don't use cache if mmap */ + break; +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if ((share->options & HA_OPTION_COMPRESS_RECORD)) + { + pthread_mutex_lock(&share->intern_lock); + if (_ma_memmap_file(info)) + { + /* We don't nead MADV_SEQUENTIAL if small file */ + madvise(share->file_map,share->state.state.data_file_length, + share->state.state.data_file_length <= RECORD_CACHE_SIZE*16 ? + MADV_RANDOM : MADV_SEQUENTIAL); + pthread_mutex_unlock(&share->intern_lock); + break; + } + pthread_mutex_unlock(&share->intern_lock); + } +#endif + if (info->opt_flag & WRITE_CACHE_USED) + { + info->opt_flag&= ~WRITE_CACHE_USED; + if ((error=end_io_cache(&info->rec_cache))) + break; + } + if (!(info->opt_flag & + (READ_CACHE_USED | WRITE_CACHE_USED | MEMMAP_USED))) + { + cache_size= (extra_arg ? *(ulong*) extra_arg : + my_default_record_cache_size); + if (!(init_io_cache(&info->rec_cache, info->dfile.file, + (uint) min(info->state->data_file_length+1, + cache_size), + READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK), + MYF(share->write_flag & MY_WAIT_IF_FULL)))) + { + info->opt_flag|=READ_CACHE_USED; + info->update&= ~HA_STATE_ROW_CHANGED; + } + if (share->concurrent_insert) + info->rec_cache.end_of_file=info->state->data_file_length; + } + break; + case HA_EXTRA_REINIT_CACHE: + if (info->opt_flag & READ_CACHE_USED) + { + reinit_io_cache(&info->rec_cache, READ_CACHE, info->cur_row.nextpos, + (pbool) (info->lock_type != F_UNLCK), + (pbool) test(info->update & HA_STATE_ROW_CHANGED)); + info->update&= ~HA_STATE_ROW_CHANGED; + if (share->concurrent_insert) + info->rec_cache.end_of_file=info->state->data_file_length; + } + break; + case HA_EXTRA_WRITE_CACHE: + if (info->lock_type == F_UNLCK) + { + error=1; /* Not possibly if not locked */ + break; + } + if (block_records) + break; /* Not supported */ + + cache_size= (extra_arg ? *(ulong*) extra_arg : + my_default_record_cache_size); + if (!(info->opt_flag & + (READ_CACHE_USED | WRITE_CACHE_USED | OPT_NO_ROWS)) && + !share->state.header.uniques) + if (!(init_io_cache(&info->rec_cache, info->dfile.file, cache_size, + WRITE_CACHE,info->state->data_file_length, + (pbool) (info->lock_type != F_UNLCK), + MYF(share->write_flag & MY_WAIT_IF_FULL)))) + { + info->opt_flag|=WRITE_CACHE_USED; + info->update&= ~(HA_STATE_ROW_CHANGED | + HA_STATE_WRITE_AT_END | + HA_STATE_EXTEND_BLOCK); + } + break; + case HA_EXTRA_PREPARE_FOR_UPDATE: + if (info->s->data_file_type != DYNAMIC_RECORD) + break; + /* Remove read/write cache if dynamic rows */ + case HA_EXTRA_NO_CACHE: + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error=end_io_cache(&info->rec_cache); + /* Sergei will insert full text index caching here */ + } +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if (info->opt_flag & MEMMAP_USED) + madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); +#endif + break; + case HA_EXTRA_FLUSH_CACHE: + if (info->opt_flag & WRITE_CACHE_USED) + { + if ((error=flush_io_cache(&info->rec_cache))) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + } + break; + case HA_EXTRA_NO_READCHECK: + info->opt_flag&= ~READ_CHECK_USED; /* No readcheck */ + break; + case HA_EXTRA_READCHECK: + info->opt_flag|= READ_CHECK_USED; + break; + case HA_EXTRA_KEYREAD: /* Read only keys to record */ + case HA_EXTRA_REMEMBER_POS: + info->opt_flag |= REMEMBER_OLD_POS; + bmove((byte*) info->lastkey+share->base.max_key_length*2, + (byte*) info->lastkey,info->lastkey_length); + info->save_update= info->update; + info->save_lastinx= info->lastinx; + info->save_lastpos= info->cur_row.lastpos; + info->save_lastkey_length=info->lastkey_length; + if (function == HA_EXTRA_REMEMBER_POS) + break; + /* fall through */ + case HA_EXTRA_KEYREAD_CHANGE_POS: + info->opt_flag |= KEY_READ_USED; + info->read_record= _ma_read_key_record; + break; + case HA_EXTRA_NO_KEYREAD: + case HA_EXTRA_RESTORE_POS: + if (info->opt_flag & REMEMBER_OLD_POS) + { + bmove((byte*) info->lastkey, + (byte*) info->lastkey+share->base.max_key_length*2, + info->save_lastkey_length); + info->update= info->save_update | HA_STATE_WRITTEN; + info->lastinx= info->save_lastinx; + info->cur_row.lastpos= info->save_lastpos; + info->lastkey_length=info->save_lastkey_length; + } + info->read_record= share->read_record; + info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); + break; + case HA_EXTRA_NO_USER_CHANGE: /* Database is somehow locked agains changes */ + info->lock_type= F_EXTRA_LCK; /* Simulate as locked */ + break; + case HA_EXTRA_WAIT_LOCK: + info->lock_wait=0; + break; + case HA_EXTRA_NO_WAIT_LOCK: + info->lock_wait=MY_DONT_WAIT; + break; + case HA_EXTRA_NO_KEYS: + if (info->lock_type == F_UNLCK) + { + error=1; /* Not possibly if not lock */ + break; + } + if (maria_is_any_key_active(share->state.key_map)) + { + MARIA_KEYDEF *key=share->keyinfo; + uint i; + for (i=0 ; i < share->base.keys ; i++,key++) + { + if (!(key->flag & HA_NOSAME) && info->s->base.auto_key != i+1) + { + maria_clear_key_active(share->state.key_map, i); + info->update|= HA_STATE_CHANGED; + } + } + + if (!share->changed) + { + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED; + share->changed=1; /* Update on close */ + if (!share->global_changed) + { + share->global_changed=1; + share->state.open_count++; + } + } + share->state.state= *info->state; + error=_ma_state_info_write(share->kfile.file, &share->state, (1 | 2)); + } + break; + case HA_EXTRA_FORCE_REOPEN: + pthread_mutex_lock(&THR_LOCK_maria); + share->last_version= 0L; /* Impossible version */ + pthread_mutex_unlock(&THR_LOCK_maria); + break; + case HA_EXTRA_PREPARE_FOR_DELETE: + /* QQ: suggest to rename it to "PREPARE_FOR_DROP" */ + pthread_mutex_lock(&THR_LOCK_maria); + share->last_version= 0L; /* Impossible version */ +#ifdef __WIN__ + /* Close the isam and data files as Win32 can't drop an open table */ + pthread_mutex_lock(&share->intern_lock); + /* + If this is Windows we remove blocks from pagecache. If not Windows we + don't do it, so these pages stay in the pagecache? So they may later be + flushed to a wrong file? + Or is it that this flush_pagecache_blocks() never finds any blocks? Then + why do we do it on Windows? + Don't we wait for all instances to be closed before dropping the table? + Do we ever do something useful here? + BUG? + */ + if (flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_IGNORE_CHANGED)) + { + error=my_errno; + share->changed=1; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error=end_io_cache(&info->rec_cache); + } + if (info->lock_type != F_UNLCK && ! info->was_locked) + { + info->was_locked=info->lock_type; + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + info->lock_type = F_UNLCK; + } + if (share->kfile.file >= 0) + { + _ma_decrement_open_count(info); + if (my_close(share->kfile,MYF(0))) + error=my_errno; + } + { + LIST *list_element ; + for (list_element=maria_open_list ; + list_element ; + list_element=list_element->next) + { + MARIA_HA *tmpinfo=(MARIA_HA*) list_element->data; + if (tmpinfo->s == info->s) + { + /** + @todo RECOVERY BUG: flush of bitmap and sync of dfile are missing + */ + if (tmpinfo->dfile.file >= 0 && + my_close(tmpinfo->dfile.file, MYF(0))) + error = my_errno; + tmpinfo->dfile.file= -1; + } + } + } + share->kfile.file= -1; /* Files aren't open anymore */ + pthread_mutex_unlock(&share->intern_lock); +#endif + pthread_mutex_unlock(&THR_LOCK_maria); + break; + case HA_EXTRA_FLUSH: + if (!share->temporary) + flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_KEEP); +#ifdef HAVE_PWRITE + _ma_decrement_open_count(info); +#endif + if (share->not_flushed) + { + share->not_flushed=0; + if (_ma_sync_table_files(info)) + error= my_errno; + if (error) + { + share->changed=1; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Fatal error found */ + } + } + if (share->base.blobs && info->rec_buff_size > + share->base.default_rec_buff_size) + { + info->rec_buff_size= 1; /* Force realloc */ + _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + share->base.default_rec_buff_size); + } + break; + case HA_EXTRA_NORMAL: /* Theese isn't in use */ + info->quick_mode=0; + break; + case HA_EXTRA_QUICK: + info->quick_mode=1; + break; + case HA_EXTRA_NO_ROWS: + if (!share->state.header.uniques) + info->opt_flag|= OPT_NO_ROWS; + break; + case HA_EXTRA_PRELOAD_BUFFER_SIZE: + info->preload_buff_size= *((ulong *) extra_arg); + break; + case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + case HA_EXTRA_CHANGE_KEY_TO_DUP: + maria_extra_keyflag(info, function); + break; + case HA_EXTRA_MMAP: +#ifdef HAVE_MMAP + if (block_records) + break; /* Not supported */ + pthread_mutex_lock(&share->intern_lock); + /* + Memory map the data file if it is not already mapped and if there + are no other threads using this table. intern_lock prevents other + threads from starting to use the table while we are mapping it. + */ + if (!share->file_map && (share->tot_locks == 1)) + { + if (_ma_dynmap_file(info, share->state.state.data_file_length)) + { + DBUG_PRINT("warning",("mmap failed: errno: %d",errno)); + error= my_errno= errno; + } + else + { + share->file_read= _ma_mmap_pread; + share->file_write= _ma_mmap_pwrite; + } + } + pthread_mutex_unlock(&share->intern_lock); +#endif + break; + case HA_EXTRA_MARK_AS_LOG_TABLE: + pthread_mutex_lock(&share->intern_lock); + share->is_log_table= TRUE; + pthread_mutex_unlock(&share->intern_lock); + break; + case HA_EXTRA_KEY_CACHE: + case HA_EXTRA_NO_KEY_CACHE: + default: + break; + } + { + char tmp[1]; + tmp[0]=function; + } + DBUG_RETURN(error); +} /* maria_extra */ + + +/* + Start/Stop Inserting Duplicates Into a Table, WL#1648. +*/ + +static void maria_extra_keyflag(MARIA_HA *info, + enum ha_extra_function function) +{ + uint idx; + + for (idx= 0; idx< info->s->base.keys; idx++) + { + switch (function) { + case HA_EXTRA_CHANGE_KEY_TO_UNIQUE: + info->s->keyinfo[idx].flag|= HA_NOSAME; + break; + case HA_EXTRA_CHANGE_KEY_TO_DUP: + info->s->keyinfo[idx].flag&= ~(HA_NOSAME); + break; + default: + break; + } + } +} + + +int maria_reset(MARIA_HA *info) +{ + int error= 0; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_reset"); + /* + Free buffers and reset the following flags: + EXTRA_CACHE, EXTRA_WRITE_CACHE, EXTRA_KEYREAD, EXTRA_QUICK + + If the row buffer cache is large (for dynamic tables), reduce it + to save memory. + */ + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + error= end_io_cache(&info->rec_cache); + } + if (share->base.blobs && info->rec_buff_size > + share->base.default_rec_buff_size) + { + info->rec_buff_size= 1; /* Force realloc */ + _ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size, + share->base.default_rec_buff_size); + } +#if defined(HAVE_MMAP) && defined(HAVE_MADVISE) + if (info->opt_flag & MEMMAP_USED) + madvise(share->file_map,share->state.state.data_file_length,MADV_RANDOM); +#endif + info->opt_flag&= ~(KEY_READ_USED | REMEMBER_OLD_POS); + info->quick_mode=0; + info->lastinx= 0; /* Use first index as def */ + info->last_search_keypage= info->cur_row.lastpos= HA_OFFSET_ERROR; + info->page_changed= 1; + info->update= ((info->update & HA_STATE_CHANGED) | HA_STATE_NEXT_FOUND | + HA_STATE_PREV_FOUND); + DBUG_RETURN(error); +} + + +int _ma_sync_table_files(const MARIA_HA *info) +{ + return (my_sync(info->dfile.file, MYF(0)) || + my_sync(info->s->kfile.file, MYF(0))); +} diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c new file mode 100644 index 00000000000..6e95262fe84 --- /dev/null +++ b/storage/maria/ma_ft_boolean_search.c @@ -0,0 +1,963 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* TODO: add caching - pre-read several index entries at once */ + +/* + Added optimization for full-text queries with plus-words. It was + implemented by sharing maximal document id (max_docid) variable + inside plus subtree. max_docid could be used by any word in plus + subtree, but it could be updated by plus-word only. + + The idea is: there is no need to search for docid smaller than + biggest docid inside current plus subtree. + + Examples: + +word1 word2 + share same max_docid + max_docid updated by word1 + +word1 +(word2 word3) + share same max_docid + max_docid updated by word1 + +(word1 -word2) +(+word3 word4) + share same max_docid + max_docid updated by word3 +*/ + +#define FT_CORE +#include "ma_ftdefs.h" + +/* search with boolean queries */ + +static double _wghts[11]= +{ + 0.131687242798354, + 0.197530864197531, + 0.296296296296296, + 0.444444444444444, + 0.666666666666667, + 1.000000000000000, + 1.500000000000000, + 2.250000000000000, + 3.375000000000000, + 5.062500000000000, + 7.593750000000000}; +static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */ + +static double _nwghts[11]= +{ + -0.065843621399177, + -0.098765432098766, + -0.148148148148148, + -0.222222222222222, + -0.333333333333334, + -0.500000000000000, + -0.750000000000000, + -1.125000000000000, + -1.687500000000000, + -2.531250000000000, + -3.796875000000000}; +static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ + +#define FTB_FLAG_TRUNC 1 +/* At most one of the following flags can be set */ +#define FTB_FLAG_YES 2 +#define FTB_FLAG_NO 4 +#define FTB_FLAG_WONLY 8 + +typedef struct st_ftb_expr FTB_EXPR; +struct st_ftb_expr +{ + FTB_EXPR *up; + uint flags; +/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ + my_off_t docid[2]; + my_off_t max_docid; + float weight; + float cur_weight; + LIST *phrase; /* phrase words */ + LIST *document; /* for phrase search */ + uint yesses; /* number of "yes" words matched */ + uint nos; /* number of "no" words matched */ + uint ythresh; /* number of "yes" words in expr */ + uint yweaks; /* number of "yes" words for scan only */ +}; + +typedef struct st_ftb_word +{ + FTB_EXPR *up; + uint flags; +/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */ + my_off_t docid[2]; /* for index search and for scan */ + my_off_t key_root; + my_off_t *max_docid; + MARIA_KEYDEF *keyinfo; + struct st_ftb_word *prev; + float weight; + uint ndepth; + uint len; + uchar off; + byte word[1]; +} FTB_WORD; + +typedef struct st_ft_info +{ + struct _ft_vft *please; + MARIA_HA *info; + CHARSET_INFO *charset; + FTB_EXPR *root; + FTB_WORD **list; + FTB_WORD *last_word; + MEM_ROOT mem_root; + QUEUE queue; + TREE no_dupes; + my_off_t lastpos; + uint keynr; + uchar with_scan; + enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state; +} FTB; + +static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) +{ + int i; + + /* if a==curdoc, take it as a < b */ + if (v && a->docid[0] == *v) + return -1; + + /* ORDER BY docid, ndepth DESC */ + i=CMP_NUM(a->docid[0], b->docid[0]); + if (!i) + i=CMP_NUM(b->ndepth,a->ndepth); + return i; +} + +static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) +{ + /* ORDER BY word DESC, ndepth DESC */ + int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, + (uchar*) (*a)->word+1,(*a)->len-1,0,0); + if (!i) + i=CMP_NUM((*b)->ndepth,(*a)->ndepth); + return i; +} + + +typedef struct st_my_ftb_param +{ + FTB *ftb; + FTB_EXPR *ftbe; + byte *up_quot; + uint depth; +} MY_FTB_PARAM; + + +static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *info) +{ + MY_FTB_PARAM *ftb_param= param->mysql_ftparam; + FTB_WORD *ftbw; + FTB_EXPR *ftbe, *tmp_expr; + FT_WORD *phrase_word; + LIST *tmp_element; + int r= info->weight_adjust; + float weight= (float) + (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; + + switch (info->type) { + case FT_TOKEN_WORD: + ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root, + sizeof(FTB_WORD) + + (info->trunc ? HA_MAX_KEY_BUFF : + word_len * ftb_param->ftb->charset->mbmaxlen + + HA_FT_WLEN + + ftb_param->ftb->info->s->rec_reflength)); + ftbw->len= word_len + 1; + ftbw->flags= 0; + ftbw->off= 0; + if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES; + if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO; + if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC; + ftbw->weight= weight; + ftbw->up= ftb_param->ftbe; + ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR; + ftbw->ndepth= (info->yesno < 0) + ftb_param->depth; + ftbw->key_root= HA_OFFSET_ERROR; + memcpy(ftbw->word + 1, word, word_len); + ftbw->word[0]= word_len; + if (info->yesno > 0) ftbw->up->ythresh++; + ftb_param->ftb->queue.max_elements++; + ftbw->prev= ftb_param->ftb->last_word; + ftb_param->ftb->last_word= ftbw; + ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC); + for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up) + if (! (tmp_expr->flags & FTB_FLAG_YES)) + break; + ftbw->max_docid= &tmp_expr->max_docid; + /* fall through */ + case FT_TOKEN_STOPWORD: + if (! ftb_param->up_quot) break; + phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD)); + tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST)); + phrase_word->pos= word; + phrase_word->len= word_len; + tmp_element->data= (void *)phrase_word; + ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element); + /* Allocate document list at this point. + It allows to avoid huge amount of allocs/frees for each row.*/ + tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST)); + tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD)); + ftb_param->ftbe->document= + list_add(ftb_param->ftbe->document, tmp_element); + break; + case FT_TOKEN_LEFT_PAREN: + ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR)); + ftbe->flags= 0; + if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES; + if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO; + ftbe->weight= weight; + ftbe->up= ftb_param->ftbe; + ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0; + ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR; + ftbe->phrase= NULL; + ftbe->document= 0; + if (info->quot) ftb_param->ftb->with_scan|= 2; + if (info->yesno > 0) ftbe->up->ythresh++; + ftb_param->ftbe= ftbe; + ftb_param->depth++; + ftb_param->up_quot= info->quot; + break; + case FT_TOKEN_RIGHT_PAREN: + if (ftb_param->ftbe->document) + { + /* Circuit document list */ + for (tmp_element= ftb_param->ftbe->document; + tmp_element->next; tmp_element= tmp_element->next) /* no-op */; + tmp_element->next= ftb_param->ftbe->document; + ftb_param->ftbe->document->prev= tmp_element; + } + info->quot= 0; + if (ftb_param->ftbe->up) + { + DBUG_ASSERT(ftb_param->depth); + ftb_param->ftbe= ftb_param->ftbe->up; + ftb_param->depth--; + ftb_param->up_quot= 0; + } + break; + case FT_TOKEN_EOF: + default: + break; + } + return(0); +} + + +static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param, + char *query, int len) +{ + MY_FTB_PARAM *ftb_param= param->mysql_ftparam; + MYSQL_FTPARSER_BOOLEAN_INFO info; + CHARSET_INFO *cs= ftb_param->ftb->charset; + char **start= &query; + char *end= query + len; + FT_WORD w; + + info.prev= ' '; + info.quot= 0; + while (maria_ft_get_word(cs, start, end, &w, &info)) + param->mysql_add_word(param, w.pos, w.len, &info); + return(0); +} + + +static void _ftb_parse_query(FTB *ftb, byte *query, uint len, + struct st_mysql_ftparser *parser) +{ + MYSQL_FTPARSER_PARAM *param; + MY_FTB_PARAM ftb_param; + DBUG_ENTER("_ftb_parse_query"); + DBUG_ASSERT(parser); + + if (ftb->state != UNINITIALIZED) + DBUG_VOID_RETURN; + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0))) + DBUG_VOID_RETURN; + + ftb_param.ftb= ftb; + ftb_param.depth= 0; + ftb_param.ftbe= ftb->root; + ftb_param.up_quot= 0; + + param->mysql_parse= ftb_parse_query_internal; + param->mysql_add_word= ftb_query_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->cs= ftb->charset; + param->doc= query; + param->length= len; + param->flags= 0; + param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO; + parser->parse(param); + DBUG_VOID_RETURN; +} + + +static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)), + const void *a,const void *b) +{ + return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b))); +} + +/* returns 1 if the search was finished (must-word wasn't found) */ +static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) +{ + int r; + int subkeys=1; + my_bool can_go_down; + MARIA_HA *info=ftb->info; + uint off= 0, extra=HA_FT_WLEN+info->s->base.rec_reflength; + byte *lastkey_buf= ftbw->word+ftbw->off; + + if (ftbw->flags & FTB_FLAG_TRUNC) + lastkey_buf+=ftbw->len; + + if (init_search) + { + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + + r= _ma_search(info, ftbw->keyinfo, ftbw->word, ftbw->len, + SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root); + } + else + { + uint sflag= SEARCH_BIGGER; + if (ftbw->docid[0] < *ftbw->max_docid) + { + sflag|= SEARCH_SAME; + _ma_dpointer(info, (ftbw->word + ftbw->len + HA_FT_WLEN), + *ftbw->max_docid); + } + r= _ma_search(info, ftbw->keyinfo, lastkey_buf, + USE_WHOLE_KEY, sflag, ftbw->key_root); + } + + can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC))); + /* Skip rows inserted by concurrent insert */ + while (!r) + { + if (can_go_down) + { + /* going down ? */ + off=info->lastkey_length-extra; + subkeys=ft_sintXkorr(info->lastkey+off); + } + if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length) + break; + r= _ma_search_next(info, ftbw->keyinfo, info->lastkey, + info->lastkey_length, + SEARCH_BIGGER, ftbw->key_root); + } + + if (!r && !ftbw->off) + { + r= ha_compare_text(ftb->charset, + (uchar*) info->lastkey+1, + info->lastkey_length-extra-1, + (uchar*) ftbw->word+1, + ftbw->len-1, + (my_bool) (ftbw->flags & FTB_FLAG_TRUNC), 0); + } + + if (r) /* not found */ + { + if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC)) + { + ftbw->docid[0]=HA_OFFSET_ERROR; + if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0) + { + /* + This word MUST BE present in every document returned, + so we can stop the search right now + */ + ftb->state=INDEX_DONE; + return 1; /* search is done */ + } + else + return 0; + } + + /* going up to the first-level tree to continue search there */ + _ma_dpointer(info, (lastkey_buf+HA_FT_WLEN), ftbw->key_root); + ftbw->key_root=info->s->state.key_root[ftb->keynr]; + ftbw->keyinfo=info->s->keyinfo+ftb->keynr; + ftbw->off=0; + return _ft2_search(ftb, ftbw, 0); + } + + /* matching key found */ + memcpy(lastkey_buf, info->lastkey, info->lastkey_length); + if (lastkey_buf == ftbw->word) + ftbw->len=info->lastkey_length-extra; + + /* going down ? */ + if (subkeys<0) + { + /* + yep, going down, to the second-level tree + TODO here: subkey-based optimization + */ + ftbw->off=off; + ftbw->key_root= info->cur_row.lastpos; + ftbw->keyinfo=& info->s->ft2_keyinfo; + r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root); + DBUG_ASSERT(r==0); /* found something */ + memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length); + } + ftbw->docid[0]= info->cur_row.lastpos; + if (ftbw->flags & FTB_FLAG_YES) + *ftbw->max_docid= info->cur_row.lastpos; + return 0; +} + +static void _ftb_init_index_search(FT_INFO *ftb) +{ + int i; + FTB_WORD *ftbw; + + if ((ftb->state != READY && ftb->state !=INDEX_DONE) || + ftb->keynr == NO_SUCH_KEY) + return; + ftb->state=INDEX_SEARCH; + + for (i=ftb->queue.elements; i; i--) + { + ftbw=(FTB_WORD *)(ftb->queue.root[i]); + + if (ftbw->flags & FTB_FLAG_TRUNC) + { + /* + special treatment for truncation operator + 1. there are some (besides this) +words + | no need to search in the index, it can never ADD new rows + | to the result, and to remove half-matched rows we do scan anyway + 2. -trunc* + | same as 1. + 3. in 1 and 2, +/- need not be on the same expr. level, + but can be on any upper level, as in +word +(trunc1* trunc2*) + 4. otherwise + | We have to index-search for this prefix. + | It may cause duplicates, as in the index (sorted by ) + | + | + | + | Searching for "aa*" will find row1 twice... + */ + FTB_EXPR *ftbe; + for (ftbe=(FTB_EXPR*)ftbw; + ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC); + ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up) + { + if (ftbe->flags & FTB_FLAG_NO || /* 2 */ + ftbe->up->ythresh - ftbe->up->yweaks >1) /* 1 */ + { + FTB_EXPR *top_ftbe=ftbe->up; + ftbw->docid[0]=HA_OFFSET_ERROR; + for (ftbe=(FTB_EXPR *)ftbw; + ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO); + ftbe=ftbe->up) + ftbe->up->yweaks++; + ftbe=0; + break; + } + } + if (!ftbe) + continue; + /* 4 */ + if (!is_tree_inited(& ftb->no_dupes)) + init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t), + _ftb_no_dupes_cmp,0,0,0); + else + reset_tree(& ftb->no_dupes); + } + + ftbw->off=0; /* in case of reinit */ + if (_ft2_search(ftb, ftbw, 1)) + return; + } + queue_fix(& ftb->queue); +} + + +FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr, byte *query, + uint query_len, CHARSET_INFO *cs) +{ + FTB *ftb; + FTB_EXPR *ftbe; + FTB_WORD *ftbw; + + if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME)))) + return 0; + ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean; + ftb->state=UNINITIALIZED; + ftb->info=info; + ftb->keynr=keynr; + ftb->charset=cs; + DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset); + ftb->with_scan=0; + ftb->lastpos=HA_OFFSET_ERROR; + bzero(& ftb->no_dupes, sizeof(TREE)); + ftb->last_word= 0; + + init_alloc_root(&ftb->mem_root, 1024, 1024); + ftb->queue.max_elements= 0; + if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)))) + goto err; + ftbe->weight=1; + ftbe->flags=FTB_FLAG_YES; + ftbe->nos=1; + ftbe->up=0; + ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0; + ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR; + ftbe->phrase= NULL; + ftbe->document= 0; + ftb->root=ftbe; + _ftb_parse_query(ftb, query, query_len, keynr == NO_SUCH_KEY ? + &ft_default_parser : + info->s->keyinfo[keynr].parser); + /* + Hack: instead of init_queue, we'll use reinit queue to be able + to alloc queue with alloc_root() + */ + if (! (ftb->queue.root= (byte **)alloc_root(&ftb->mem_root, + (ftb->queue.max_elements + 1) * + sizeof(void *)))) + goto err; + reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0, + (int (*)(void*, byte*, byte*))FTB_WORD_cmp, 0); + for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev) + queue_insert(&ftb->queue, (byte *)ftbw); + ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root, + sizeof(FTB_WORD *)*ftb->queue.elements); + memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); + qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), + (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); + if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; + ftb->state=READY; + return ftb; +err: + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); + return 0; +} + + +typedef struct st_my_ftb_phrase_param +{ + LIST *phrase; + LIST *document; + CHARSET_INFO *cs; + uint phrase_length; + uint document_length; + uint match; +} MY_FTB_PHRASE_PARAM; + + +static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam; + FT_WORD *w= (FT_WORD *)phrase_param->document->data; + LIST *phrase, *document; + w->pos= word; + w->len= word_len; + phrase_param->document= phrase_param->document->prev; + if (phrase_param->phrase_length > phrase_param->document_length) + { + phrase_param->document_length++; + return 0; + } + /* TODO: rewrite phrase search to avoid + comparing the same word twice. */ + for (phrase= phrase_param->phrase, document= phrase_param->document->next; + phrase; phrase= phrase->next, document= document->next) + { + FT_WORD *phrase_word= (FT_WORD *)phrase->data; + FT_WORD *document_word= (FT_WORD *)document->data; + if (my_strnncoll(phrase_param->cs, + (uchar*) phrase_word->pos, phrase_word->len, + (uchar*) document_word->pos, document_word->len)) + return 0; + } + phrase_param->match++; + return 0; +} + + +static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param, + char *document, int len) +{ + FT_WORD word; + MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam; + const char *docend= document + len; + while (maria_ft_simple_get_word(phrase_param->cs, &document, docend, &word, FALSE)) + { + param->mysql_add_word(param, word.pos, word.len, 0); + if (phrase_param->match) + return 1; + } + return 0; +} + + +/* + Checks if given buffer matches phrase list. + + SYNOPSIS + _ftb_check_phrase() + s0 start of buffer + e0 end of buffer + phrase broken into list phrase + cs charset info + + RETURN VALUE + 1 is returned if phrase found, 0 else. +*/ + +static int _ftb_check_phrase(FTB *ftb, const byte *document, uint len, + FTB_EXPR *ftbe, struct st_mysql_ftparser *parser) +{ + MY_FTB_PHRASE_PARAM ftb_param; + MYSQL_FTPARSER_PARAM *param; + DBUG_ENTER("_ftb_check_phrase"); + DBUG_ASSERT(parser); + + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1))) + DBUG_RETURN(0); + ftb_param.phrase= ftbe->phrase; + ftb_param.document= ftbe->document; + ftb_param.cs= ftb->charset; + ftb_param.phrase_length= list_length(ftbe->phrase); + ftb_param.document_length= 1; + ftb_param.match= 0; + + param->mysql_parse= ftb_check_phrase_internal; + param->mysql_add_word= ftb_phrase_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->cs= ftb->charset; + param->doc= (byte *)document; + param->length= len; + param->flags= 0; + param->mode= MYSQL_FTPARSER_WITH_STOPWORDS; + parser->parse(param); + DBUG_RETURN(ftb_param.match ? 1 : 0); +} + + +static void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) +{ + FT_SEG_ITERATOR ftsi; + FTB_EXPR *ftbe; + float weight=ftbw->weight; + int yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != 0); + my_off_t curdoc=ftbw->docid[mode]; + struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ? + &ft_default_parser : + ftb->info->s->keyinfo[ftb->keynr].parser; + + for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) + { + ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks); + if (ftbe->docid[mode] != curdoc) + { + ftbe->cur_weight=0; + ftbe->yesses=ftbe->nos=0; + ftbe->docid[mode]=curdoc; + } + if (ftbe->nos) + break; + if (yn_flag & FTB_FLAG_YES) + { + weight /= ftbe->ythresh; + ftbe->cur_weight += weight; + if ((int) ++ftbe->yesses == ythresh) + { + yn_flag=ftbe->flags; + weight=ftbe->cur_weight*ftbe->weight; + if (mode && ftbe->phrase) + { + int not_found=1; + + memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); + while (_ma_ft_segiterator(&ftsi) && not_found) + { + if (!ftsi.pos) + continue; + not_found = ! _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, + ftbe, parser); + } + if (not_found) break; + } /* ftbe->quot */ + } + else + break; + } + else + if (yn_flag & FTB_FLAG_NO) + { + /* + NOTE: special sort function of queue assures that all + (yn_flag & FTB_FLAG_NO) != 0 + events for every particular subexpression will + "auto-magically" happen BEFORE all the + (yn_flag & FTB_FLAG_YES) != 0 events. So no + already matched expression can become not-matched again. + */ + ++ftbe->nos; + break; + } + else + { + if (ftbe->ythresh) + weight/=3; + ftbe->cur_weight += weight; + if ((int) ftbe->yesses < ythresh) + break; + if (!(yn_flag & FTB_FLAG_WONLY)) + yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ; + weight*= ftbe->weight; + } + } +} + + +int maria_ft_boolean_read_next(FT_INFO *ftb, char *record) +{ + FTB_EXPR *ftbe; + FTB_WORD *ftbw; + MARIA_HA *info=ftb->info; + my_off_t curdoc; + + if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE) + return -1; + + /* black magic ON */ + if ((int) _ma_check_index(info, ftb->keynr) < 0) + return my_errno; + if (_ma_readinfo(info, F_RDLCK, 1)) + return my_errno; + /* black magic OFF */ + + if (!ftb->queue.elements) + return my_errno=HA_ERR_END_OF_FILE; + + /* Attention!!! Address of a local variable is used here! See err: label */ + ftb->queue.first_cmp_arg=(void *)&curdoc; + + while (ftb->state == INDEX_SEARCH && + (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) != + HA_OFFSET_ERROR) + { + while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) + { + _ftb_climb_the_tree(ftb, ftbw, 0); + + /* update queue */ + _ft2_search(ftb, ftbw, 0); + queue_replaced(& ftb->queue); + } + + ftbe=ftb->root; + if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 && + ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos) + { + /* curdoc matched ! */ + if (is_tree_inited(&ftb->no_dupes) && + tree_insert(&ftb->no_dupes, &curdoc, 0, + ftb->no_dupes.custom_arg)->count >1) + /* but it managed already to get past this line once */ + continue; + + info->cur_row.lastpos= curdoc; + /* Clear all states, except that the table was updated */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + if (!(*info->read_record)(info, record, curdoc)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + if (ftb->with_scan && maria_ft_boolean_find_relevance(ftb,record,0)==0) + continue; /* no match */ + my_errno=0; + goto err; + } + goto err; + } + } + ftb->state=INDEX_DONE; + my_errno=HA_ERR_END_OF_FILE; +err: + ftb->queue.first_cmp_arg=(void *)0; + return my_errno; +} + + +typedef struct st_my_ftb_find_param +{ + FT_INFO *ftb; + FT_SEG_ITERATOR *ftsi; +} MY_FTB_FIND_PARAM; + + +static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam; + FT_INFO *ftb= ftb_param->ftb; + FTB_WORD *ftbw; + int a, b, c; + for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2) + { + ftbw= ftb->list[c]; + if (ha_compare_text(ftb->charset, (uchar*)word, len, + (uchar*)ftbw->word+1, ftbw->len-1, + (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0) + b= c; + else + a= c; + } + for (; c >= 0; c--) + { + ftbw= ftb->list[c]; + if (ha_compare_text(ftb->charset, (uchar*)word, len, + (uchar*)ftbw->word + 1,ftbw->len - 1, + (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0)) + break; + if (ftbw->docid[1] == ftb->info->cur_row.lastpos) + continue; + ftbw->docid[1]= ftb->info->cur_row.lastpos; + _ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi); + } + return(0); +} + + +static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param, + char *doc, int len) +{ + MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam; + FT_INFO *ftb= ftb_param->ftb; + char *end= doc + len; + FT_WORD w; + while (maria_ft_simple_get_word(ftb->charset, &doc, end, &w, TRUE)) + param->mysql_add_word(param, w.pos, w.len, 0); + return(0); +} + + +float maria_ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) +{ + FTB_EXPR *ftbe; + FT_SEG_ITERATOR ftsi, ftsi2; + MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos; + MY_FTB_FIND_PARAM ftb_param; + MYSQL_FTPARSER_PARAM *param; + struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ? + &ft_default_parser : + ftb->info->s->keyinfo[ftb->keynr].parser; + + if (docid == HA_OFFSET_ERROR) + return -2.0; + if (!ftb->queue.elements) + return 0; + if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0))) + return 0; + + if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos) + { + FTB_EXPR *x; + uint i; + + for (i=0; i < ftb->queue.elements; i++) + { + ftb->list[i]->docid[1]=HA_OFFSET_ERROR; + for (x=ftb->list[i]->up; x; x=x->up) + x->docid[1]=HA_OFFSET_ERROR; + } + } + + ftb->lastpos=docid; + + if (ftb->keynr==NO_SUCH_KEY) + _ma_ft_segiterator_dummy_init(record, length, &ftsi); + else + _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); + memcpy(&ftsi2, &ftsi, sizeof(ftsi)); + + ftb_param.ftb= ftb; + ftb_param.ftsi= &ftsi2; + param->mysql_parse= ftb_find_relevance_parse; + param->mysql_add_word= ftb_find_relevance_add_word; + param->mysql_ftparam= (void *)&ftb_param; + param->flags= 0; + param->cs= ftb->charset; + param->mode= MYSQL_FTPARSER_SIMPLE_MODE; + + while (_ma_ft_segiterator(&ftsi)) + { + if (!ftsi.pos) + continue; + param->doc= (byte *)ftsi.pos; + param->length= ftsi.len; + parser->parse(param); + } + ftbe=ftb->root; + if (ftbe->docid[1]==docid && ftbe->cur_weight>0 && + ftbe->yesses>=ftbe->ythresh && !ftbe->nos) + { /* row matched ! */ + return ftbe->cur_weight; + } + else + { /* match failed ! */ + return 0.0; + } +} + + +void maria_ft_boolean_close_search(FT_INFO *ftb) +{ + if (is_tree_inited(& ftb->no_dupes)) + { + delete_tree(& ftb->no_dupes); + } + free_root(& ftb->mem_root, MYF(0)); + my_free((gptr)ftb,MYF(0)); +} + + +float maria_ft_boolean_get_relevance(FT_INFO *ftb) +{ + return ftb->root->cur_weight; +} + + +void maria_ft_boolean_reinit_search(FT_INFO *ftb) +{ + _ftb_init_index_search(ftb); +} diff --git a/storage/maria/ma_ft_eval.c b/storage/maria/ma_ft_eval.c new file mode 100644 index 00000000000..5fc67c6c664 --- /dev/null +++ b/storage/maria/ma_ft_eval.c @@ -0,0 +1,254 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include "maria_ft_eval.h" +#include +#include + +static void print_error(int exit_code, const char *fmt,...); +static void get_options(int argc, char *argv[]); +static int create_record(char *pos, FILE *file); +static void usage(); + +static struct my_option my_long_options[] = +{ + {"", 's', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'q', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '#', "", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +int main(int argc, char *argv[]) +{ + MARIA_HA *file; + int i,j; + + MY_INIT(argv[0]); + get_options(argc,argv); + bzero((char*)recinfo,sizeof(recinfo)); + + maria_init(); + /* First define 2 columns */ + recinfo[0].type=FIELD_SKIP_ENDSPACE; + recinfo[0].length=docid_length; + recinfo[1].type=FIELD_BLOB; + recinfo[1].length= 4+portable_sizeof_char_ptr; + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[0].seg[0].type= HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].flag= HA_BLOB_PART; + keyinfo[0].seg[0].start=recinfo[0].length; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit=0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].bit_start=4; + keyinfo[0].seg[0].language=MY_CHARSET_CURRENT; + keyinfo[0].flag = HA_FULLTEXT; + + if (!silent) + printf("- Creating isam-file\n"); + if (maria_create(filename,1,keyinfo,2,recinfo,0,NULL,(MARIA_CREATE_INFO*) 0,0)) + goto err; + if (!(file=maria_open(filename,2,0))) + goto err; + if (!silent) + printf("Initializing stopwords\n"); + maria_ft_init_stopwords(stopwordlist); + + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + i=0; + while (create_record(record,df)) + { + error=maria_write(file,record); + if (error) + printf("I= %2d maria_write: %d errno: %d\n",i,error,my_errno); + i++; + } + fclose(df); + + if (maria_close(file)) goto err; + if (!silent) + printf("- Reopening file\n"); + if (!(file=maria_open(filename,2,0))) goto err; + if (!silent) + printf("- Reading rows with key\n"); + for (i=1;create_record(record,qf);i++) + { + FT_DOCLIST *result; + double w; + int t, err; + + result=maria_ft_nlq_init_search(file,0,blob_record,(uint) strlen(blob_record),1); + if (!result) + { + printf("Query %d failed with errno %3d\n",i,my_errno); + goto err; + } + if (!silent) + printf("Query %d. Found: %d.\n",i,result->ndocs); + for (j=0;(err=maria_ft_nlq_read_next(result, read_record))==0;j++) + { + t=uint2korr(read_record); + w=maria_ft_nlq_get_relevance(result); + printf("%d %.*s %f\n",i,t,read_record+2,w); + } + if (err != HA_ERR_END_OF_FILE) + { + printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno); + goto err; + } + maria_ft_nlq_close_search(result); + } + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); + + err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ + +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch (optid) { + case 's': + if (stopwordlist && stopwordlist != maria_ft_precompiled_stopwords) + break; + { + FILE *f; char s[HA_FT_MAXLEN]; int i=0,n=SWL_INIT; + + if (!(stopwordlist=(const char**) malloc(n*sizeof(char *)))) + print_error(1,"malloc(%d)",n*sizeof(char *)); + if (!(f=fopen(argument,"r"))) + print_error(1,"fopen(%s)",argument); + while (!feof(f)) + { + if (!(fgets(s,HA_FT_MAXLEN,f))) + print_error(1,"fgets(s,%d,%s)",HA_FT_MAXLEN,argument); + if (!(stopwordlist[i++]=strdup(s))) + print_error(1,"strdup(%s)",s); + if (i >= n) + { + n+=SWL_PLUS; + if (!(stopwordlist=(const char**) realloc((char*) stopwordlist, + n*sizeof(char *)))) + print_error(1,"realloc(%d)",n*sizeof(char *)); + } + } + fclose(f); + stopwordlist[i]=NULL; + break; + } + case 'q': silent=1; break; + case 'S': if (stopwordlist==maria_ft_precompiled_stopwords) stopwordlist=NULL; break; + case '#': + DBUG_PUSH (argument); + break; + case 'V': + case '?': + case 'h': + usage(); + exit(1); + } + return 0; +} + + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + + if (!(d_file=argv[optind])) print_error(1,"No d_file"); + if (!(df=fopen(d_file,"r"))) + print_error(1,"fopen(%s)",d_file); + if (!(q_file=argv[optind+1])) print_error(1,"No q_file"); + if (!(qf=fopen(q_file,"r"))) + print_error(1,"fopen(%s)",q_file); + return; +} /* get options */ + + +static int create_record(char *pos, FILE *file) +{ + uint tmp; char *ptr; + + bzero((char *)pos,MAX_REC_LENGTH); + + /* column 1 - VARCHAR */ + if (!(fgets(pos+2,MAX_REC_LENGTH-32,file))) + { + if (feof(file)) + return 0; + else + print_error(1,"fgets(docid) - 1"); + } + tmp=(uint) strlen(pos+2)-1; + int2store(pos,tmp); + pos+=recinfo[0].length; + + /* column 2 - BLOB */ + + if (!(fgets(blob_record,MAX_BLOB_LENGTH,file))) + print_error(1,"fgets(docid) - 2"); + tmp=(uint) strlen(blob_record); + int4store(pos,tmp); + ptr=blob_record; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + return 1; +} + +/* VARARGS */ + +static void print_error(int exit_code, const char *fmt,...) +{ + va_list args; + + va_start(args,fmt); + fprintf(stderr,"%s: error: ",my_progname); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + exit(exit_code); +} + + +static void usage() +{ + printf("%s [options]\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_ft_eval.h b/storage/maria/ma_ft_eval.h new file mode 100644 index 00000000000..481943dfb0b --- /dev/null +++ b/storage/maria/ma_ft_eval.h @@ -0,0 +1,41 @@ +/* Copyright (C) 2006 MySQL AB & Sergei A. Golubchik + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +const char **stopwordlist=maria_ft_precompiled_stopwords; + +#define MAX_REC_LENGTH 128 +#define MAX_BLOB_LENGTH 60000 +char record[MAX_REC_LENGTH], read_record[MAX_REC_LENGTH+MAX_BLOB_LENGTH]; +char blob_record[MAX_BLOB_LENGTH+20*20]; + +char *filename= (char*) "EVAL"; + +int silent=0, error=0; + +uint key_length=MAX_BLOB_LENGTH,docid_length=32; +char *d_file, *q_file; +FILE *df,*qf; + +MARIA_COLUMNDEF recinfo[3]; +MARIA_KEYDEF keyinfo[2]; +HA_KEYSEG keyseg[10]; + +#define SWL_INIT 500 +#define SWL_PLUS 50 + +#define MAX_LINE_LENGTH 128 +char line[MAX_LINE_LENGTH]; diff --git a/storage/maria/ma_ft_nlq_search.c b/storage/maria/ma_ft_nlq_search.c new file mode 100644 index 00000000000..145b7891dd1 --- /dev/null +++ b/storage/maria/ma_ft_nlq_search.c @@ -0,0 +1,370 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#define FT_CORE +#include "ma_ftdefs.h" + +/* search with natural language queries */ + +typedef struct ft_doc_rec +{ + my_off_t dpos; + double weight; +} FT_DOC; + +struct st_ft_info +{ + struct _ft_vft *please; + MARIA_HA *info; + int ndocs; + int curdoc; + FT_DOC doc[1]; +}; + +typedef struct st_all_in_one +{ + MARIA_HA *info; + uint keynr; + CHARSET_INFO *charset; + uchar *keybuff; + TREE dtree; +} ALL_IN_ONE; + +typedef struct st_ft_superdoc +{ + FT_DOC doc; + FT_WORD *word_ptr; + double tmp_weight; +} FT_SUPERDOC; + +static int FT_SUPERDOC_cmp(void* cmp_arg __attribute__((unused)), + FT_SUPERDOC *p1, FT_SUPERDOC *p2) +{ + if (p1->doc.dpos < p2->doc.dpos) + return -1; + if (p1->doc.dpos == p2->doc.dpos) + return 0; + return 1; +} + +static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) +{ + int subkeys, r; + uint keylen, doc_cnt; + FT_SUPERDOC sdoc, *sptr; + TREE_ELEMENT *selem; + double gweight=1; + MARIA_HA *info= aio->info; + byte *keybuff= (byte*) aio->keybuff; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+aio->keynr; + my_off_t key_root=info->s->state.key_root[aio->keynr]; + uint extra=HA_FT_WLEN+info->s->base.rec_reflength; +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + float tmp_weight; +#else +#error +#endif + + DBUG_ENTER("walk_and_match"); + + word->weight=LWS_FOR_QUERY; + + keylen= _ma_ft_make_key(info,aio->keynr,(char*) keybuff,word,0); + keylen-=HA_FT_WLEN; + doc_cnt=0; + + /* Skip rows inserted by current inserted */ + for (r= _ma_search(info, keyinfo, keybuff, keylen, SEARCH_FIND, key_root) ; + !r && + (subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && + info->cur_row.lastpos >= info->state->data_file_length ; + r= _ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, SEARCH_BIGGER, key_root)) + ; + + info->update|= HA_STATE_AKTIV; /* for _ma_test_if_changed() */ + + /* The following should be safe, even if we compare doubles */ + while (!r && gweight) + { + + if (keylen && + ha_compare_text(aio->charset, + (uchar*) info->lastkey+1, info->lastkey_length-extra-1, + (uchar*) keybuff+1, keylen-1, 0, 0)) + break; + + if (subkeys<0) + { + if (doc_cnt) + DBUG_RETURN(1); /* index is corrupted */ + /* + TODO here: unsafe optimization, should this word + be skipped (based on subkeys) ? + */ + keybuff+=keylen; + keyinfo=& info->s->ft2_keyinfo; + key_root= info->cur_row.lastpos; + keylen=0; + r= _ma_search_first(info, keyinfo, key_root); + goto do_skip; + } +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + tmp_weight=*(float*)&subkeys; +#else +#error +#endif + /* The following should be safe, even if we compare doubles */ + if (tmp_weight==0) + DBUG_RETURN(doc_cnt); /* stopword, doc_cnt should be 0 */ + + sdoc.doc.dpos= info->cur_row.lastpos; + + /* saving document matched into dtree */ + if (!(selem=tree_insert(&aio->dtree, &sdoc, 0, aio->dtree.custom_arg))) + DBUG_RETURN(1); + + sptr=(FT_SUPERDOC *)ELEMENT_KEY((&aio->dtree), selem); + + if (selem->count==1) /* document's first match */ + sptr->doc.weight=0; + else + sptr->doc.weight+=sptr->tmp_weight*sptr->word_ptr->weight; + + sptr->word_ptr=word; + sptr->tmp_weight=tmp_weight; + + doc_cnt++; + + gweight=word->weight*GWS_IN_USE; + if (gweight < 0 || doc_cnt > 2000000) + gweight=0; + + if (_ma_test_if_changed(info) == 0) + r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); + else + r= _ma_search(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); +do_skip: + while ((subkeys=ft_sintXkorr(info->lastkey+info->lastkey_length-extra)) > 0 && + !r && info->cur_row.lastpos >= info->state->data_file_length) + r= _ma_search_next(info, keyinfo, info->lastkey, info->lastkey_length, + SEARCH_BIGGER, key_root); + + } + word->weight=gweight; + + DBUG_RETURN(0); +} + + +static int walk_and_copy(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), FT_DOC **to) +{ + DBUG_ENTER("walk_and_copy"); + from->doc.weight+=from->tmp_weight*from->word_ptr->weight; + (*to)->dpos=from->doc.dpos; + (*to)->weight=from->doc.weight; + (*to)++; + DBUG_RETURN(0); +} + +static int walk_and_push(FT_SUPERDOC *from, + uint32 count __attribute__((unused)), QUEUE *best) +{ + DBUG_ENTER("walk_and_copy"); + from->doc.weight+=from->tmp_weight*from->word_ptr->weight; + set_if_smaller(best->elements, ft_query_expansion_limit-1); + queue_insert(best, (byte *)& from->doc); + DBUG_RETURN(0); +} + + +static int FT_DOC_cmp(void *unused __attribute__((unused)), + FT_DOC *a, FT_DOC *b) +{ + return sgn(b->weight - a->weight); +} + + +FT_INFO *maria_ft_init_nlq_search(MARIA_HA *info, uint keynr, byte *query, + uint query_len, uint flags, byte *record) +{ + TREE wtree; + ALL_IN_ONE aio; + FT_DOC *dptr; + FT_INFO *dlist=NULL; + MARIA_RECORD_POS saved_lastpos= info->cur_row.lastpos; + struct st_mysql_ftparser *parser; + MYSQL_FTPARSER_PARAM *ftparser_param; + DBUG_ENTER("maria_ft_init_nlq_search"); + + /* black magic ON */ + if ((int) (keynr = _ma_check_index(info,keynr)) < 0) + DBUG_RETURN(NULL); + if (_ma_readinfo(info,F_RDLCK,1)) + DBUG_RETURN(NULL); + /* black magic OFF */ + + aio.info=info; + aio.keynr=keynr; + aio.charset=info->s->keyinfo[keynr].seg->charset; + aio.keybuff= (uchar*) info->lastkey+info->s->base.max_key_length; + parser= info->s->keyinfo[keynr].parser; + if (! (ftparser_param= maria_ftparser_call_initializer(info, keynr, 0))) + goto err; + + bzero(&wtree,sizeof(wtree)); + + init_tree(&aio.dtree,0,0,sizeof(FT_SUPERDOC),(qsort_cmp2)&FT_SUPERDOC_cmp,0, + NULL, NULL); + + maria_ft_parse_init(&wtree, aio.charset); + ftparser_param->flags= 0; + if (maria_ft_parse(&wtree, query, query_len, parser, ftparser_param, + &wtree.mem_root)) + goto err; + + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, + left_root_right)) + goto err; + + if (flags & FT_EXPAND && ft_query_expansion_limit) + { + QUEUE best; + init_queue(&best,ft_query_expansion_limit,0,0, (queue_compare) &FT_DOC_cmp, + 0); + tree_walk(&aio.dtree, (tree_walk_action) &walk_and_push, + &best, left_root_right); + while (best.elements) + { + my_off_t docid=((FT_DOC *)queue_remove(& best, 0))->dpos; + if (!(*info->read_record)(info, record, docid)) + { + info->update|= HA_STATE_AKTIV; + ftparser_param->flags= MYSQL_FTFLAGS_NEED_COPY; + _ma_ft_parse(&wtree, info, keynr, record, ftparser_param, + &wtree.mem_root); + } + } + delete_queue(&best); + reset_tree(&aio.dtree); + if (tree_walk(&wtree, (tree_walk_action)&walk_and_match, &aio, + left_root_right)) + goto err; + + } + + /* + If ndocs == 0, this will not allocate RAM for FT_INFO.doc[], + so if ndocs == 0, FT_INFO.doc[] must not be accessed. + */ + dlist=(FT_INFO *)my_malloc(sizeof(FT_INFO)+ + sizeof(FT_DOC)* + (int)(aio.dtree.elements_in_tree-1), + MYF(0)); + if (!dlist) + goto err; + + dlist->please= (struct _ft_vft *) & _ma_ft_vft_nlq; + dlist->ndocs=aio.dtree.elements_in_tree; + dlist->curdoc=-1; + dlist->info=aio.info; + dptr=dlist->doc; + + tree_walk(&aio.dtree, (tree_walk_action) &walk_and_copy, + &dptr, left_root_right); + + if (flags & FT_SORTED) + qsort2(dlist->doc, dlist->ndocs, sizeof(FT_DOC), (qsort2_cmp)&FT_DOC_cmp, 0); + +err: + delete_tree(&aio.dtree); + delete_tree(&wtree); + info->cur_row.lastpos= saved_lastpos; + DBUG_RETURN(dlist); +} + + +int maria_ft_nlq_read_next(FT_INFO *handler, char *record) +{ + MARIA_HA *info= (MARIA_HA *) handler->info; + + if (++handler->curdoc >= handler->ndocs) + { + --handler->curdoc; + return HA_ERR_END_OF_FILE; + } + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + info->cur_row.lastpos= handler->doc[handler->curdoc].dpos; + if (!(*info->read_record)(info, record, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + return 0; + } + return my_errno; +} + + +float maria_ft_nlq_find_relevance(FT_INFO *handler, + byte *record __attribute__((unused)), + uint length __attribute__((unused))) +{ + int a,b,c; + FT_DOC *docs=handler->doc; + MARIA_RECORD_POS docid= handler->info->cur_row.lastpos; + + if (docid == HA_POS_ERROR) + return -5.0; + + /* Assuming docs[] is sorted by dpos... */ + + for (a=0, b=handler->ndocs, c=(a+b)/2; b-a>1; c=(a+b)/2) + { + if (docs[c].dpos > docid) + b=c; + else + a=c; + } + /* bounds check to avoid accessing unallocated handler->doc */ + if (a < handler->ndocs && docs[a].dpos == docid) + return (float) docs[a].weight; + else + return 0.0; +} + + +void maria_ft_nlq_close_search(FT_INFO *handler) +{ + my_free((gptr)handler,MYF(0)); +} + + +float maria_ft_nlq_get_relevance(FT_INFO *handler) +{ + return (float) handler->doc[handler->curdoc].weight; +} + + +void maria_ft_nlq_reinit_search(FT_INFO *handler) +{ + handler->curdoc=-1; +} + diff --git a/storage/maria/ma_ft_parser.c b/storage/maria/ma_ft_parser.c new file mode 100644 index 00000000000..f41b53bf3f7 --- /dev/null +++ b/storage/maria/ma_ft_parser.c @@ -0,0 +1,424 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#include "ma_ftdefs.h" + +typedef struct st_maria_ft_docstat { + FT_WORD *list; + uint uniq; + double sum; +} FT_DOCSTAT; + + +typedef struct st_my_maria_ft_parser_param +{ + TREE *wtree; + MEM_ROOT *mem_root; +} MY_FT_PARSER_PARAM; + + +static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) +{ + return ha_compare_text(cs, (uchar*) w1->pos, w1->len, + (uchar*) w2->pos, w2->len, 0, 0); +} + +static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat) +{ + word->weight=LWS_IN_USE; + docstat->sum+=word->weight; + memcpy_fixed((docstat->list)++,word,sizeof(FT_WORD)); + return 0; +} + +/* transforms tree of words into the array, applying normalization */ + +FT_WORD * maria_ft_linearize(TREE *wtree, MEM_ROOT *mem_root) +{ + FT_WORD *wlist,*p; + FT_DOCSTAT docstat; + DBUG_ENTER("maria_ft_linearize"); + + if ((wlist=(FT_WORD *) alloc_root(mem_root, sizeof(FT_WORD)* + (1+wtree->elements_in_tree)))) + { + docstat.list=wlist; + docstat.uniq=wtree->elements_in_tree; + docstat.sum=0; + tree_walk(wtree,(tree_walk_action)&walk_and_copy,&docstat,left_root_right); + } + delete_tree(wtree); + if (!wlist) + DBUG_RETURN(NULL); + + docstat.list->pos=NULL; + + for (p=wlist;p->pos;p++) + { + p->weight=PRENORM_IN_USE; + } + + for (p=wlist;p->pos;p++) + { + p->weight/=NORM_IN_USE; + } + + DBUG_RETURN(wlist); +} + +my_bool maria_ft_boolean_check_syntax_string(const byte *str) +{ + uint i, j; + + if (!str || + (strlen(str)+1 != sizeof(ft_boolean_syntax)) || + (str[0] != ' ' && str[1] != ' ')) + return 1; + for (i=0; i 127 || + my_isalnum(default_charset_info, str[i])) + return 1; + for (j=0; jyesno=(FTB_YES==' ') ? 1 : (param->quot != 0); + param->weight_adjust= param->wasign= 0; + param->type= FT_TOKEN_EOF; + + while (doc 0 ? mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + break; + if (*doc == FTB_RQUOT && param->quot) + { + param->quot=doc; + *start=doc+1; + param->type= FT_TOKEN_RIGHT_PAREN; + goto ret; + } + if (!param->quot) + { + if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) + { + /* param->prev=' '; */ + *start=doc+1; + if (*doc == FTB_LQUOT) param->quot=*start; + param->type= (*doc == FTB_RBR ? FT_TOKEN_RIGHT_PAREN : FT_TOKEN_LEFT_PAREN); + goto ret; + } + if (param->prev == ' ') + { + if (*doc == FTB_YES ) { param->yesno=+1; continue; } else + if (*doc == FTB_EGAL) { param->yesno= 0; continue; } else + if (*doc == FTB_NO ) { param->yesno=-1; continue; } else + if (*doc == FTB_INC ) { param->weight_adjust++; continue; } else + if (*doc == FTB_DEC ) { param->weight_adjust--; continue; } else + if (*doc == FTB_NEG ) { param->wasign= !param->wasign; continue; } + } + } + param->prev=*doc; + param->yesno=(FTB_YES==' ') ? 1 : (param->quot != 0); + param->weight_adjust= param->wasign= 0; + } + + mwc=length=0; + for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + mwc=0; + else if (!misc_word_char(*doc) || mwc) + break; + else + mwc++; + } + param->prev='A'; /* be sure *prev is true_word_char */ + word->len= (uint)(doc-word->pos) - mwc; + if ((param->trunc=(doc= ft_min_word_len && !is_stopword(word->pos, word->len)) + || param->trunc) && length < ft_max_word_len) + { + *start=doc; + param->type= FT_TOKEN_WORD; + goto ret; + } + else if (length) /* make sure length > 0 (if start contains spaces only) */ + { + *start= doc; + param->type= FT_TOKEN_STOPWORD; + goto ret; + } + } + if (param->quot) + { + param->quot=*start=doc; + param->type= 3; /* FT_RBR */ + goto ret; + } +ret: + return param->type; +} + +byte maria_ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, + FT_WORD *word, my_bool skip_stopwords) +{ + byte *doc= *start; + uint mwc, length, mbl; + int ctype; + DBUG_ENTER("maria_ft_simple_get_word"); + + do + { + for (;; doc+= (mbl > 0 ? mbl : 1)) + { + if (doc >= end) + DBUG_RETURN(0); + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + break; + } + + mwc= length= 0; + for (word->pos= doc; doc < end; length++, doc+= (mbl > 0 ? mbl : 1)) + { + mbl= cs->cset->ctype(cs, &ctype, (uchar*)doc, (uchar*)end); + if (true_word_char(ctype, *doc)) + mwc= 0; + else if (!misc_word_char(*doc) || mwc) + break; + else + mwc++; + } + + word->len= (uint)(doc-word->pos) - mwc; + + if (skip_stopwords == FALSE || + (length >= ft_min_word_len && length < ft_max_word_len && + !is_stopword(word->pos, word->len))) + { + *start= doc; + DBUG_RETURN(1); + } + } while (doc < end); + DBUG_RETURN(0); +} + +void maria_ft_parse_init(TREE *wtree, CHARSET_INFO *cs) +{ + DBUG_ENTER("maria_ft_parse_init"); + if (!is_tree_inited(wtree)) + init_tree(wtree,0,0,sizeof(FT_WORD),(qsort_cmp2)&FT_WORD_cmp,0,NULL, cs); + DBUG_VOID_RETURN; +} + + +static int maria_ft_add_word(MYSQL_FTPARSER_PARAM *param, + char *word, int word_len, + MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused))) +{ + TREE *wtree; + FT_WORD w; + MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam; + DBUG_ENTER("maria_ft_add_word"); + wtree= ft_param->wtree; + if (param->flags & MYSQL_FTFLAGS_NEED_COPY) + { + byte *ptr; + DBUG_ASSERT(wtree->with_delete == 0); + ptr= (byte *)alloc_root(ft_param->mem_root, word_len); + memcpy(ptr, word, word_len); + w.pos= ptr; + } + else + w.pos= word; + w.len= word_len; + if (!tree_insert(wtree, &w, 0, wtree->custom_arg)) + { + delete_tree(wtree); + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + + +static int maria_ft_parse_internal(MYSQL_FTPARSER_PARAM *param, + char *doc, int doc_len) +{ + byte *end=doc+doc_len; + MY_FT_PARSER_PARAM *ft_param=param->mysql_ftparam; + TREE *wtree= ft_param->wtree; + FT_WORD w; + DBUG_ENTER("maria_ft_parse_internal"); + + while (maria_ft_simple_get_word(wtree->custom_arg, &doc, end, &w, TRUE)) + if (param->mysql_add_word(param, w.pos, w.len, 0)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +int maria_ft_parse(TREE *wtree, byte *doc, int doclen, + struct st_mysql_ftparser *parser, + MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root) +{ + MY_FT_PARSER_PARAM my_param; + DBUG_ENTER("maria_ft_parse"); + DBUG_ASSERT(parser); + my_param.wtree= wtree; + my_param.mem_root= mem_root; + + param->mysql_parse= maria_ft_parse_internal; + param->mysql_add_word= maria_ft_add_word; + param->mysql_ftparam= &my_param; + param->cs= wtree->custom_arg; + param->doc= doc; + param->length= doclen; + param->mode= MYSQL_FTPARSER_SIMPLE_MODE; + DBUG_RETURN(parser->parse(param)); +} + + +#define MAX_PARAM_NR 2 +MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, + uint keynr, uint paramnr) +{ + uint32 ftparser_nr; + struct st_mysql_ftparser *parser; + if (! info->ftparser_param) + { + /* info->ftparser_param can not be zero after the initialization, + because it always includes built-in fulltext parser. And built-in + parser can be called even if the table has no fulltext indexes and + no varchar/text fields. */ + if (! info->s->ftparsers) + { + /* It's ok that modification to shared structure is done w/o mutex + locks, because all threads would set the same variables to the + same values. */ + uint i, j, keys= info->s->state.header.keys, ftparsers= 1; + for (i= 0; i < keys; i++) + { + MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i]; + if (keyinfo->flag & HA_FULLTEXT) + { + for (j= 0;; j++) + { + if (j == i) + { + keyinfo->ftparser_nr= ftparsers++; + break; + } + if (info->s->keyinfo[j].flag & HA_FULLTEXT && + keyinfo->parser == info->s->keyinfo[j].parser) + { + keyinfo->ftparser_nr= info->s->keyinfo[j].ftparser_nr; + break; + } + } + } + } + info->s->ftparsers= ftparsers; + } + /* + We have to allocate two MYSQL_FTPARSER_PARAM structures per plugin + because in a boolean search a parser is called recursively + ftb_find_relevance* calls ftb_check_phrase* + (MAX_PARAM_NR=2) + */ + info->ftparser_param= (MYSQL_FTPARSER_PARAM *) + my_malloc(MAX_PARAM_NR * sizeof(MYSQL_FTPARSER_PARAM) * + info->s->ftparsers, MYF(MY_WME|MY_ZEROFILL)); + init_alloc_root(&info->ft_memroot, FTPARSER_MEMROOT_ALLOC_SIZE, 0); + if (! info->ftparser_param) + return 0; + } + if (keynr == NO_SUCH_KEY) + { + ftparser_nr= 0; + parser= &ft_default_parser; + } + else + { + ftparser_nr= info->s->keyinfo[keynr].ftparser_nr; + parser= info->s->keyinfo[keynr].parser; + } + DBUG_ASSERT(paramnr < MAX_PARAM_NR); + ftparser_nr= ftparser_nr*MAX_PARAM_NR + paramnr; + if (! info->ftparser_param[ftparser_nr].mysql_add_word) + { + /* Note, that mysql_add_word is used here as a flag: + mysql_add_word == 0 - parser is not initialized + mysql_add_word != 0 - parser is initialized, or no + initialization needed. */ + info->ftparser_param[ftparser_nr].mysql_add_word= (void *)1; + if (parser->init && parser->init(&info->ftparser_param[ftparser_nr])) + return 0; + } + return &info->ftparser_param[ftparser_nr]; +} + + +void maria_ftparser_call_deinitializer(MARIA_HA *info) +{ + uint i, j, keys= info->s->state.header.keys; + free_root(&info->ft_memroot, MYF(0)); + if (! info->ftparser_param) + return; + for (i= 0; i < keys; i++) + { + MARIA_KEYDEF *keyinfo= &info->s->keyinfo[i]; + for (j=0; j < MAX_PARAM_NR; j++) + { + MYSQL_FTPARSER_PARAM *ftparser_param= + &info->ftparser_param[keyinfo->ftparser_nr*MAX_PARAM_NR + j]; + if (keyinfo->flag & HA_FULLTEXT && ftparser_param->mysql_add_word) + { + if (keyinfo->parser->deinit) + keyinfo->parser->deinit(ftparser_param); + ftparser_param->mysql_add_word= 0; + } + else + break; + } + } +} diff --git a/storage/maria/ma_ft_stem.c b/storage/maria/ma_ft_stem.c new file mode 100644 index 00000000000..06fc0b2df6c --- /dev/null +++ b/storage/maria/ma_ft_stem.c @@ -0,0 +1,18 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* mulitingual stem */ diff --git a/storage/maria/ma_ft_test1.c b/storage/maria/ma_ft_test1.c new file mode 100644 index 00000000000..4c98e766234 --- /dev/null +++ b/storage/maria/ma_ft_test1.c @@ -0,0 +1,317 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include "maria_ft_test1.h" +#include + +static int key_field=FIELD_VARCHAR,extra_field=FIELD_SKIP_ENDSPACE; +static uint key_length=200,extra_length=50; +static int key_type=HA_KEYTYPE_TEXT; +static int verbose=0,silent=0,skip_update=0, + no_keys=0,no_stopwords=0,no_search=0,no_fulltext=0; +static int create_flag=0,error=0; + +#define MAX_REC_LENGTH 300 +static char record[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH]; + +static int run_test(const char *filename); +static void get_options(int argc, char *argv[]); +static void create_record(char *, int); +static void usage(); + +static struct my_option my_long_options[] = +{ + {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '?', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'h', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'V', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'v', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 's', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'N', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'S', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'K', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'F', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", 'U', "", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"", '#', "", 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +int main(int argc, char *argv[]) +{ + MY_INIT(argv[0]); + + get_options(argc,argv); + maria_init(); + + exit(run_test("FT1")); +} + +static MARIA_COLUMNDEF recinfo[3]; +static MARIA_KEYDEF keyinfo[2]; +static HA_KEYSEG keyseg[10]; + +static int run_test(const char *filename) +{ + MARIA_HA *file; + int i,j; + my_off_t pos; + + bzero((char*) recinfo,sizeof(recinfo)); + + /* First define 2 columns */ + recinfo[0].type=extra_field; + recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr : + extra_length); + if (extra_field == FIELD_VARCHAR) + recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length); + recinfo[1].type=key_field; + recinfo[1].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr : + key_length); + if (key_field == FIELD_VARCHAR) + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length); + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[0].seg[0].type= key_type; + keyinfo[0].seg[0].flag= (key_field == FIELD_BLOB) ? HA_BLOB_PART: + (key_field == FIELD_VARCHAR) ? HA_VAR_LENGTH_PART:0; + keyinfo[0].seg[0].start=recinfo[0].length; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit= 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language= default_charset_info->number; + keyinfo[0].flag = (no_fulltext?HA_PACK_KEY:HA_FULLTEXT); + + if (!silent) + printf("- Creating isam-file\n"); + if (maria_create(filename,(no_keys?0:1),keyinfo,2,recinfo,0,NULL, + (MARIA_CREATE_INFO*) 0, create_flag)) + goto err; + if (!(file=maria_open(filename,2,0))) + goto err; + + if (!silent) + printf("- %s stopwords\n",no_stopwords?"Skipping":"Initializing"); + maria_ft_init_stopwords(no_stopwords?NULL:maria_ft_precompiled_stopwords); + + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + for (i=NUPD ; indocs); + for (j=0;j<5;j++) + { + double w; int err; + err= maria_ft_nlq_read_next(result, read_record); + if (err==HA_ERR_END_OF_FILE) + { + printf("No more matches!\n"); + break; + } + else if (err) + { + printf("maria_ft_read_next %d failed with errno %3d\n",j,my_errno); + break; + } + w=maria_ft_nlq_get_relevance(result); + if (key_field == FIELD_VARCHAR) + { + uint l; + char *p; + p=recinfo[0].length+read_record; + l=uint2korr(p); + printf("%10.7f: %.*s\n",w,(int) l,p+2); + } + else + printf("%10.7f: %.*s\n",w,recinfo[1].length, + recinfo[0].length+read_record); + } + maria_ft_nlq_close_search(result); + } + + if (maria_close(file)) goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); +err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ +} + +static char blob_key[MAX_REC_LENGTH]; +/* static char blob_record[MAX_REC_LENGTH+20*20]; */ + +void create_record(char *pos, int n) +{ + bzero((char*) pos,MAX_REC_LENGTH); + if (recinfo[0].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + strnmov(blob_key,data[n].f0,keyinfo[0].seg[0].length); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[0].length; + } + else if (recinfo[0].type == FIELD_VARCHAR) + { + uint tmp; + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f0,keyinfo[0].seg[0].length); + tmp=strlen(pos+pack_length); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); + pos+=recinfo[0].length; + } + else + { + strnmov(pos,data[n].f0,keyinfo[0].seg[0].length); + pos+=recinfo[0].length; + } + if (recinfo[1].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + strnmov(blob_key,data[n].f2,keyinfo[0].seg[0].length); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[1].length; + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + uint tmp; + /* -1 is here because pack_length is stored in seg->length */ + uint pack_length= HA_VARCHAR_PACKLENGTH(keyinfo[0].seg[0].length-1); + strnmov(pos+pack_length,data[n].f2,keyinfo[0].seg[0].length); + tmp=strlen(pos+1); + if (pack_length == 1) + *pos= (char) tmp; + else + int2store(pos,tmp); + pos+=recinfo[1].length; + } + else + { + strnmov(pos,data[n].f2,keyinfo[0].seg[0].length); + pos+=recinfo[1].length; + } +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'v': verbose=1; break; + case 's': silent=1; break; + case 'F': no_fulltext=1; no_search=1; + case 'U': skip_update=1; break; + case 'K': no_keys=no_search=1; break; + case 'N': no_search=1; break; + case 'S': no_stopwords=1; break; + case '#': + DBUG_PUSH (argument); + break; + case 'V': + case '?': + case 'h': + usage(); + exit(1); + } + return 0; +} + +/* Read options */ + +static void get_options(int argc,char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + return; +} /* get options */ + + +static void usage() +{ + printf("%s [options]\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_ft_test1.h b/storage/maria/ma_ft_test1.h new file mode 100644 index 00000000000..5883c42f5c5 --- /dev/null +++ b/storage/maria/ma_ft_test1.h @@ -0,0 +1,420 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +#define NUPD 20 +#define NDATAS 389 +struct { const char *f0, *f2; } data[NDATAS] = { + {"1", "General Information about MySQL"}, + {"1.1", "What is MySQL?"}, + {"1.2", "About this manual"}, + {"1.3", "History of MySQL"}, + {"1.4", "The main features of MySQL"}, + {"1.5", "General SQL information and tutorials"}, + {"1.6", "Useful MySQL-related links"}, + {"1.7", "What are stored procedures and triggers and so on?"}, + {"2", "MySQL mailing lists and how to ask questions/give error (bug) reports"}, + {"2.1", "Subscribing to/un-subscribing from the MySQL mailing list"}, + {"2.2", "Asking questions or reporting bugs"}, + {"2.3", "I think I have found a bug. What information do you need to help me?"}, + {"2.3.1", "MySQL keeps crashing"}, + {"2.4", "Guidelines for answering questions on the mailing list"}, + {"3", "Licensing or When do I have/want to pay for MySQL?"}, + {"3.1", "How much does MySQL cost?"}, + {"3.2", "How do I get commercial support?"}, + {"3.2.1", "Types of commercial support"}, + {"3.2.1.1", "Basic email support"}, + {"3.2.1.2", "Extended email support"}, +/*------------------------------- NUPD=20 -------------------------------*/ + {"3.2.1.3", "Asking: Login support"}, + {"3.2.1.4", "Extended login support"}, + {"3.3", "How do I pay for licenses/support?"}, + {"3.4", "Who do I contact when I want more information about licensing/support?"}, + {"3.5", "What Copyright does MySQL use?"}, + {"3.6", "When may I distribute MySQL commercially without a fee?"}, + {"3.7", "I want to sell a product that can be configured to use MySQL"}, + {"3.8", "I am running a commercial web server using MySQL"}, + {"3.9", "Do I need a license to sell commercial Perl/tcl/PHP/Web+ etc applications?"}, + {"3.10", "Possible future changes in the licensing"}, + {"4", "Compiling and installing MySQL"}, + {"4.1", "How do I get MySQL?"}, + {"4.2", "Which MySQL version should I use?"}, + {"4.3", "How/when will you release updates?"}, + {"4.4", "What operating systems does MySQL support?"}, + {"4.5", "Compiling MySQL from source code"}, + {"4.5.1", "Quick installation overview"}, + {"4.5.2", "Usual configure switches"}, + {"4.5.3", "Applying a patch"}, + {"4.6", "Problems compiling?"}, + {"4.7", "General compilation notes"}, + {"4.8", "MIT-pthreads notes (FreeBSD)"}, + {"4.9", "Perl installation comments"}, + {"4.10", "Special things to consider for some machine/OS combinations"}, + {"4.10.1", "Solaris notes"}, + {"4.10.2", "SunOS 4 notes"}, + {"4.10.3", "Linux notes for all versions"}, + {"4.10.3.1", "Linux-x86 notes"}, + {"4.10.3.2", "RedHat 5.0"}, + {"4.10.3.3", "RedHat 5.1"}, + {"4.10.3.4", "Linux-Sparc notes"}, + {"4.10.3.5", "Linux-Alpha notes"}, + {"4.10.3.6", "MkLinux notes"}, + {"4.10.4", "Alpha-DEC-Unix notes"}, + {"4.10.5", "Alpha-DEC-OSF1 notes"}, + {"4.10.6", "SGI-IRIX notes"}, + {"4.10.7", "FreeBSD notes"}, + {"4.10.7.1", "FreeBSD-3.0 notes"}, + {"4.10.8", "BSD/OS 2.# notes"}, + {"4.10.8.1", "BSD/OS 3.# notes"}, + {"4.10.9", "SCO notes"}, + {"4.10.10", "SCO Unixware 7.0 notes"}, + {"4.10.11", "IBM-AIX notes"}, + {"4.10.12", "HP-UX notes"}, + {"4.11", "TcX binaries"}, + {"4.12", "Win32 notes"}, + {"4.13", "Installation instructions for MySQL binary releases"}, + {"4.13.1", "How to get MySQL Perl support working"}, + {"4.13.2", "Linux notes"}, + {"4.13.3", "HP-UX notes"}, + {"4.13.4", "Linking client libraries"}, + {"4.14", "Problems running mysql_install_db"}, + {"4.15", "Problems starting MySQL"}, + {"4.16", "Automatic start/stop of MySQL"}, + {"4.17", "Option files"}, + {"5", "How standards-compatible is MySQL?"}, + {"5.1", "What extensions has MySQL to ANSI SQL92?"}, + {"5.2", "What functionality is missing in MySQL?"}, + {"5.2.1", "Sub-selects"}, + {"5.2.2", "SELECT INTO TABLE"}, + {"5.2.3", "Transactions"}, + {"5.2.4", "Triggers"}, + {"5.2.5", "Foreign Keys"}, + {"5.2.5.1", "Some reasons NOT to use FOREIGN KEYS"}, + {"5.2.6", "Views"}, + {"5.2.7", "-- as start of a comment"}, + {"5.3", "What standards does MySQL follow?"}, + {"5.4", "What functions exist only for compatibility?"}, + {"5.5", "Limitations of BLOB and TEXT types"}, + {"5.6", "How to cope without COMMIT-ROLLBACK"}, + {"6", "The MySQL access privilege system"}, + {"6.1", "What the privilege system does"}, + {"6.2", "Connecting to the MySQL server"}, + {"6.2.1", "Keeping your password secure"}, + {"6.3", "Privileges provided by MySQL"}, + {"6.4", "How the privilege system works"}, + {"6.5", "The privilege tables"}, + {"6.6", "Setting up the initial MySQL privileges"}, + {"6.7", "Adding new user privileges to MySQL"}, + {"6.8", "An example permission setup"}, + {"6.9", "Causes of Access denied errors"}, + {"6.10", "How to make MySQL secure against crackers"}, + {"7", "MySQL language reference"}, + {"7.1", "Literals: how to write strings and numbers"}, + {"7.1.1", "Strings"}, + {"7.1.2", "Numbers"}, + {"7.1.3", "NULL values"}, + {"7.1.4", "Database, table, index, column and alias names"}, + {"7.1.4.1", "Case sensitivity in names"}, + {"7.2", "Column types"}, + {"7.2.1", "Column type storage requirements"}, + {"7.2.5", "Numeric types"}, + {"7.2.6", "Date and time types"}, + {"7.2.6.1", "The DATE type"}, + {"7.2.6.2", "The TIME type"}, + {"7.2.6.3", "The DATETIME type"}, + {"7.2.6.4", "The TIMESTAMP type"}, + {"7.2.6.5", "The YEAR type"}, + {"7.2.6.6", "Miscellaneous date and time properties"}, + {"7.2.7", "String types"}, + {"7.2.7.1", "The CHAR and VARCHAR types"}, + {"7.2.7.2", "The BLOB and TEXT types"}, + {"7.2.7.3", "The ENUM type"}, + {"7.2.7.4", "The SET type"}, + {"7.2.8", "Choosing the right type for a column"}, + {"7.2.9", "Column indexes"}, + {"7.2.10", "Multiple-column indexes"}, + {"7.2.11", "Using column types from other database engines"}, + {"7.3", "Functions for use in SELECT and WHERE clauses"}, + {"7.3.1", "Grouping functions"}, + {"7.3.2", "Normal arithmetic operations"}, + {"7.3.3", "Bit functions"}, + {"7.3.4", "Logical operations"}, + {"7.3.5", "Comparison operators"}, + {"7.3.6", "String comparison functions"}, + {"7.3.7", "Control flow functions"}, + {"7.3.8", "Mathematical functions"}, + {"7.3.9", "String functions"}, + {"7.3.10", "Date and time functions"}, + {"7.3.11", "Miscellaneous functions"}, + {"7.3.12", "Functions for use with GROUP BY clauses"}, + {"7.4", "CREATE DATABASE syntax"}, + {"7.5", "DROP DATABASE syntax"}, + {"7.6", "CREATE TABLE syntax"}, + {"7.7", "ALTER TABLE syntax"}, + {"7.8", "OPTIMIZE TABLE syntax"}, + {"7.9", "DROP TABLE syntax"}, + {"7.10", "DELETE syntax"}, + {"7.11", "SELECT syntax"}, + {"7.12", "JOIN syntax"}, + {"7.13", "INSERT syntax"}, + {"7.14", "REPLACE syntax"}, + {"7.15", "LOAD DATA INFILE syntax"}, + {"7.16", "UPDATE syntax"}, + {"7.17", "USE syntax"}, + {"7.18", "SHOW syntax (Get information about tables, columns...)"}, + {"7.19", "EXPLAIN syntax (Get information about a SELECT)"}, + {"7.20", "DESCRIBE syntax (Get information about columns)"}, + {"7.21", "LOCK TABLES/UNLOCK TABLES syntax"}, + {"7.22", "SET OPTION syntax"}, + {"7.23", "GRANT syntax (Compatibility function)"}, + {"7.24", "CREATE INDEX syntax (Compatibility function)"}, + {"7.25", "DROP INDEX syntax (Compatibility function)"}, + {"7.26", "Comment syntax"}, + {"7.27", "CREATE FUNCTION/DROP FUNCTION syntax"}, + {"7.28", "Is MySQL picky about reserved words?"}, + {"8", "Example SQL queries"}, + {"8.1", "Queries from twin project"}, + {"8.1.1", "Find all non-distributed twins"}, + {"8.1.2", "Show a table on twin pair status"}, + {"9", "How safe/stable is MySQL?"}, + {"9.1", "How stable is MySQL?"}, + {"9.2", "Why are there is so many releases of MySQL?"}, + {"9.3", "Checking a table for errors"}, + {"9.4", "How to repair tables"}, + {"9.5", "Is there anything special to do when upgrading/downgrading MySQL?"}, + {"9.5.1", "Upgrading from a 3.21 version to 3.22"}, + {"9.5.2", "Upgrading from a 3.20 version to 3.21"}, + {"9.5.3", "Upgrading to another architecture"}, + {"9.6", "Year 2000 compliance"}, + {"10", "MySQL Server functions"}, + {"10.1", "What languages are supported by MySQL?"}, + {"10.1.1", "Character set used for data & sorting"}, + {"10.2", "The update log"}, + {"10.3", "How big can MySQL tables be?"}, + {"11", "Getting maximum performance from MySQL"}, + {"11.1", "How does one change the size of MySQL buffers?"}, + {"11.2", "How compiling and linking affects the speed of MySQL"}, + {"11.3", "How does MySQL use memory?"}, + {"11.4", "How does MySQL use indexes?"}, + {"11.5", "What optimizations are done on WHERE clauses?"}, + {"11.6", "How does MySQL open & close tables?"}, + {"11.6.0.1", "What are the drawbacks of creating possibly thousands of tables in a database?"}, + {"11.7", "How does MySQL lock tables?"}, + {"11.8", "How should I arrange my table to be as fast/small as possible?"}, + {"11.9", "What affects the speed of INSERT statements?"}, + {"11.10", "What affects the speed DELETE statements?"}, + {"11.11", "How do I get MySQL to run at full speed?"}, + {"11.12", "What are the different row formats? Or, when should VARCHAR/CHAR be used?"}, + {"11.13", "Why so many open tables?"}, + {"12", "MySQL benchmark suite"}, + {"13", "MySQL Utilites"}, + {"13.1", "Overview of the different MySQL programs"}, + {"13.2", "The MySQL table check, optimize and repair program"}, + {"13.2.1", "isamchk memory use"}, + {"13.2.2", "Getting low-level table information"}, + {"13.3", "The MySQL compressed read-only table generator"}, + {"14", "Adding new functions to MySQL"}, + {"15", "MySQL ODBC Support"}, + {"15.1", "Operating systems supported by MyODBC"}, + {"15.2", "How to report problems with MyODBC"}, + {"15.3", "Programs known to work with MyODBC"}, + {"15.4", "How to fill in the various fields in the ODBC administrator program"}, + {"15.5", "How to get the value of an AUTO_INCREMENT column in ODBC"}, + {"16", "Problems and common errors"}, + {"16.1", "Some common errors when using MySQL"}, + {"16.1.1", "MySQL server has gone away error"}, + {"16.1.2", "Can't connect to local MySQL server error"}, + {"16.1.3", "Out of memory error"}, + {"16.1.4", "Packet too large error"}, + {"16.1.5", "The table is full error"}, + {"16.1.6", "Commands out of sync error in client"}, + {"16.1.7", "Removing user error"}, + {"16.2", "How MySQL handles a full disk"}, + {"16.3", "How to run SQL commands from a text file"}, + {"16.4", "Where MySQL stores temporary files"}, + {"16.5", "Access denied error"}, + {"16.6", "How to run MySQL as a normal user"}, + {"16.7", "Problems with file permissions"}, + {"16.8", "File not found"}, + {"16.9", "Problems using DATE columns"}, + {"16.10", "Case sensitivity in searches"}, + {"16.11", "Problems with NULL values"}, + {"17", "Solving some common problems with MySQL"}, + {"17.1", "Database replication"}, + {"17.2", "Database backups"}, + {"18", "MySQL client tools and API's"}, + {"18.1", "MySQL C API"}, + {"18.2", "C API datatypes"}, + {"18.3", "C API function overview"}, + {"18.4", "C API function descriptions"}, + {"18.4.1", "mysql_affected_rows()"}, + {"18.4.2", "mysql_close()"}, + {"18.4.3", "mysql_connect()"}, + {"18.4.4", "mysql_create_db()"}, + {"18.4.5", "mysql_data_seek()"}, + {"18.4.6", "mysql_debug()"}, + {"18.4.7", "mysql_drop_db()"}, + {"18.4.8", "mysql_dump_debug_info()"}, + {"18.4.9", "mysql_eof()"}, + {"18.4.10", "mysql_errno()"}, + {"18.4.11", "mysql_error()"}, + {"18.4.12", "mysql_escape_string()"}, + {"18.4.13", "mysql_fetch_field()"}, + {"18.4.14", "mysql_fetch_fields()"}, + {"18.4.15", "mysql_fetch_field_direct()"}, + {"18.4.16", "mysql_fetch_lengths()"}, + {"18.4.17", "mysql_fetch_row()"}, + {"18.4.18", "mysql_field_seek()"}, + {"18.4.19", "mysql_field_tell()"}, + {"18.4.20", "mysql_free_result()"}, + {"18.4.21", "mysql_get_client_info()"}, + {"18.4.22", "mysql_get_host_info()"}, + {"18.4.23", "mysql_get_proto_info()"}, + {"18.4.24", "mysql_get_server_info()"}, + {"18.4.25", "mysql_info()"}, + {"18.4.26", "mysql_init()"}, + {"18.4.27", "mysql_insert_id()"}, + {"18.4.28", "mysql_kill()"}, + {"18.4.29", "mysql_list_dbs()"}, + {"18.4.30", "mysql_list_fields()"}, + {"18.4.31", "mysql_list_processes()"}, + {"18.4.32", "mysql_list_tables()"}, + {"18.4.33", "mysql_num_fields()"}, + {"18.4.34", "mysql_num_rows()"}, + {"18.4.35", "mysql_query()"}, + {"18.4.36", "mysql_real_connect()"}, + {"18.4.37", "mysql_real_query()"}, + {"18.4.38", "mysql_reload()"}, + {"18.4.39", "mysql_row_tell()"}, + {"18.4.40", "mysql_select_db()"}, + {"18.4.41", "mysql_shutdown()"}, + {"18.4.42", "mysql_stat()"}, + {"18.4.43", "mysql_store_result()"}, + {"18.4.44", "mysql_thread_id()"}, + {"18.4.45", "mysql_use_result()"}, + {"18.4.46", "Why is it that after mysql_query() returns success, mysql_store_result() sometimes returns NULL?"}, + {"18.4.47", "What results can I get from a query?"}, + {"18.4.48", "How can I get the unique ID for the last inserted row?"}, + {"18.4.49", "Problems linking with the C API"}, + {"18.4.50", "How to make a thread-safe client"}, + {"18.5", "MySQL Perl API's"}, + {"18.5.1", "DBI with DBD::mysql"}, + {"18.5.1.1", "The DBI interface"}, + {"18.5.1.2", "More DBI/DBD information"}, + {"18.6", "MySQL Java connectivity (JDBC)"}, + {"18.7", "MySQL PHP API's"}, + {"18.8", "MySQL C++ API's"}, + {"18.9", "MySQL Python API's"}, + {"18.10", "MySQL TCL API's"}, + {"19", "How MySQL compares to other databases"}, + {"19.1", "How MySQL compares to mSQL"}, + {"19.1.1", "How to convert mSQL tools for MySQL"}, + {"19.1.2", "How mSQL and MySQL client/server communications protocols differ"}, + {"19.1.3", "How mSQL 2.0 SQL syntax differs from MySQL"}, + {"19.2", "How MySQL compares to PostgreSQL"}, + {"A", "Some users of MySQL"}, + {"B", "Contributed programs"}, + {"C", "Contributors to MySQL"}, + {"D", "MySQL change history"}, + {"19.3", "Changes in release 3.22.x (Alpha version)"}, + {"19.3.1", "Changes in release 3.22.7"}, + {"19.3.2", "Changes in release 3.22.6"}, + {"19.3.3", "Changes in release 3.22.5"}, + {"19.3.4", "Changes in release 3.22.4"}, + {"19.3.5", "Changes in release 3.22.3"}, + {"19.3.6", "Changes in release 3.22.2"}, + {"19.3.7", "Changes in release 3.22.1"}, + {"19.3.8", "Changes in release 3.22.0"}, + {"19.4", "Changes in release 3.21.x"}, + {"19.4.1", "Changes in release 3.21.33"}, + {"19.4.2", "Changes in release 3.21.32"}, + {"19.4.3", "Changes in release 3.21.31"}, + {"19.4.4", "Changes in release 3.21.30"}, + {"19.4.5", "Changes in release 3.21.29"}, + {"19.4.6", "Changes in release 3.21.28"}, + {"19.4.7", "Changes in release 3.21.27"}, + {"19.4.8", "Changes in release 3.21.26"}, + {"19.4.9", "Changes in release 3.21.25"}, + {"19.4.10", "Changes in release 3.21.24"}, + {"19.4.11", "Changes in release 3.21.23"}, + {"19.4.12", "Changes in release 3.21.22"}, + {"19.4.13", "Changes in release 3.21.21a"}, + {"19.4.14", "Changes in release 3.21.21"}, + {"19.4.15", "Changes in release 3.21.20"}, + {"19.4.16", "Changes in release 3.21.19"}, + {"19.4.17", "Changes in release 3.21.18"}, + {"19.4.18", "Changes in release 3.21.17"}, + {"19.4.19", "Changes in release 3.21.16"}, + {"19.4.20", "Changes in release 3.21.15"}, + {"19.4.21", "Changes in release 3.21.14b"}, + {"19.4.22", "Changes in release 3.21.14a"}, + {"19.4.23", "Changes in release 3.21.13"}, + {"19.4.24", "Changes in release 3.21.12"}, + {"19.4.25", "Changes in release 3.21.11"}, + {"19.4.26", "Changes in release 3.21.10"}, + {"19.4.27", "Changes in release 3.21.9"}, + {"19.4.28", "Changes in release 3.21.8"}, + {"19.4.29", "Changes in release 3.21.7"}, + {"19.4.30", "Changes in release 3.21.6"}, + {"19.4.31", "Changes in release 3.21.5"}, + {"19.4.32", "Changes in release 3.21.4"}, + {"19.4.33", "Changes in release 3.21.3"}, + {"19.4.34", "Changes in release 3.21.2"}, + {"19.4.35", "Changes in release 3.21.0"}, + {"19.5", "Changes in release 3.20.x"}, + {"19.5.1", "Changes in release 3.20.18"}, + {"19.5.2", "Changes in release 3.20.17"}, + {"19.5.3", "Changes in release 3.20.16"}, + {"19.5.4", "Changes in release 3.20.15"}, + {"19.5.5", "Changes in release 3.20.14"}, + {"19.5.6", "Changes in release 3.20.13"}, + {"19.5.7", "Changes in release 3.20.11"}, + {"19.5.8", "Changes in release 3.20.10"}, + {"19.5.9", "Changes in release 3.20.9"}, + {"19.5.10", "Changes in release 3.20.8"}, + {"19.5.11", "Changes in release 3.20.7"}, + {"19.5.12", "Changes in release 3.20.6"}, + {"19.5.13", "Changes in release 3.20.3"}, + {"19.5.14", "Changes in release 3.20.0"}, + {"19.6", "Changes in release 3.19.x"}, + {"19.6.1", "Changes in release 3.19.5"}, + {"19.6.2", "Changes in release 3.19.4"}, + {"19.6.3", "Changes in release 3.19.3"}, + {"E", "Known errors and design deficiencies in MySQL"}, + {"F", "List of things we want to add to MySQL in the future (The TODO)"}, + {"19.7", "Things that must done in the real near future"}, + {"19.8", "Things that have to be done sometime"}, + {"19.9", "Some things we don't have any plans to do"}, + {"G", "Comments on porting to other systems"}, + {"19.10", "Debugging MySQL"}, + {"19.11", "Comments about RTS threads"}, + {"19.12", "What is the difference between different thread packages?"}, + {"H", "Description of MySQL regular expression syntax"}, + {"I", "What is Unireg?"}, + {"J", "The MySQL server license"}, + {"K", "The MySQL license for Microsoft operating systems"}, + {"*", "SQL command, type and function index"}, + {"*", "Concept Index"} +}; + +#define NQUERIES 5 +const char *query[NQUERIES]={ + "mysql information and manual", + "upgrading from previous version", + "column indexes", + "against about after more right the with/without", /* stopwords test */ + "mysql license and copyright" +}; diff --git a/storage/maria/ma_ft_update.c b/storage/maria/ma_ft_update.c new file mode 100644 index 00000000000..97ebdb05b42 --- /dev/null +++ b/storage/maria/ma_ft_update.c @@ -0,0 +1,352 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* functions to work with full-text indices */ + +#include "ma_ftdefs.h" +#include + +void _ma_ft_segiterator_init(MARIA_HA *info, uint keynr, const byte *record, + FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator_init"); + + ftsi->num=info->s->keyinfo[keynr].keysegs; + ftsi->seg=info->s->keyinfo[keynr].seg; + ftsi->rec=record; + DBUG_VOID_RETURN; +} + +void _ma_ft_segiterator_dummy_init(const byte *record, uint len, + FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator_dummy_init"); + + ftsi->num=1; + ftsi->seg=0; + ftsi->pos=record; + ftsi->len=len; + DBUG_VOID_RETURN; +} + +/* + This function breaks convention "return 0 in success" + but it's easier to use like this + + while(_ma_ft_segiterator()) + + so "1" means "OK", "0" means "EOF" +*/ + +uint _ma_ft_segiterator(register FT_SEG_ITERATOR *ftsi) +{ + DBUG_ENTER("_ma_ft_segiterator"); + + if (!ftsi->num) + DBUG_RETURN(0); + + ftsi->num--; + if (!ftsi->seg) + DBUG_RETURN(1); + + ftsi->seg--; + + if (ftsi->seg->null_bit && + (ftsi->rec[ftsi->seg->null_pos] & ftsi->seg->null_bit)) + { + ftsi->pos=0; + DBUG_RETURN(1); + } + ftsi->pos= ftsi->rec+ftsi->seg->start; + if (ftsi->seg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= (ftsi->seg->bit_start); + ftsi->len= (pack_length == 1 ? (uint) *(uchar*) ftsi->pos : + uint2korr(ftsi->pos)); + ftsi->pos+= pack_length; /* Skip VARCHAR length */ + DBUG_RETURN(1); + } + if (ftsi->seg->flag & HA_BLOB_PART) + { + ftsi->len= _ma_calc_blob_length(ftsi->seg->bit_start,ftsi->pos); + memcpy_fixed((char*) &ftsi->pos, ftsi->pos+ftsi->seg->bit_start, + sizeof(char*)); + DBUG_RETURN(1); + } + ftsi->len=ftsi->seg->length; + DBUG_RETURN(1); +} + + +/* parses a document i.e. calls maria_ft_parse for every keyseg */ + +uint _ma_ft_parse(TREE *parsed, MARIA_HA *info, uint keynr, const byte *record, + MYSQL_FTPARSER_PARAM *param, MEM_ROOT *mem_root) +{ + FT_SEG_ITERATOR ftsi; + struct st_mysql_ftparser *parser; + DBUG_ENTER("_ma_ft_parse"); + + _ma_ft_segiterator_init(info, keynr, record, &ftsi); + + maria_ft_parse_init(parsed, info->s->keyinfo[keynr].seg->charset); + parser= info->s->keyinfo[keynr].parser; + while (_ma_ft_segiterator(&ftsi)) + { + if (ftsi.pos) + if (maria_ft_parse(parsed, (byte *)ftsi.pos, ftsi.len, parser, param, + mem_root)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +FT_WORD * _ma_ft_parserecord(MARIA_HA *info, uint keynr, const byte *record, + MEM_ROOT *mem_root) +{ + TREE ptree; + MYSQL_FTPARSER_PARAM *param; + DBUG_ENTER("_ma_ft_parserecord"); + if (! (param= maria_ftparser_call_initializer(info, keynr, 0))) + DBUG_RETURN(NULL); + bzero((char*) &ptree, sizeof(ptree)); + param->flags= 0; + if (_ma_ft_parse(&ptree, info, keynr, record, param, mem_root)) + DBUG_RETURN(NULL); + + DBUG_RETURN(maria_ft_linearize(&ptree, mem_root)); +} + +static int _ma_ft_store(MARIA_HA *info, uint keynr, byte *keybuf, + FT_WORD *wlist, my_off_t filepos) +{ + uint key_length; + DBUG_ENTER("_ma_ft_store"); + + for (; wlist->pos; wlist++) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); + if (_ma_ck_write(info, keynr, keybuf, key_length)) + DBUG_RETURN(1); + } + DBUG_RETURN(0); +} + +static int _ma_ft_erase(MARIA_HA *info, uint keynr, byte *keybuf, + FT_WORD *wlist, my_off_t filepos) +{ + uint key_length, err=0; + DBUG_ENTER("_ma_ft_erase"); + + for (; wlist->pos; wlist++) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,wlist,filepos); + if (_ma_ck_delete(info, keynr, keybuf, key_length)) + err=1; + } + DBUG_RETURN(err); +} + +/* + Compares an appropriate parts of two WORD_KEY keys directly out of records + returns 1 if they are different +*/ + +#define THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT 1 +#define GEE_THEY_ARE_ABSOLUTELY_IDENTICAL 0 + +int _ma_ft_cmp(MARIA_HA *info, uint keynr, const byte *rec1, const byte *rec2) +{ + FT_SEG_ITERATOR ftsi1, ftsi2; + CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; + DBUG_ENTER("_ma_ft_cmp"); + + _ma_ft_segiterator_init(info, keynr, rec1, &ftsi1); + _ma_ft_segiterator_init(info, keynr, rec2, &ftsi2); + + while (_ma_ft_segiterator(&ftsi1) && _ma_ft_segiterator(&ftsi2)) + { + if ((ftsi1.pos != ftsi2.pos) && + (!ftsi1.pos || !ftsi2.pos || + ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, + (uchar*) ftsi2.pos,ftsi2.len,0,0))) + DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); + } + DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL); +} + + +/* update a document entry */ + +int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, + const byte *oldrec, const byte *newrec, my_off_t pos) +{ + int error= -1; + FT_WORD *oldlist,*newlist, *old_word, *new_word; + CHARSET_INFO *cs=info->s->keyinfo[keynr].seg->charset; + uint key_length; + int cmp, cmp2; + DBUG_ENTER("_ma_ft_update"); + + if (!(old_word=oldlist=_ma_ft_parserecord(info, keynr, oldrec, + &info->ft_memroot)) || + !(new_word=newlist=_ma_ft_parserecord(info, keynr, newrec, + &info->ft_memroot))) + goto err; + + error=0; + while(old_word->pos && new_word->pos) + { + cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len, + (uchar*) new_word->pos,new_word->len,0,0); + cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); + + if (cmp < 0 || cmp2) + { + key_length= _ma_ft_make_key(info,keynr,keybuf,old_word,pos); + if ((error= _ma_ck_delete(info,keynr, keybuf,key_length))) + goto err; + } + if (cmp > 0 || cmp2) + { + key_length= _ma_ft_make_key(info, keynr, keybuf, new_word,pos); + if ((error= _ma_ck_write(info, keynr, keybuf,key_length))) + goto err; + } + if (cmp<=0) old_word++; + if (cmp>=0) new_word++; + } + if (old_word->pos) + error= _ma_ft_erase(info,keynr,keybuf,old_word,pos); + else if (new_word->pos) + error= _ma_ft_store(info,keynr,keybuf,new_word,pos); + +err: + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); + DBUG_RETURN(error); +} + + +/* adds a document to the collection */ + +int _ma_ft_add(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, + my_off_t pos) +{ + int error= -1; + FT_WORD *wlist; + DBUG_ENTER("_ma_ft_add"); + DBUG_PRINT("enter",("keynr: %d",keynr)); + + if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot))) + error= _ma_ft_store(info,keynr,keybuf,wlist,pos); + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} + + +/* removes a document from the collection */ + +int _ma_ft_del(MARIA_HA *info, uint keynr, byte *keybuf, const byte *record, + my_off_t pos) +{ + int error= -1; + FT_WORD *wlist; + DBUG_ENTER("_ma_ft_del"); + DBUG_PRINT("enter",("keynr: %d",keynr)); + + if ((wlist= _ma_ft_parserecord(info, keynr, record, &info->ft_memroot))) + error= _ma_ft_erase(info,keynr,keybuf,wlist,pos); + free_root(&info->ft_memroot, MYF(MY_MARK_BLOCKS_FREE)); + DBUG_PRINT("exit",("Return: %d",error)); + DBUG_RETURN(error); +} + + +uint _ma_ft_make_key(MARIA_HA *info, uint keynr, byte *keybuf, FT_WORD *wptr, + my_off_t filepos) +{ + byte buf[HA_FT_MAXBYTELEN+16]; + DBUG_ENTER("_ma_ft_make_key"); + +#if HA_FT_WTYPE == HA_KEYTYPE_FLOAT + { + float weight=(float) ((filepos==HA_OFFSET_ERROR) ? 0 : wptr->weight); + mi_float4store(buf,weight); + } +#else +#error +#endif + + int2store(buf+HA_FT_WLEN,wptr->len); + memcpy(buf+HA_FT_WLEN+2,wptr->pos,wptr->len); + DBUG_RETURN(_ma_make_key(info, keynr, keybuf, buf, filepos)); +} + + +/* + convert key value to ft2 +*/ + +uint _ma_ft_convert_to_ft2(MARIA_HA *info, uint keynr, byte *key) +{ + my_off_t root; + DYNAMIC_ARRAY *da=info->ft1_to_ft2; + MARIA_KEYDEF *keyinfo=&info->s->ft2_keyinfo; + byte *key_ptr= (byte*) dynamic_array_ptr(da, 0), *end; + uint length, key_length; + DBUG_ENTER("_ma_ft_convert_to_ft2"); + + /* we'll generate one pageful at once, and insert the rest one-by-one */ + /* calculating the length of this page ...*/ + length=(keyinfo->block_length-2) / keyinfo->keylength; + set_if_smaller(length, da->elements); + length=length * keyinfo->keylength; + + get_key_full_length_rdonly(key_length, key); + while (_ma_ck_delete(info, keynr, key, key_length) == 0) + { + /* + nothing to do here. + _ma_ck_delete() will populate info->ft1_to_ft2 with deleted keys + */ + } + + /* creating pageful of keys */ + maria_putint(info->buff,length+2,0); + memcpy(info->buff+2, key_ptr, length); + info->keyread_buff_used=info->page_changed=1; /* info->buff is used */ + if ((root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _ma_write_keypage(info,keyinfo,root,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + + /* inserting the rest of key values */ + end= (byte*) dynamic_array_ptr(da, da->elements); + for (key_ptr+=length; key_ptr < end; key_ptr+=keyinfo->keylength) + if(_ma_ck_real_write_btree(info, keyinfo, key_ptr, 0, &root, SEARCH_SAME)) + DBUG_RETURN(-1); + + /* now, writing the word key entry */ + ft_intXstore(key+key_length, - (int) da->elements); + _ma_dpointer(info, key+key_length+HA_FT_WLEN, root); + + DBUG_RETURN(_ma_ck_real_write_btree(info, + info->s->keyinfo+keynr, + key, 0, + &info->s->state.key_root[keynr], + SEARCH_SAME)); +} diff --git a/storage/maria/ma_ftdefs.h b/storage/maria/ma_ftdefs.h new file mode 100644 index 00000000000..e25687d54b9 --- /dev/null +++ b/storage/maria/ma_ftdefs.h @@ -0,0 +1,152 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* some definitions for full-text indices */ + +#include "ma_fulltext.h" +#include +#include +#include +#include + +#define true_word_char(ctype, character) \ + ((ctype) & (_MY_U | _MY_L | _MY_NMR) || \ + (character) == '_') +#define misc_word_char(X) 0 + +#define FT_MAX_WORD_LEN_FOR_SORT 31 + +#define FTPARSER_MEMROOT_ALLOC_SIZE 65536 + +#define COMPILE_STOPWORDS_IN + +/* Interested readers may consult SMART + (ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z) + for an excellent implementation of vector space model we use. + It also demonstrate the usage of different weghting techniques. + This code, though, is completely original and is not based on the + SMART code but was in some cases inspired by it. + + NORM_PIVOT was taken from the article + A.Singhal, C.Buckley, M.Mitra, "Pivoted Document Length Normalization", + ACM SIGIR'96, 21-29, 1996 + */ + +#define LWS_FOR_QUERY LWS_TF +#define LWS_IN_USE LWS_LOG +#define PRENORM_IN_USE PRENORM_AVG +#define NORM_IN_USE NORM_PIVOT +#define GWS_IN_USE GWS_PROB +/*==============================================================*/ +#define LWS_TF (count) +#define LWS_BINARY (count>0) +#define LWS_SQUARE (count*count) +#define LWS_LOG (count?(log( (double) count)+1):0) +/*--------------------------------------------------------------*/ +#define PRENORM_NONE (p->weight) +#define PRENORM_MAX (p->weight/docstat.max) +#define PRENORM_AUG (0.4+0.6*p->weight/docstat.max) +#define PRENORM_AVG (p->weight/docstat.sum*docstat.uniq) +#define PRENORM_AVGLOG ((1+log(p->weight))/(1+log(docstat.sum/docstat.uniq))) +/*--------------------------------------------------------------*/ +#define NORM_NONE (1) +#define NORM_SUM (docstat.nsum) +#define NORM_COS (sqrt(docstat.nsum2)) + +#define PIVOT_VAL (0.0115) +#define NORM_PIVOT (1+PIVOT_VAL*docstat.uniq) +/*---------------------------------------------------------------*/ +#define GWS_NORM (1/sqrt(sum2)) +#define GWS_GFIDF (sum/doc_cnt) +/* Mysterious, but w/o (double) GWS_IDF performs better :-o */ +#define GWS_IDF log(aio->info->state->records/doc_cnt) +#define GWS_IDF1 log((double)aio->info->state->records/doc_cnt) +#define GWS_PROB ((aio->info->state->records > doc_cnt) ? log(((double)(aio->info->state->records-doc_cnt))/doc_cnt) : 0 ) +#define GWS_FREQ (1.0/doc_cnt) +#define GWS_SQUARED pow(log((double)aio->info->state->records/doc_cnt),2) +#define GWS_CUBIC pow(log((double)aio->info->state->records/doc_cnt),3) +#define GWS_ENTROPY (1-(suml/sum-log(sum))/log(aio->info->state->records)) +/*=================================================================*/ + +/* Boolean search operators */ +#define FTB_YES (ft_boolean_syntax[0]) +#define FTB_EGAL (ft_boolean_syntax[1]) +#define FTB_NO (ft_boolean_syntax[2]) +#define FTB_INC (ft_boolean_syntax[3]) +#define FTB_DEC (ft_boolean_syntax[4]) +#define FTB_LBR (ft_boolean_syntax[5]) +#define FTB_RBR (ft_boolean_syntax[6]) +#define FTB_NEG (ft_boolean_syntax[7]) +#define FTB_TRUNC (ft_boolean_syntax[8]) +#define FTB_LQUOT (ft_boolean_syntax[10]) +#define FTB_RQUOT (ft_boolean_syntax[11]) + +typedef struct st_maria_ft_word { + byte * pos; + uint len; + double weight; +} FT_WORD; + +int is_stopword(char *word, uint len); + +uint _ma_ft_make_key(MARIA_HA *, uint , byte *, FT_WORD *, my_off_t); + +byte maria_ft_get_word(CHARSET_INFO *, byte **, byte *, FT_WORD *, + MYSQL_FTPARSER_BOOLEAN_INFO *); +byte maria_ft_simple_get_word(CHARSET_INFO *, byte **, const byte *, + FT_WORD *, my_bool); + +typedef struct _st_maria_ft_seg_iterator { + uint num, len; + HA_KEYSEG *seg; + const byte *rec, *pos; +} FT_SEG_ITERATOR; + +void _ma_ft_segiterator_init(MARIA_HA *, uint, const byte *, FT_SEG_ITERATOR *); +void _ma_ft_segiterator_dummy_init(const byte *, uint, FT_SEG_ITERATOR *); +uint _ma_ft_segiterator(FT_SEG_ITERATOR *); + +void maria_ft_parse_init(TREE *, CHARSET_INFO *); +int maria_ft_parse(TREE *, byte *, int, struct st_mysql_ftparser *parser, + MYSQL_FTPARSER_PARAM *, MEM_ROOT *); +FT_WORD * maria_ft_linearize(TREE *, MEM_ROOT *); +FT_WORD * _ma_ft_parserecord(MARIA_HA *, uint, const byte *, MEM_ROOT *); +uint _ma_ft_parse(TREE *, MARIA_HA *, uint, const byte *, + MYSQL_FTPARSER_PARAM *, MEM_ROOT *); + +FT_INFO *maria_ft_init_nlq_search(MARIA_HA *, uint, byte *, uint, uint, byte *); +FT_INFO *maria_ft_init_boolean_search(MARIA_HA *, uint, byte *, uint, CHARSET_INFO *); + +extern const struct _ft_vft _ma_ft_vft_nlq; +int maria_ft_nlq_read_next(FT_INFO *, char *); +float maria_ft_nlq_find_relevance(FT_INFO *, byte *, uint); +void maria_ft_nlq_close_search(FT_INFO *); +float maria_ft_nlq_get_relevance(FT_INFO *); +my_off_t maria_ft_nlq_get_docid(FT_INFO *); +void maria_ft_nlq_reinit_search(FT_INFO *); + +extern const struct _ft_vft _ma_ft_vft_boolean; +int maria_ft_boolean_read_next(FT_INFO *, char *); +float maria_ft_boolean_find_relevance(FT_INFO *, byte *, uint); +void maria_ft_boolean_close_search(FT_INFO *); +float maria_ft_boolean_get_relevance(FT_INFO *); +my_off_t maria_ft_boolean_get_docid(FT_INFO *); +void maria_ft_boolean_reinit_search(FT_INFO *); +extern MYSQL_FTPARSER_PARAM *maria_ftparser_call_initializer(MARIA_HA *info, + uint keynr, + uint paramnr); +extern void maria_ftparser_call_deinitializer(MARIA_HA *info); diff --git a/storage/maria/ma_fulltext.h b/storage/maria/ma_fulltext.h new file mode 100644 index 00000000000..778ffd49196 --- /dev/null +++ b/storage/maria/ma_fulltext.h @@ -0,0 +1,27 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* some definitions for full-text indices */ + +#include "maria_def.h" +#include "ft_global.h" + +int _ma_ft_cmp(MARIA_HA *, uint, const byte *, const byte *); +int _ma_ft_add(MARIA_HA *, uint, byte *, const byte *, my_off_t); +int _ma_ft_del(MARIA_HA *, uint, byte *, const byte *, my_off_t); + +uint _ma_ft_convert_to_ft2(MARIA_HA *, uint, byte *); diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c new file mode 100644 index 00000000000..a04fba4e0d8 --- /dev/null +++ b/storage/maria/ma_info.c @@ -0,0 +1,140 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Return useful base information for an open table */ + +#include "maria_def.h" +#ifdef __WIN__ +#include +#endif + + /* Get position to last record */ + +MARIA_RECORD_POS maria_position(MARIA_HA *info) +{ + return info->cur_row.lastpos; +} + + +/* Get information about the table */ +/* if flag == 2 one get current info (no sync from database */ + +int maria_status(MARIA_HA *info, register MARIA_INFO *x, uint flag) +{ + MY_STAT state; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_status"); + + x->recpos= info->cur_row.lastpos; + if (flag == HA_STATUS_POS) + DBUG_RETURN(0); /* Compatible with ISAM */ + if (!(flag & HA_STATUS_NO_LOCK)) + { + pthread_mutex_lock(&share->intern_lock); + VOID(_ma_readinfo(info,F_RDLCK,0)); + fast_ma_writeinfo(info); + pthread_mutex_unlock(&share->intern_lock); + } + if (flag & HA_STATUS_VARIABLE) + { + x->records = info->state->records; + x->deleted = info->state->del; + x->delete_length = info->state->empty; + x->data_file_length =info->state->data_file_length; + x->index_file_length=info->state->key_file_length; + + x->keys = share->state.header.keys; + x->check_time = share->state.check_time; + x->mean_reclength = info->state->records ? + (ulong) ((info->state->data_file_length-info->state->empty)/ + info->state->records) : (ulong) share->min_pack_length; + } + if (flag & HA_STATUS_ERRKEY) + { + x->errkey= info->errkey; + x->dup_key_pos= info->dup_key_pos; + } + if (flag & HA_STATUS_CONST) + { + x->reclength = share->base.reclength; + x->max_data_file_length=share->base.max_data_file_length; + x->max_index_file_length=info->s->base.max_key_file_length; + x->filenr = info->dfile.file; + x->options = share->options; + x->create_time=share->state.create_time; + x->reflength= maria_get_pointer_length(share->base.max_data_file_length, + maria_data_pointer_size); + x->record_offset= (info->s->data_file_type == STATIC_RECORD ? + share->base.pack_reclength: 0); + x->sortkey= -1; /* No clustering */ + x->rec_per_key = share->state.rec_per_key_part; + x->key_map = share->state.key_map; + x->data_file_name = share->data_file_name; + x->index_file_name = share->index_file_name; + x->data_file_type = share->data_file_type; + } + if ((flag & HA_STATUS_TIME) && !my_fstat(info->dfile.file, &state, MYF(0))) + x->update_time=state.st_mtime; + else + x->update_time=0; + if (flag & HA_STATUS_AUTO) + { + x->auto_increment= share->state.auto_increment+1; + if (!x->auto_increment) /* This shouldn't happen */ + x->auto_increment= ~(ulonglong) 0; + } + DBUG_RETURN(0); +} + + +/* + Write a message to the error log. + + SYNOPSIS + _ma_report_error() + file_name Name of table file (e.g. index_file_name). + errcode Error number. + + DESCRIPTION + This function supplies my_error() with a table name. Most error + messages need one. Since string arguments in error messages are limited + to 64 characters by convention, we ensure that in case of truncation, + that the end of the index file path is in the message. This contains + the most valuable information (the table name and the database name). + + RETURN + void +*/ + +void _ma_report_error(int errcode, const char *file_name) +{ + uint length; + DBUG_ENTER("_ma_report_error"); + DBUG_PRINT("enter",("errcode %d, table '%s'", errcode, file_name)); + + if ((length= strlen(file_name)) > 64) + { + /* we first remove the directory */ + uint dir_length= dirname_length(file_name); + file_name+= dir_length; + if ((length-= dir_length) > 64) + { + /* still too long, chop start of table name */ + file_name+= length - 64; + } + } + my_error(errcode, MYF(ME_NOREFRESH), file_name); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/ma_init.c b/storage/maria/ma_init.c new file mode 100644 index 00000000000..8042c6d9873 --- /dev/null +++ b/storage/maria/ma_init.c @@ -0,0 +1,64 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Initialize an maria-database */ + +#include "maria_def.h" +#include +#include "ma_blockrec.h" +#include "trnman_public.h" + +my_bool maria_inited= FALSE; +pthread_mutex_t THR_LOCK_maria; + +/* + Initialize maria + + SYNOPSIS + maria_init() + + TODO + Open log files and do recovery if need + + RETURN + 0 ok + # error number +*/ + +int maria_init(void) +{ + if (!maria_inited) + { + maria_inited= TRUE; + pthread_mutex_init(&THR_LOCK_maria,MY_MUTEX_INIT_SLOW); + _ma_init_block_record_data(); + } + return 0; +} + + +void maria_end(void) +{ + if (maria_inited) + { + maria_inited= maria_multi_threaded= FALSE; + ft_free_stopwords(); + trnman_destroy(); + translog_destroy(); + end_pagecache(maria_log_pagecache, TRUE); + ma_control_file_end(); + pthread_mutex_destroy(&THR_LOCK_maria); + } +} diff --git a/storage/maria/ma_key.c b/storage/maria/ma_key.c new file mode 100644 index 00000000000..941d5d0665e --- /dev/null +++ b/storage/maria/ma_key.c @@ -0,0 +1,591 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Functions to handle keys */ + +#include "maria_def.h" +#include "m_ctype.h" +#include "ma_sp_defs.h" +#ifdef HAVE_IEEEFP_H +#include +#endif + +#define CHECK_KEYS /* Enable safety checks */ + +#define FIX_LENGTH(cs, pos, length, char_length) \ + do { \ + if (length > char_length) \ + char_length= my_charpos(cs, pos, pos+length, char_length); \ + set_if_smaller(char_length,length); \ + } while(0) + +static int _ma_put_key_in_record(MARIA_HA *info,uint keynr,byte *record); + +/* + Make a intern key from a record + + SYNOPSIS + _ma_make_key() + info MyiSAM handler + keynr key number + key Store created key here + record Record + filepos Position to record in the data file + + RETURN + Length of key +*/ + +uint _ma_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, MARIA_RECORD_POS filepos) +{ + const byte *pos; + byte *start; + reg1 HA_KEYSEG *keyseg; + my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; + DBUG_ENTER("_ma_make_key"); + + if (info->s->keyinfo[keynr].flag & HA_SPATIAL) + { + /* + TODO: nulls processing + */ +#ifdef HAVE_SPATIAL + DBUG_RETURN(_ma_sp_make_key(info,keynr, key,record,filepos)); +#else + DBUG_ASSERT(0); /* maria_open should check that this never happens*/ +#endif + } + + start=key; + for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=keyseg->length; + uint char_length; + CHARSET_INFO *cs=keyseg->charset; + + if (keyseg->null_bit) + { + if (record[keyseg->null_pos] & keyseg->null_bit) + { + *key++= 0; /* NULL in key */ + continue; + } + *key++=1; /* Not NULL */ + } + + char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : + length); + + pos= record+keyseg->start; + if (type == HA_KEYTYPE_BIT) + { + if (keyseg->bit_length) + { + uchar bits= get_rec_bits((uchar*) record + keyseg->bit_pos, + keyseg->bit_start, keyseg->bit_length); + *key++= (char) bits; + length--; + } + memcpy(key, pos, length); + key+= length; + continue; + } + if (keyseg->flag & HA_SPACE_PACK) + { + if (type != HA_KEYTYPE_NUM) + { + length= cs->cset->lengthsp(cs, pos, length); + } + else + { + const byte *end= pos + length; + while (pos < end && pos[0] == ' ') + pos++; + length= (uint) (end-pos); + } + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key, pos, (size_t) char_length); + key+=char_length; + continue; + } + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= (keyseg->bit_start == 1 ? 1 : 2); + uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ + set_if_smaller(length,tmp_length); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key,pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); + memcpy_fixed(&pos,pos+keyseg->bit_start,sizeof(char*)); + set_if_smaller(length,tmp_length); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key,pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_SWAP_KEY) + { /* Numerical column */ +#ifdef HAVE_ISNAN + if (type == HA_KEYTYPE_FLOAT) + { + float nr; + float4get(nr,pos); + if (isnan(nr)) + { + /* Replace NAN with zero */ + bzero(key,length); + key+=length; + continue; + } + } + else if (type == HA_KEYTYPE_DOUBLE) + { + double nr; + float8get(nr,pos); + if (isnan(nr)) + { + bzero(key,length); + key+=length; + continue; + } + } +#endif + pos+=length; + while (length--) + { + *key++ = *--pos; + } + continue; + } + FIX_LENGTH(cs, pos, length, char_length); + memcpy(key, pos, char_length); + if (length > char_length) + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); + key+= length; + } + _ma_dpointer(info,key,filepos); + DBUG_PRINT("exit",("keynr: %d",keynr)); + DBUG_DUMP("key",start,(uint) (key-start)+keyseg->length); + DBUG_EXECUTE("key", + _ma_print_key(DBUG_FILE,info->s->keyinfo[keynr].seg,start, + (uint) (key-start));); + DBUG_RETURN((uint) (key-start)); /* Return keylength */ +} /* _ma_make_key */ + + +/* + Pack a key to intern format from given format (c_rkey) + + SYNOPSIS + _ma_pack_key() + info MARIA handler + uint keynr key number + key Store packed key here + old Not packed key + k_length Length of 'old' to use + last_used_keyseg out parameter. May be NULL + + RETURN + length of packed key + + last_use_keyseg Store pointer to the keyseg after the last used one +*/ + +uint _ma_pack_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *old, uint k_length, HA_KEYSEG **last_used_keyseg) +{ + byte *start_key=key; + HA_KEYSEG *keyseg; + my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT; + DBUG_ENTER("_ma_pack_key"); + + for (keyseg=info->s->keyinfo[keynr].seg ; + keyseg->type && (int) k_length > 0; + old+=keyseg->length, keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=min((uint) keyseg->length,(uint) k_length); + uint char_length; + const byte *pos; + CHARSET_INFO *cs=keyseg->charset; + + if (keyseg->null_bit) + { + k_length--; + if (!(*key++= (char) 1-*old++)) /* Copy null marker */ + { + k_length-=length; + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + k_length-=2; /* Skip length */ + old+= 2; + } + continue; /* Found NULL */ + } + } + char_length= ((!is_ft && cs && cs->mbmaxlen > 1) ? length/cs->mbmaxlen : + length); + pos= old; + if (keyseg->flag & HA_SPACE_PACK) + { + const byte *end= pos + length; + if (type != HA_KEYTYPE_NUM) + { + while (end > pos && end[-1] == ' ') + end--; + } + else + { + while (pos < end && pos[0] == ' ') + pos++; + } + k_length-=length; + length=(uint) (end-pos); + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + memcpy(key,pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART)) + { + /* Length of key-part used with maria_rkey() always 2 */ + uint tmp_length=uint2korr(pos); + k_length-= 2+length; + pos+=2; + set_if_smaller(length,tmp_length); /* Safety */ + FIX_LENGTH(cs, pos, length, char_length); + store_key_length_inc(key,char_length); + old+=2; /* Skip length */ + memcpy(key, pos,(size_t) char_length); + key+= char_length; + continue; + } + else if (keyseg->flag & HA_SWAP_KEY) + { /* Numerical column */ + pos+=length; + k_length-=length; + while (length--) + { + *key++ = *--pos; + } + continue; + } + FIX_LENGTH(cs, pos, length, char_length); + memcpy(key, pos, char_length); + if (length > char_length) + cs->cset->fill(cs, (char*) key+char_length, length-char_length, ' '); + key+= length; + k_length-=length; + } + if (last_used_keyseg) + *last_used_keyseg= keyseg; + +#ifdef NOT_USED + if (keyseg->type) + { + /* Part-key ; fill with ASCII 0 for easier searching */ + length= (uint) -k_length; /* unused part of last key */ + do + { + if (keyseg->flag & HA_NULL_PART) + length++; + if (keyseg->flag & HA_SPACE_PACK) + length+=2; + else + length+= keyseg->length; + keyseg++; + } while (keyseg->type); + bzero(key,length); + key+=length; + } +#endif + DBUG_PRINT("exit", ("length: %u", (uint) (key-start_key))); + DBUG_RETURN((uint) (key-start_key)); +} /* _ma_pack_key */ + + + +/* + Store found key in record + + SYNOPSIS + _ma_put_key_in_record() + info MARIA handler + keynr Key number that was used + record Store key here + + Last read key is in info->lastkey + + NOTES + Used when only-keyread is wanted + + RETURN + 0 ok + 1 error +*/ + +static int _ma_put_key_in_record(register MARIA_HA *info, uint keynr, + byte *record) +{ + reg2 byte *key; + byte *pos,*key_end; + reg1 HA_KEYSEG *keyseg; + byte *blob_ptr; + DBUG_ENTER("_ma_put_key_in_record"); + + blob_ptr= info->lastkey2; /* Place to put blob parts */ + key=info->lastkey; /* KEy that was read */ + key_end=key+info->lastkey_length; + for (keyseg=info->s->keyinfo[keynr].seg ; keyseg->type ;keyseg++) + { + if (keyseg->null_bit) + { + if (!*key++) + { + record[keyseg->null_pos]|= keyseg->null_bit; + continue; + } + record[keyseg->null_pos]&= ~keyseg->null_bit; + } + if (keyseg->type == HA_KEYTYPE_BIT) + { + uint length= keyseg->length; + + if (keyseg->bit_length) + { + byte bits= *key++; + set_rec_bits(bits, record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + length--; + } + else + { + clr_rec_bits(record + keyseg->bit_pos, keyseg->bit_start, + keyseg->bit_length); + } + memcpy(record + keyseg->start, key, length); + key+= length; + continue; + } + if (keyseg->flag & HA_SPACE_PACK) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + pos= record+keyseg->start; + if (keyseg->type != (int) HA_KEYTYPE_NUM) + { + memcpy(pos,key,(size_t) length); + keyseg->charset->cset->fill(keyseg->charset, + pos + length, keyseg->length - length, + ' '); + } + else + { + bfill(pos,keyseg->length-length,' '); + memcpy(pos+keyseg->length-length,key,(size_t) length); + } + key+=length; + continue; + } + + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + /* Store key length */ + if (keyseg->bit_start == 1) + *(uchar*) (record+keyseg->start)= (uchar) length; + else + int2store(record+keyseg->start, length); + /* And key data */ + memcpy(record+keyseg->start + keyseg->bit_start, key, length); + key+= length; + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint length; + get_key_length(length,key); +#ifdef CHECK_KEYS + if (length > keyseg->length || key+length > key_end) + goto err; +#endif + memcpy(record+keyseg->start+keyseg->bit_start, + (char*) &blob_ptr,sizeof(char*)); + memcpy(blob_ptr,key,length); + blob_ptr+=length; + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + _ma_store_blob_length(record+keyseg->start, + (uint) keyseg->bit_start,length); + key+=length; + } + else if (keyseg->flag & HA_SWAP_KEY) + { + byte *to= record+keyseg->start+keyseg->length; + byte *end= key+keyseg->length; +#ifdef CHECK_KEYS + if (end > key_end) + goto err; +#endif + do + { + *--to= *key++; + } while (key != end); + continue; + } + else + { +#ifdef CHECK_KEYS + if (key+keyseg->length > key_end) + goto err; +#endif + memcpy(record+keyseg->start, key, (size_t) keyseg->length); + key+= keyseg->length; + } + } + DBUG_RETURN(0); + +err: + DBUG_RETURN(1); /* Crashed row */ +} /* _ma_put_key_in_record */ + + + /* Here when key reads are used */ + +int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) +{ + fast_ma_writeinfo(info); + if (filepos != HA_OFFSET_ERROR) + { + if (info->lastinx >= 0) + { /* Read only key */ + if (_ma_put_key_in_record(info,(uint) info->lastinx,buf)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return -1; + } + info->update|= HA_STATE_AKTIV; /* We should find a record */ + return 0; + } + my_errno=HA_ERR_WRONG_INDEX; + } + return(-1); /* Wrong data to read */ +} + + +/* + Retrieve auto_increment info + + SYNOPSIS + retrieve_auto_increment() + info Maria handler + record Row to update + + IMPLEMENTATION + For signed columns we don't retrieve the auto increment value if it's + less than zero. +*/ + +ulonglong ma_retrieve_auto_increment(MARIA_HA *info,const byte *record) +{ + ulonglong value= 0; /* Store unsigned values here */ + longlong s_value= 0; /* Store signed values here */ + HA_KEYSEG *keyseg= info->s->keyinfo[info->s->base.auto_key-1].seg; + const byte *key= record + keyseg->start; + + switch (keyseg->type) { + case HA_KEYTYPE_INT8: + s_value= (longlong) *(char*)key; + break; + case HA_KEYTYPE_BINARY: + value=(ulonglong) *(uchar*) key; + break; + case HA_KEYTYPE_SHORT_INT: + s_value= (longlong) sint2korr(key); + break; + case HA_KEYTYPE_USHORT_INT: + value=(ulonglong) uint2korr(key); + break; + case HA_KEYTYPE_LONG_INT: + s_value= (longlong) sint4korr(key); + break; + case HA_KEYTYPE_ULONG_INT: + value=(ulonglong) uint4korr(key); + break; + case HA_KEYTYPE_INT24: + s_value= (longlong) sint3korr(key); + break; + case HA_KEYTYPE_UINT24: + value=(ulonglong) uint3korr(key); + break; + case HA_KEYTYPE_FLOAT: /* This shouldn't be used */ + { + float f_1; + float4get(f_1,key); + /* Ignore negative values */ + value = (f_1 < (float) 0.0) ? 0 : (ulonglong) f_1; + break; + } + case HA_KEYTYPE_DOUBLE: /* This shouldn't be used */ + { + double f_1; + float8get(f_1,key); + /* Ignore negative values */ + value = (f_1 < 0.0) ? 0 : (ulonglong) f_1; + break; + } + case HA_KEYTYPE_LONGLONG: + s_value= sint8korr(key); + break; + case HA_KEYTYPE_ULONGLONG: + value= uint8korr(key); + break; + default: + DBUG_ASSERT(0); + value=0; /* Error */ + break; + } + + /* + The following code works becasue if s_value < 0 then value is 0 + and if s_value == 0 then value will contain either s_value or the + correct value. + */ + return (s_value > 0) ? (ulonglong) s_value : value; +} diff --git a/storage/maria/ma_keycache.c b/storage/maria/ma_keycache.c new file mode 100644 index 00000000000..7a2a56488e6 --- /dev/null +++ b/storage/maria/ma_keycache.c @@ -0,0 +1,163 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Key cache assignments +*/ + +#include "maria_def.h" + +/* + Assign pages of the index file for a table to a key cache + + SYNOPSIS + maria_assign_to_pagecache() + info open table + key_map map of indexes to assign to the key cache + pagecache_ptr pointer to the key cache handle + assign_lock Mutex to lock during assignment + + PREREQUESTS + One must have a READ lock or a WRITE lock on the table when calling + the function to ensure that there is no other writers to it. + + The caller must also ensure that one doesn't call this function from + two different threads with the same table. + + NOTES + At present pages for all indexes must be assigned to the same key cache. + In future only pages for indexes specified in the key_map parameter + of the table will be assigned to the specified key cache. + + RETURN VALUE + 0 If a success + # Error code +*/ + +int maria_assign_to_pagecache(MARIA_HA *info, + ulonglong key_map __attribute__((unused)), + PAGECACHE *pagecache) +{ + int error= 0; + MARIA_SHARE* share= info->s; + DBUG_ENTER("maria_assign_to_pagecache"); + DBUG_PRINT("enter", + ("old_pagecache_handle: 0x%lx new_pagecache_handle: 0x%lx", + (long) share->pagecache, (long) pagecache)); + + /* + Skip operation if we didn't change key cache. This can happen if we + call this for all open instances of the same table + */ + if (share->pagecache == pagecache) + DBUG_RETURN(0); + + /* + First flush all blocks for the table in the old key cache. + This is to ensure that the disk is consistent with the data pages + in memory (which may not be the case if the table uses delayed_key_write) + + Note that some other read thread may still fill in the key cache with + new blocks during this call and after, but this doesn't matter as + all threads will start using the new key cache for their next call to + maria library and we know that there will not be any changed blocks + in the old key cache. + */ + + if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_RELEASE)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); /* Mark that table must be checked */ + } + + /* + Flush the new key cache for this file. This is needed to ensure + that there is no old blocks (with outdated data) left in the new key + cache from an earlier assign_to_keycache operation + + (This can never fail as there is never any not written data in the + new key cache) + */ + (void) flush_pagecache_blocks(pagecache, &share->kfile, FLUSH_RELEASE); + + /* + ensure that setting the key cache and changing the multi_pagecache + is done atomicly + */ + pthread_mutex_lock(&share->intern_lock); + /* + Tell all threads to use the new key cache + This should be seen at the lastes for the next call to an maria function. + */ + share->pagecache= pagecache; + + /* store the key cache in the global hash structure for future opens */ + if (multi_pagecache_set(share->unique_file_name, share->unique_name_length, + share->pagecache)) + error= my_errno; + pthread_mutex_unlock(&share->intern_lock); + DBUG_RETURN(error); +} + + +/* + Change all MARIA entries that uses one key cache to another key cache + + SYNOPSIS + maria_change_pagecache() + old_pagecache Old key cache + new_pagecache New key cache + + NOTES + This is used when we delete one key cache. + + To handle the case where some other threads tries to open an MARIA + table associated with the to-be-deleted key cache while this operation + is running, we have to call 'multi_pagecache_change()' from this + function while we have a lock on the MARIA table list structure. + + This is safe as long as it's only MARIA that is using this specific + key cache. +*/ + + +void maria_change_pagecache(PAGECACHE *old_pagecache, + PAGECACHE *new_pagecache) +{ + LIST *pos; + DBUG_ENTER("maria_change_pagecache"); + + /* + Lock list to ensure that no one can close the table while we manipulate it + */ + pthread_mutex_lock(&THR_LOCK_maria); + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info= (MARIA_HA*) pos->data; + MARIA_SHARE *share= info->s; + if (share->pagecache == old_pagecache) + maria_assign_to_pagecache(info, (ulonglong) ~0, new_pagecache); + } + + /* + We have to do the following call while we have the lock on the + MARIA list structure to ensure that another thread is not trying to + open a new table that will be associted with the old key cache + */ + multi_pagecache_change(old_pagecache, new_pagecache); + pthread_mutex_unlock(&THR_LOCK_maria); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/ma_least_recently_dirtied.c b/storage/maria/ma_least_recently_dirtied.c new file mode 100644 index 00000000000..3d2c85bbf98 --- /dev/null +++ b/storage/maria/ma_least_recently_dirtied.c @@ -0,0 +1,105 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3261 Maria - background flushing of the least-recently-dirtied pages + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* + To be part of the page cache. + The pseudocode below is dependent on the page cache + which is being designed WL#3134. It is not clear if I need to do page + copies, as the page cache already keeps page copies. + So, this code will move to the page cache and take inspiration from its + methods. Below is just to give the idea of what could be done. + And I should compare my imaginations to WL#3134. +*/ + +/* Here is the implementation of this module */ + +#include "page_cache.h" +#include "least_recently_dirtied.h" + +/* + This thread does background flush of pieces of the LRD, and serves + requests for asynchronous checkpoints. + Just launch it when engine starts. + MikaelR questioned why the same thread does two different jobs, the risk + could be that while a checkpoint happens no LRD flushing happens. + For now, we only do checkpoints - no LRD flushing (to be done when the + second version of the page cache is ready WL#3077). + Reasons to delay: + - Recovery will work (just slower) + - new page cache may be different, why do then re-do + - current pagecache probably has issues with flushing when somebody is + writing to the table being flushed - better avoid that. +*/ +pthread_handler_decl background_flush_and_checkpoint_thread() +{ + while (this_thread_not_killed) + { + /* note that we don't care of the checkpoint's success */ + (void)execute_asynchronous_checkpoint_if_any(); + sleep(5); + /* + in the final version, we will not sleep but call flush_pages_from_LRD() + repeatedly. If there are no dirty pages, we'll make sure to not have a + tight loop probing for checkpoint requests. + */ + } +} + +/* The rest of this file will not serve in first version */ + +/* + flushes only the first pages of the LRD. + max_this_number could be FLUSH_CACHE (of mf_pagecache.c) for example. +*/ +flush_pages_from_LRD(uint max_this_number, LSN max_this_lsn) +{ + /* + One rule to better observe is "page must be flushed to disk before it is + removed from LRD" (otherwise checkpoint is incomplete info, corruption). + */ + + /* + Build a list of pages to flush: + changed_blocks[i] is roughly sorted by descending rec_lsn, + so we could do a merge sort of changed_blocks[] lists, stopping after we + have the max_this_number first elements or after we have found a page with + rec_lsn > max_this_lsn. + Then do like pagecache_flush_blocks_int() does (beware! this time we are + not alone on the file! there may be dangers! TODO: sort this out). + */ + + /* + MikaelR noted that he observed that Linux's file cache may never fsync to + disk until this cache is full, at which point it decides to empty the + cache, making the machine very slow. A solution was to fsync after writing + 2 MB. + */ +} + +/* + Note that when we flush all page from LRD up to rec_lsn>=max_lsn, + this is approximate because the LRD list may + not be exactly sorted by rec_lsn (because for a big row, all pages of the + row are inserted into the LRD with rec_lsn being the LSN of the REDO for the + first page, so if there are concurrent insertions, the last page of the big + row may have a smaller rec_lsn than the previous pages inserted by + concurrent inserters). +*/ diff --git a/storage/maria/ma_least_recently_dirtied.h b/storage/maria/ma_least_recently_dirtied.h new file mode 100644 index 00000000000..1d57f3596f8 --- /dev/null +++ b/storage/maria/ma_least_recently_dirtied.h @@ -0,0 +1,25 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3261 Maria - background flushing of the least-recently-dirtied pages + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +/* flushes all page from LRD up to approximately rec_lsn>=max_lsn */ +int flush_all_LRD_to_lsn(LSN max_lsn); diff --git a/storage/maria/ma_locking.c b/storage/maria/ma_locking.c new file mode 100644 index 00000000000..abb095d47c2 --- /dev/null +++ b/storage/maria/ma_locking.c @@ -0,0 +1,522 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + locking of isam-tables. + reads info from a isam-table. Must be first request before doing any furter + calls to any isamfunktion. Is used to allow many process use the same + isamdatabase. +*/ + +#include "ma_ftdefs.h" + + /* lock table by F_UNLCK, F_RDLCK or F_WRLCK */ + +int maria_lock_database(MARIA_HA *info, int lock_type) +{ + int error; + uint count; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_lock_database"); + DBUG_PRINT("enter",("lock_type: %d old lock %d r_locks: %u w_locks: %u " + "global_changed: %d open_count: %u name: '%s'", + lock_type, info->lock_type, share->r_locks, + share->w_locks, + share->global_changed, share->state.open_count, + share->index_file_name)); + if (share->options & HA_OPTION_READ_ONLY_DATA || + info->lock_type == lock_type) + DBUG_RETURN(0); + if (lock_type == F_EXTRA_LCK) /* Used by TMP tables */ + { + ++share->w_locks; + ++share->tot_locks; + info->lock_type= lock_type; + DBUG_RETURN(0); + } + + error=0; + pthread_mutex_lock(&share->intern_lock); + if (share->kfile.file >= 0) /* May only be false on windows */ + { + switch (lock_type) { + case F_UNLCK: + maria_ftparser_call_deinitializer(info); + if (info->lock_type == F_RDLCK) + count= --share->r_locks; + else + count= --share->w_locks; + --share->tot_locks; + if (info->lock_type == F_WRLCK && !share->w_locks) + { + if (!share->delay_key_write && + flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_KEEP)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + /* Mark that table must be checked */ + maria_mark_crashed(info); + } + if (share->data_file_type == BLOCK_RECORD && + flush_pagecache_blocks(share->pagecache, &info->dfile, FLUSH_KEEP)) + { + error= my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + /* Mark that table must be checked */ + maria_mark_crashed(info); + } + } + if (info->opt_flag & (READ_CACHE_USED | WRITE_CACHE_USED)) + { + if (end_io_cache(&info->rec_cache)) + { + error=my_errno; + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + } + if (!count) + { + DBUG_PRINT("info",("changed: %u w_locks: %u", + (uint) share->changed, share->w_locks)); + if (share->changed && !share->w_locks) + { +#ifdef HAVE_MMAP + if ((info->s->mmaped_length != + info->s->state.state.data_file_length) && + (info->s->nonmmaped_inserts > MAX_NONMAPPED_INSERTS)) + { + if (info->s->concurrent_insert) + rw_wrlock(&info->s->mmap_lock); + _ma_remap_file(info, info->s->state.state.data_file_length); + info->s->nonmmaped_inserts= 0; + if (info->s->concurrent_insert) + rw_unlock(&info->s->mmap_lock); + } +#endif + share->state.process= share->last_process=share->this_process; + share->state.unique= info->last_unique= info->this_unique; + share->state.update_count= info->last_loop= ++info->this_loop; + if (_ma_state_info_write(share->kfile.file, &share->state, 1)) + error=my_errno; + share->changed=0; + if (maria_flush) + { + if (_ma_sync_table_files(info)) + error= my_errno; + } + else + share->not_flushed=1; + if (error) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + } + } + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + info->lock_type= F_UNLCK; + break; + case F_RDLCK: + if (info->lock_type == F_WRLCK) + { + /* + Change RW to READONLY + + mysqld does not turn write locks to read locks, + so we're never here in mysqld. + */ + share->w_locks--; + share->r_locks++; + info->lock_type=lock_type; + break; + } + if (!share->r_locks && !share->w_locks) + { + if (_ma_state_info_read_dsk(share->kfile.file, &share->state, 1)) + { + error=my_errno; + break; + } + } + VOID(_ma_test_if_changed(info)); + share->r_locks++; + share->tot_locks++; + info->lock_type=lock_type; + break; + case F_WRLCK: + if (info->lock_type == F_RDLCK) + { /* Change READONLY to RW */ + if (share->r_locks == 1) + { + share->r_locks--; + share->w_locks++; + info->lock_type=lock_type; + break; + } + } + if (!(share->options & HA_OPTION_READ_ONLY_DATA)) + { + if (!share->w_locks) + { + if (!share->r_locks) + { + if (_ma_state_info_read_dsk(share->kfile.file, &share->state, 1)) + { + error=my_errno; + break; + } + } + } + } + VOID(_ma_test_if_changed(info)); + + info->lock_type=lock_type; + info->invalidator=info->s->invalidator; + share->w_locks++; + share->tot_locks++; + break; + default: + DBUG_ASSERT(0); + break; /* Impossible */ + } + } +#ifdef __WIN__ + else + { + /* + Check for bad file descriptors if this table is part + of a merge union. Failing to capture this may cause + a crash on windows if the table is renamed and + later on referenced by the merge table. + */ + if( info->owned_by_merge && (info->s)->kfile.file < 0 ) + { + error = HA_ERR_NO_SUCH_TABLE; + } + } +#endif + pthread_mutex_unlock(&share->intern_lock); + DBUG_RETURN(error); +} /* maria_lock_database */ + + +/**************************************************************************** + The following functions are called by thr_lock() in threaded applications +****************************************************************************/ + +/* + Create a copy of the current status for the table + + SYNOPSIS + _ma_get_status() + param Pointer to Myisam handler + concurrent_insert Set to 1 if we are going to do concurrent inserts + (THR_WRITE_CONCURRENT_INSERT was used) +*/ + +void _ma_get_status(void* param, int concurrent_insert) +{ + MARIA_HA *info=(MARIA_HA*) param; + DBUG_ENTER("_ma_get_status"); + DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d", + (long) info->s->state.state.key_file_length, + (long) info->s->state.state.data_file_length, + concurrent_insert)); +#ifndef DBUG_OFF + if (info->state->key_file_length > info->s->state.state.key_file_length || + info->state->data_file_length > info->s->state.state.data_file_length) + DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", + (long) info->state->key_file_length, + (long) info->state->data_file_length)); +#endif + info->save_state=info->s->state.state; + info->state= &info->save_state; + info->append_insert_at_end= concurrent_insert; + DBUG_VOID_RETURN; +} + + +void _ma_update_status(void* param) +{ + MARIA_HA *info=(MARIA_HA*) param; + /* + Because someone may have closed the table we point at, we only + update the state if its our own state. This isn't a problem as + we are always pointing at our own lock or at a read lock. + (This is enforced by thr_multi_lock.c) + */ + if (info->state == &info->save_state) + { +#ifndef DBUG_OFF + DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld", + (long) info->state->key_file_length, + (long) info->state->data_file_length)); + if (info->state->key_file_length < info->s->state.state.key_file_length || + info->state->data_file_length < info->s->state.state.data_file_length) + DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", + (long) info->s->state.state.key_file_length, + (long) info->s->state.state.data_file_length)); +#endif + info->s->state.state= *info->state; + info->state= &info->s->state.state; + } + info->append_insert_at_end= 0; + + /* + We have to flush the write cache here as other threads may start + reading the table before maria_lock_database() is called + */ + if (info->opt_flag & WRITE_CACHE_USED) + { + if (end_io_cache(&info->rec_cache)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + info->opt_flag&= ~WRITE_CACHE_USED; + } +} + + +void _ma_restore_status(void *param) +{ + MARIA_HA *info= (MARIA_HA*) param; + info->state= &info->s->state.state; + info->append_insert_at_end= 0; +} + + +void _ma_copy_status(void* to,void *from) +{ + ((MARIA_HA*) to)->state= &((MARIA_HA*) from)->save_state; +} + + +/* + Check if should allow concurrent inserts + + IMPLEMENTATION + Allow concurrent inserts if we don't have a hole in the table or + if there is no active write lock and there is active read locks and + maria_concurrent_insert == 2. In this last case the new + row('s) are inserted at end of file instead of filling up the hole. + + The last case is to allow one to inserts into a heavily read-used table + even if there is holes. + + NOTES + If there is a an rtree indexes in the table, concurrent inserts are + disabled in maria_open() + + RETURN + 0 ok to use concurrent inserts + 1 not ok +*/ + +my_bool _ma_check_status(void *param) +{ + MARIA_HA *info=(MARIA_HA*) param; + /* + The test for w_locks == 1 is here because this thread has already done an + external lock (in other words: w_locks == 1 means no other threads has + a write lock) + */ + DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u", + (long) info->s->state.dellink, (uint) info->s->r_locks, + (uint) info->s->w_locks)); + return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR || + (maria_concurrent_insert == 2 && info->s->r_locks && + info->s->w_locks == 1)); +} + + +/**************************************************************************** + ** functions to read / write the state +****************************************************************************/ + +int _ma_readinfo(register MARIA_HA *info, int lock_type, int check_keybuffer) +{ + DBUG_ENTER("_ma_readinfo"); + + if (info->lock_type == F_UNLCK) + { + MARIA_SHARE *share=info->s; + if (!share->tot_locks) + { + if (_ma_state_info_read_dsk(share->kfile.file, &share->state, 1)) + { + int error=my_errno ? my_errno : -1; + my_errno=error; + DBUG_RETURN(1); + } + } + if (check_keybuffer) + VOID(_ma_test_if_changed(info)); + info->invalidator=info->s->invalidator; + } + else if (lock_type == F_WRLCK && info->lock_type == F_RDLCK) + { + my_errno=EACCES; /* Not allowed to change */ + DBUG_RETURN(-1); /* when have read_lock() */ + } + DBUG_RETURN(0); +} /* _ma_readinfo */ + + +/* + Every isam-function that uppdates the isam-database MUST end with this + request +*/ + +int _ma_writeinfo(register MARIA_HA *info, uint operation) +{ + int error,olderror; + MARIA_SHARE *share= info->s; + DBUG_ENTER("_ma_writeinfo"); + DBUG_PRINT("info",("operation: %u tot_locks: %u", operation, + share->tot_locks)); + + error=0; + if (share->tot_locks == 0) + { + if (operation) + { /* Two threads can't be here */ + olderror= my_errno; /* Remember last error */ + share->state.process= share->last_process= share->this_process; + share->state.unique= info->last_unique= info->this_unique; + share->state.update_count= info->last_loop= ++info->this_loop; + if ((error= _ma_state_info_write(share->kfile.file, &share->state, 1))) + olderror=my_errno; +#ifdef __WIN__ + if (maria_flush) + { + _commit(share->kfile.file); + _commit(info->dfile.file); + } +#endif + my_errno=olderror; + } + } + else if (operation) + share->changed= 1; /* Mark keyfile changed */ + DBUG_RETURN(error); +} /* _ma_writeinfo */ + + + /* Test if someone has changed the database */ + /* (Should be called after readinfo) */ + +int _ma_test_if_changed(register MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + if (share->state.process != share->last_process || + share->state.unique != info->last_unique || + share->state.update_count != info->last_loop) + { /* Keyfile has changed */ + DBUG_PRINT("info",("index file changed")); + if (share->state.process != share->this_process) + VOID(flush_pagecache_blocks(share->pagecache, &share->kfile, + FLUSH_RELEASE)); + share->last_process=share->state.process; + info->last_unique= share->state.unique; + info->last_loop= share->state.update_count; + info->update|= HA_STATE_WRITTEN; /* Must use file on next */ + info->data_changed= 1; /* For maria_is_changed */ + return 1; + } + return (!(info->update & HA_STATE_AKTIV) || + (info->update & (HA_STATE_WRITTEN | HA_STATE_DELETED | + HA_STATE_KEY_CHANGED))); +} /* _ma_test_if_changed */ + + +/* + Put a mark in the .MYI file that someone is updating the table + + + DOCUMENTATION + + state.open_count in the .MYI file is used the following way: + - For the first change of the .MYI file in this process open_count is + incremented by maria_mark_file_change(). (We have a write lock on the file + when this happens) + - In maria_close() it's decremented by _ma_decrement_open_count() if it + was incremented in the same process. + + This mean that if we are the only process using the file, the open_count + tells us if the MARIA file wasn't properly closed. (This is true if + my_disable_locking is set). +*/ + + +int _ma_mark_file_changed(MARIA_HA *info) +{ + char buff[3]; + register MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_mark_file_changed"); + + if (!(share->state.changed & STATE_CHANGED) || ! share->global_changed) + { + share->state.changed|=(STATE_CHANGED | STATE_NOT_ANALYZED | + STATE_NOT_OPTIMIZED_KEYS); + if (!share->global_changed) + { + share->global_changed=1; + share->state.open_count++; + } + if (!share->temporary) + { + mi_int2store(buff,share->state.open_count); + buff[2]=1; /* Mark that it's changed */ + DBUG_RETURN(my_pwrite(share->kfile.file, buff, sizeof(buff), + sizeof(share->state.header), + MYF(MY_NABP))); + } + } + DBUG_RETURN(0); +} + + +/* + This is only called by close or by extra(HA_FLUSH) if the OS has the pwrite() + call. In these context the following code should be safe! + */ + +int _ma_decrement_open_count(MARIA_HA *info) +{ + char buff[2]; + register MARIA_SHARE *share=info->s; + int lock_error=0,write_error=0; + if (share->global_changed) + { + uint old_lock=info->lock_type; + share->global_changed=0; + lock_error=maria_lock_database(info,F_WRLCK); + /* Its not fatal even if we couldn't get the lock ! */ + if (share->state.open_count > 0) + { + share->state.open_count--; + mi_int2store(buff,share->state.open_count); + write_error= my_pwrite(share->kfile.file, buff, sizeof(buff), + sizeof(share->state.header), + MYF(MY_NABP)); + } + if (!lock_error) + lock_error=maria_lock_database(info,old_lock); + } + return test(lock_error || write_error); +} diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c new file mode 100644 index 00000000000..9ed1d4b9d93 --- /dev/null +++ b/storage/maria/ma_loghandler.c @@ -0,0 +1,5717 @@ +/* Copyright (C) 2007 MySQL AB & Sanja Belkin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include "ma_blockrec.h" +#include "trnman.h" + +/** + @file + @brief Module which writes and reads to a transaction log + + @todo LOG: in functions where the log's lock is required, a + translog_assert_owner() could be added. +*/ + +/* number of opened log files in the pagecache (should be at least 2) */ +#define OPENED_FILES_NUM 3 + +/* records buffer size (should be LOG_PAGE_SIZE * n) */ +#define TRANSLOG_WRITE_BUFFER (1024*1024) +/* min chunk length */ +#define TRANSLOG_MIN_CHUNK 3 +/* + Number of buffers used by loghandler + + Should be at least 4, because one thread can block up to 2 buffers in + normal circumstances (less then half of one and full other, or just + switched one and other), But if we met end of the file in the middle and + have to switch buffer it will be 3. + 1 buffer for flushing/writing. + We have a bigger number here for higher concurrency. +*/ +#define TRANSLOG_BUFFERS_NO 5 +/* number of bytes (+ header) which can be unused on first page in sequence */ +#define TRANSLOG_MINCHUNK_CONTENT 1 +/* version of log file */ +#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */ + +#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */ + +/* QQ: For temporary debugging */ +#define UNRECOVERABLE_ERROR(E) \ + do { \ + DBUG_PRINT("error", E); \ + printf E; \ + putchar('\n'); \ + } while(0); + +/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */ +#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE) +#define MAX_NUMBER_OF_LSNS_PER_RECORD 2 + +/* record parts descriptor */ +struct st_translog_parts +{ + /* full record length */ + translog_size_t record_length; + /* full record length with chunk headers */ + translog_size_t total_record_length; + /* current part index */ + uint current; + /* total number of elements in parts */ + uint elements; + /* array of parts (LEX_STRING) */ + LEX_STRING *parts; +}; + +/* log write buffer descriptor */ +struct st_translog_buffer +{ + LSN last_lsn; + /* This buffer offset in the file */ + TRANSLOG_ADDRESS offset; + /* + How much written (or will be written when copy_to_buffer_in_progress + become 0) to this buffer + */ + translog_size_t size; + /* File handler for this buffer */ + File file; + /* Threads which are waiting for buffer filling/freeing */ + WQUEUE waiting_filling_buffer; + /* Number of record which are in copy progress */ + uint copy_to_buffer_in_progress; + /* list of waiting buffer ready threads */ + struct st_my_thread_var *waiting_flush; + struct st_translog_buffer *overlay; +#ifndef DBUG_OFF + uint buffer_no; +#endif + /* lock for the buffer. Current buffer also lock the handler */ + pthread_mutex_t mutex; + /* IO cache for current log */ + byte buffer[TRANSLOG_WRITE_BUFFER]; +}; + + +struct st_buffer_cursor +{ + /* pointer on the buffer */ + byte *ptr; + /* current buffer */ + struct st_translog_buffer *buffer; + /* current page fill */ + uint16 current_page_fill; + /* how many times we finish this page to write it */ + uint16 write_counter; + /* previous write offset */ + uint16 previous_offset; + /* Number of current buffer */ + uint8 buffer_no; + my_bool chaser, protected; +}; + + +struct st_translog_descriptor +{ + /* *** Parameters of the log handler *** */ + + /* Page cache for the log reads */ + PAGECACHE *pagecache; + /* Flags */ + uint flags; + /* max size of one log size (for new logs creation) */ + uint32 log_file_max_size; + /* server version */ + uint32 server_version; + /* server ID */ + uint32 server_id; + /* Loghandler's buffer capacity in case of chunk 2 filling */ + uint32 buffer_capacity_chunk_2; + /* Half of the buffer capacity in case of chunk 2 filling */ + uint32 half_buffer_capacity_chunk_2; + /* Page overhead calculated by flags */ + uint16 page_overhead; + /* Page capacity calculated by flags (TRANSLOG_PAGE_SIZE-page_overhead-1) */ + uint16 page_capacity_chunk_2; + /* Directory to store files */ + char directory[FN_REFLEN]; + + /* *** Current state of the log handler *** */ + /* Current and (OPENED_FILES_NUM-1) last logs number in page cache */ + File log_file_num[OPENED_FILES_NUM]; + File directory_fd; + /* buffers for log writing */ + struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO]; + /* + horizon - visible end of the log (here is absolute end of the log: + position where next chunk can start + */ + TRANSLOG_ADDRESS horizon; + /* horizon buffer cursor */ + struct st_buffer_cursor bc; + + /* Last flushed LSN */ + LSN flushed; + LSN sent_to_file; + pthread_mutex_t sent_to_file_lock; +}; + +static struct st_translog_descriptor log_descriptor; + +/* Marker for end of log */ +static byte end_of_log= 0; + +my_bool translog_inited= 0; + +/* record classes */ +enum record_class +{ + LOGRECTYPE_NOT_ALLOWED, + LOGRECTYPE_VARIABLE_LENGTH, + LOGRECTYPE_PSEUDOFIXEDLENGTH, + LOGRECTYPE_FIXEDLENGTH +}; + +/* chunk types */ +#define TRANSLOG_CHUNK_LSN 0x00 /* 0 chunk refer as LSN (head or tail */ +#define TRANSLOG_CHUNK_FIXED (1 << 6) /* 1 (pseudo)fixed record (also LSN) */ +#define TRANSLOG_CHUNK_NOHDR (2 << 6) /* 2 no head chunk (till page end) */ +#define TRANSLOG_CHUNK_LNGTH (3 << 6) /* 3 chunk with chunk length */ +#define TRANSLOG_CHUNK_TYPE (3 << 6) /* Mask to get chunk type */ +#define TRANSLOG_REC_TYPE 0x3F /* Mask to get record type */ + +/* compressed (relative) LSN constants */ +#define TRANSLOG_CLSN_LEN_BITS 0xC0 /* Mask to get compressed LSN length */ + +typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, + TRN *trn, struct st_maria_share *share, + struct st_translog_parts *parts); + +typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, + TRN *trn, + LSN *lsn, + struct st_translog_parts *parts); + +typedef uint16(*read_rec_hook) (enum translog_record_type type, + uint16 read_length, uchar *read_buff, + byte *decoded_buff); + +/* + Descriptor of log record type + Note: Don't reorder because of constructs later... +*/ +struct st_log_record_type_descriptor +{ + /* internal class of the record */ + enum record_class class; + /* + length for fixed-size record, pseudo-fixed record + length with uncompressed LSNs + */ + uint16 fixed_length; + /* how much record body (belonged to headers too) read with headers */ + uint16 read_header_len; + /* HOOK for writing the record called before lock */ + prewrite_rec_hook prewrite_hook; + /* HOOK for writing the record called when LSN is known, inside lock */ + inwrite_rec_hook inwrite_hook; + /* HOOK for reading headers */ + read_rec_hook read_hook; + /* + For pseudo fixed records number of compressed LSNs followed by + system header + */ + int16 compressed_LSN; +}; + + +#include +/* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */ +static MARIA_SHARE **id_to_share= NULL; +#define SHARE_ID_MAX 65535 /* array's size */ +/* lock for id_to_share */ +static my_atomic_rwlock_t LOCK_id_to_share; + +static my_bool write_hook_for_redo(enum translog_record_type type, + TRN *trn, LSN *lsn, + struct st_translog_parts *parts); +static my_bool write_hook_for_undo(enum translog_record_type type, + TRN *trn, LSN *lsn, + struct st_translog_parts *parts); + +/* + Initialize log_record_type_descriptors + + NOTE that after first public Maria release, these can NOT be changed +*/ +typedef struct st_log_record_type_descriptor LOG_DESC; + +static LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; + +static LOG_DESC INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE= +{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE= +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 7, 7, NULL, NULL, NULL, 1}; + +static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 12, NULL, NULL, NULL, 1}; + +static LOG_DESC INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE= +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 23, 23, NULL, NULL, NULL, 2}; + +static LOG_DESC INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 19, NULL, NULL, NULL, 2}; + + +void example_loghandler_init() +{ + log_record_type_descriptor[LOGREC_FIXED_RECORD_0LSN_EXAMPLE]= + INIT_LOGREC_FIXED_RECORD_0LSN_EXAMPLE; + log_record_type_descriptor[LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE]= + INIT_LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE; + log_record_type_descriptor[LOGREC_FIXED_RECORD_1LSN_EXAMPLE]= + INIT_LOGREC_FIXED_RECORD_1LSN_EXAMPLE; + log_record_type_descriptor[LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE]= + INIT_LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE; + log_record_type_descriptor[LOGREC_FIXED_RECORD_2LSN_EXAMPLE]= + INIT_LOGREC_FIXED_RECORD_2LSN_EXAMPLE; + log_record_type_descriptor[LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE]= + INIT_LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE; +} + + +static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23= +{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0 }; + +static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD= +{LOGRECTYPE_VARIABLE_LENGTH, 0, + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL, + write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL= +{LOGRECTYPE_VARIABLE_LENGTH, 0, + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL, + write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOB= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 8, NULL, write_hook_for_redo, NULL, 0}; + +/*QQQ:TODO:header???*/ +static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD= +{LOGRECTYPE_FIXEDLENGTH, + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + NULL, write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL= +{LOGRECTYPE_FIXEDLENGTH, + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + NULL, write_hook_for_redo, NULL, 0}; + +/* QQQ: TODO: variable and fixed size??? */ +static LOG_DESC INIT_LOGREC_REDO_PURGE_BLOCKS= +{LOGRECTYPE_VARIABLE_LENGTH, + 0, + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE + PAGE_STORE_SIZE + + PAGERANGE_STORE_SIZE, + NULL, write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW= +{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_INDEX= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW= +{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_CLR_END= +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_redo, NULL, 1}; + +static LOG_DESC INIT_LOGREC_PURGE_END= +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}; + +static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT= +{LOGRECTYPE_FIXEDLENGTH, + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + NULL, write_hook_for_undo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + NULL, write_hook_for_undo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + NULL, write_hook_for_undo, NULL, 1}; + +static LOG_DESC INIT_LOGREC_UNDO_ROW_PURGE= +{LOGRECTYPE_PSEUDOFIXEDLENGTH, LSN_STORE_SIZE, LSN_STORE_SIZE, + NULL, NULL, NULL, 1}; + +static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 10, NULL, write_hook_for_undo, NULL, 1}; + +static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 15, NULL, write_hook_for_undo, NULL, 0}; + +static LOG_DESC INIT_LOGREC_PREPARE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 5, NULL, NULL, NULL, 1}; + +static LOG_DESC INIT_LOGREC_COMMIT= +{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE= +{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1}; + +static LOG_DESC INIT_LOGREC_CHECKPOINT= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL= +{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE, + NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE= +{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 4, FILEID_STORE_SIZE + 4, + NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_FILE_ID= +{LOGRECTYPE_VARIABLE_LENGTH, 0, 4, NULL, NULL, NULL, 0}; + +static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID= +{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0}; + +const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL; + +static void loghandler_init() +{ + log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]= + INIT_LOGREC_RESERVED_FOR_CHUNKS23; + log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]= + INIT_LOGREC_REDO_INSERT_ROW_HEAD; + log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]= + INIT_LOGREC_REDO_INSERT_ROW_TAIL; + log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOB]= + INIT_LOGREC_REDO_INSERT_ROW_BLOB; + log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]= + INIT_LOGREC_REDO_INSERT_ROW_BLOBS; + log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]= + INIT_LOGREC_REDO_PURGE_ROW_HEAD; + log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]= + INIT_LOGREC_REDO_PURGE_ROW_TAIL; + log_record_type_descriptor[LOGREC_REDO_PURGE_BLOCKS]= + INIT_LOGREC_REDO_PURGE_BLOCKS; + log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]= + INIT_LOGREC_REDO_DELETE_ROW; + log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]= + INIT_LOGREC_REDO_UPDATE_ROW_HEAD; + log_record_type_descriptor[LOGREC_REDO_INDEX]= + INIT_LOGREC_REDO_INDEX; + log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]= + INIT_LOGREC_REDO_UNDELETE_ROW; + log_record_type_descriptor[LOGREC_CLR_END]= + INIT_LOGREC_CLR_END; + log_record_type_descriptor[LOGREC_PURGE_END]= + INIT_LOGREC_PURGE_END; + log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]= + INIT_LOGREC_UNDO_ROW_INSERT; + log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]= + INIT_LOGREC_UNDO_ROW_DELETE; + log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]= + INIT_LOGREC_UNDO_ROW_UPDATE; + log_record_type_descriptor[LOGREC_UNDO_ROW_PURGE]= + INIT_LOGREC_UNDO_ROW_PURGE; + log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]= + INIT_LOGREC_UNDO_KEY_INSERT; + log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]= + INIT_LOGREC_UNDO_KEY_DELETE; + log_record_type_descriptor[LOGREC_PREPARE]= + INIT_LOGREC_PREPARE; + log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]= + INIT_LOGREC_PREPARE_WITH_UNDO_PURGE; + log_record_type_descriptor[LOGREC_COMMIT]= + INIT_LOGREC_COMMIT; + log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]= + INIT_LOGREC_COMMIT_WITH_UNDO_PURGE; + log_record_type_descriptor[LOGREC_CHECKPOINT]= + INIT_LOGREC_CHECKPOINT; + log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]= + INIT_LOGREC_REDO_CREATE_TABLE; + log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]= + INIT_LOGREC_REDO_RENAME_TABLE; + log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]= + INIT_LOGREC_REDO_DROP_TABLE; + log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]= + INIT_LOGREC_REDO_DELETE_ALL; + log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]= + INIT_LOGREC_REDO_REPAIR_TABLE; + log_record_type_descriptor[LOGREC_FILE_ID]= + INIT_LOGREC_FILE_ID; + log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]= + INIT_LOGREC_LONG_TRANSACTION_ID; +}; + + +/* all possible flags page overheads */ +static uint page_overhead[TRANSLOG_FLAGS_NUM]; + +typedef struct st_translog_validator_data +{ + TRANSLOG_ADDRESS *addr; + my_bool was_recovered; +} TRANSLOG_VALIDATOR_DATA; + + +const char *maria_data_root; + + +/* + Check cursor/buffer consistence + + SYNOPSIS + translog_check_cursor + cursor cursor which will be checked +*/ + +#ifndef DBUG_OFF +static void translog_check_cursor(struct st_buffer_cursor *cursor) +{ + DBUG_ASSERT(cursor->chaser || + ((ulong) (cursor->ptr - cursor->buffer->buffer) == + cursor->buffer->size)); + DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no); + DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE == + cursor->current_page_fill % TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); +} +#endif + +/* + Get file name of the log by log number + + SYNOPSIS + translog_filename_by_fileno() + file_no Number of the log we want to open + path Pointer to buffer where file name will be + stored (must be FN_REFLEN bytes at least + RETURN + pointer to path +*/ + +static char *translog_filename_by_fileno(uint32 file_no, char *path) +{ + char file_name[10 + 8 + 1]; /* See my_sprintf */ + char *res; + DBUG_ENTER("translog_filename_by_fileno"); + DBUG_ASSERT(file_no <= 0xfffffff); + my_sprintf(file_name, (file_name, "maria_log.%08u", file_no)); + res= fn_format(path, file_name, log_descriptor.directory, "", MYF(MY_WME)); + DBUG_PRINT("info", ("Path: '%s' path: 0x%lx res: 0x%lx", + res, (ulong) path, (ulong) res)); + DBUG_RETURN(res); +} + + +/* + Open log file with given number without cache + + SYNOPSIS + open_logfile_by_number_no_cache() + file_no Number of the log we want to open + + RETURN + -1 error + # file descriptor number +*/ + +static File open_logfile_by_number_no_cache(uint32 file_no) +{ + File file; + char path[FN_REFLEN]; + DBUG_ENTER("open_logfile_by_number_no_cache"); + + /* TODO: add O_DIRECT to open flags (when buffer is aligned) */ + /* TODO: use my_create() */ + if ((file= my_open(translog_filename_by_fileno(file_no, path), + O_CREAT | O_BINARY | O_RDWR, + MYF(MY_WME))) < 0) + { + UNRECOVERABLE_ERROR(("Error %d during opening file '%s'", errno, path)); + DBUG_RETURN(-1); + } + DBUG_PRINT("info", ("File: '%s' handler: %d", path, file)); + DBUG_RETURN(file); +} + + +/* + Write log file page header in the just opened new log file + + SYNOPSIS + translog_write_file_header(); + + NOTES + First page is just a marker page; We don't store any real log data in it. + + RETURN + 0 OK + 1 ERROR +*/ + +uchar NEAR maria_trans_file_magic[]= +{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A', + 'L', 'O', 'G' }; + +static my_bool translog_write_file_header() +{ + ulonglong timestamp; + byte page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff; + DBUG_ENTER("translog_write_file_header"); + + /* file tag */ + memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic)); + page+= sizeof(maria_trans_file_magic); + /* timestamp */ + timestamp= my_getsystime(); + int8store(page, timestamp); + page+= 8; + /* maria version */ + int4store(page, TRANSLOG_VERSION_ID); + page+= 4; + /* mysql version (MYSQL_VERSION_ID) */ + int4store(page, log_descriptor.server_version); + page+= 4; + /* server ID */ + int4store(page, log_descriptor.server_id); + page+= 4; + /* loghandler page_size/DISK_DRIVE_SECTOR_SIZE */ + int2store(page, TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE); + page+= 2; + /* file number */ + int3store(page, LSN_FILE_NO(log_descriptor.horizon)); + page+= 3; + bzero(page, sizeof(page_buff) - (page- page_buff)); + + DBUG_RETURN(my_pwrite(log_descriptor.log_file_num[0], page_buff, + sizeof(page_buff), 0, log_write_flags) != 0); +} + + +/* + Initialize transaction log file buffer + + SYNOPSIS + translog_buffer_init() + buffer The buffer to initialize + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_buffer_init(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_init"); + /* This buffer offset */ + buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + /* This Buffer File */ + buffer->file= -1; + buffer->overlay= 0; + /* IO cache for current log */ + bzero(buffer->buffer, TRANSLOG_WRITE_BUFFER); + /* Buffer size */ + buffer->size= 0; + /* cond of thread which is waiting for buffer filling */ + buffer->waiting_filling_buffer.last_thread= 0; + /* Number of record which are in copy progress */ + buffer->copy_to_buffer_in_progress= 0; + /* list of waiting buffer ready threads */ + buffer->waiting_flush= 0; + /* lock for the buffer. Current buffer also lock the handler */ + if (pthread_mutex_init(&buffer->mutex, MY_MUTEX_INIT_FAST)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Close transaction log file by descriptor + + SYNOPSIS + translog_close_log_file() + file file descriptor + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_close_log_file(File file) +{ + int rc; + PAGECACHE_FILE fl; + fl.file= file; + flush_pagecache_blocks(log_descriptor.pagecache, &fl, FLUSH_RELEASE); + /* + Sync file when we close it + TODO: sync only we have changed the log + */ + rc= my_sync(file, MYF(MY_WME)); + rc|= my_close(file, MYF(MY_WME)); + return test(rc); +} + + +/* + Create and fill header of new file + + SYNOPSIS + translog_create_new_file() + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_create_new_file() +{ + int i; + uint32 file_no= LSN_FILE_NO(log_descriptor.horizon); + DBUG_ENTER("translog_create_new_file"); + + if (log_descriptor.log_file_num[OPENED_FILES_NUM - 1] != -1 && + translog_close_log_file(log_descriptor.log_file_num[OPENED_FILES_NUM - + 1])) + DBUG_RETURN(1); + for (i= OPENED_FILES_NUM - 1; i > 0; i--) + log_descriptor.log_file_num[i]= log_descriptor.log_file_num[i - 1]; + + if ((log_descriptor.log_file_num[0]= + open_logfile_by_number_no_cache(file_no)) == -1 || + translog_write_file_header()) + DBUG_RETURN(1); + + if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, file_no, + CONTROL_FILE_UPDATE_ONLY_LOGNO)) + DBUG_RETURN(1); + + DBUG_RETURN(0); +} + + +/* + Lock the loghandler buffer + + SYNOPSIS + translog_buffer_lock() + buffer This buffer which should be locked + + RETURN + 0 OK + 1 Error +*/ + +#ifndef DBUG_OFF +static my_bool translog_buffer_lock(struct st_translog_buffer *buffer) +{ + int res; + DBUG_ENTER("translog_buffer_lock"); + DBUG_PRINT("enter", + ("Lock buffer #%u: (0x%lx) mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + res= (pthread_mutex_lock(&buffer->mutex) != 0); + DBUG_RETURN(res); +} +#else +#define translog_buffer_lock(B) \ + pthread_mutex_lock(&B->mutex) +#endif + + +/* + Unlock the loghandler buffer + + SYNOPSIS + translog_buffer_unlock() + buffer This buffer which should be unlocked + + RETURN + 0 OK + 1 Error +*/ + +#ifndef DBUG_OFF +static my_bool translog_buffer_unlock(struct st_translog_buffer *buffer) +{ + int res; + DBUG_ENTER("translog_buffer_unlock"); + DBUG_PRINT("enter", ("Unlock buffer... #%u (0x%lx) " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + + res= (pthread_mutex_unlock(&buffer->mutex) != 0); + DBUG_PRINT("enter", ("Unlocked buffer... #%u: 0x%lx mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + DBUG_RETURN(res); +} +#else +#define translog_buffer_unlock(B) \ + pthread_mutex_unlock(&B->mutex) +#endif + + +/* + Write a header on the page + + SYNOPSIS + translog_new_page_header() + horizon Where to write the page + cursor Where to write the page + + NOTE + - space for page header should be checked before +*/ + +static void translog_new_page_header(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + byte *ptr; + + DBUG_ENTER("translog_new_page_header"); + DBUG_ASSERT(cursor->ptr); + + cursor->protected= 0; + + ptr= cursor->ptr; + /* Page number */ + int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE); + ptr+= 3; + /* File number */ + int3store(ptr, LSN_FILE_NO(*horizon)); + ptr+= 3; + *(ptr++)= (byte) log_descriptor.flags; + if (log_descriptor.flags & TRANSLOG_PAGE_CRC) + { +#ifndef DBUG_OFF + DBUG_PRINT("info", ("write 0x11223344 CRC to (%lu,0x%lx)", + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon))); + /* This will be overwritten by real CRC; This is just for debugging */ + int4store(ptr, 0x11223344); +#endif + /* CRC will be put when page is finished */ + ptr+= CRC_LENGTH; + } + if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION) + { + time_t tm; + uint16 tmp_time= time(&tm); + int2store(ptr, tmp_time); + ptr+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + } + { + uint len= (ptr - cursor->ptr); + (*horizon)+= len; /* it is increasing of offset part of the address */ + cursor->current_page_fill= len; + if (!cursor->chaser) + cursor->buffer->size+= len; + } + cursor->ptr= ptr; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + DBUG_VOID_RETURN; +} + + +/* + Put sector protection on the page image + + SYNOPSIS + translog_put_sector_protection() + page reference on the page content + cursor cursor of the buffer + + NOTES + We put a sector protection on all following sectors on the page, + except the first sector that is protected by page header. +*/ + +static void translog_put_sector_protection(byte *page, + struct st_buffer_cursor *cursor) +{ + byte *table= page + log_descriptor.page_overhead - + (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + uint16 value= uint2korr(table) + cursor->write_counter; + uint16 last_protected_sector= ((cursor->previous_offset - 1) / + DISK_DRIVE_SECTOR_SIZE); + uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE; + uint i, offset; + DBUG_ENTER("translog_put_sector_protection"); + + if (start_sector == 0) + start_sector= 1; /* First sector is protected */ + + DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, " + "last protected:%u start sector:%u", + (uint) cursor->write_counter, + (uint) value, + (uint) cursor->previous_offset, + (uint) last_protected_sector, (uint) start_sector)); + if (last_protected_sector == start_sector) + { + i= last_protected_sector * 2; + offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE; + /* restore data, because we modified sector which was protected */ + if (offset < cursor->previous_offset) + page[offset]= table[i]; + offset++; + if (offset < cursor->previous_offset) + page[offset]= table[i + 1]; + } + for (i= start_sector * 2, offset= start_sector * DISK_DRIVE_SECTOR_SIZE; + i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + (i+= 2), (offset+= DISK_DRIVE_SECTOR_SIZE)) + { + DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x", + i / 2, offset, (uint) page[offset], + (uint) page[offset + 1])); + table[i]= page[offset]; + table[i + 1]= page[offset + 1]; + int2store(page + offset, value); + DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x%x", + i / 2, offset, (uint) page[offset], + (uint) page[offset + 1])); + } + DBUG_VOID_RETURN; +} + + +/* + Calculate CRC32 of given area + + SYNOPSIS + translog_crc() + area Pointer of the area beginning + length The Area length + + RETURN + CRC32 +*/ + +static uint32 translog_crc(byte *area, uint length) +{ + return crc32(0L, (unsigned char*) area, length); +} + + +/* + Finish current page with zeros + + SYNOPSIS + translog_finish_page() + horizon \ horizon & buffer pointers + cursor / +*/ + +static void translog_finish_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill; + byte *page= cursor->ptr -cursor->current_page_fill; + DBUG_ENTER("translog_finish_page"); + DBUG_PRINT("enter", ("Buffer: #%u 0x%lx " + "Buffer addr: (%lu,0x%lx) " + "Page addr: (%lu,0x%lx) " + "size:%lu (%lu) Pg:%u left:%u", + (uint) cursor->buffer_no, (ulong) cursor->buffer, + (ulong) LSN_FILE_NO(cursor->buffer->offset), + (ulong) LSN_OFFSET(cursor->buffer->offset), + (ulong) LSN_FILE_NO(*horizon), + (ulong) (LSN_OFFSET(*horizon) - + cursor->current_page_fill), + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr -cursor->buffer->buffer), + (uint) cursor->current_page_fill, (uint) left)); + DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + if (cursor->protected) + { + DBUG_PRINT("info", ("Already protected and finished")); + DBUG_VOID_RETURN; + } + cursor->protected= 1; + + DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); + if (left != 0) + { + DBUG_PRINT("info", ("left: %u", (uint) left)); + bzero(cursor->ptr, left); + cursor->ptr +=left; + (*horizon)+= left; /* offset increasing */ + if (!cursor->chaser) + cursor->buffer->size+= left; + cursor->current_page_fill= 0; + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, + (ulong) cursor->buffer, cursor->chaser, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + } + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) + { + translog_put_sector_protection(page, cursor); + DBUG_PRINT("info", ("drop write_counter")); + cursor->write_counter= 0; + cursor->previous_offset= 0; + } + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_crc(page + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + DBUG_PRINT("info", ("CRC: %lx", (ulong) crc)); + /* We have page number, file number and flag before crc */ + int4store(page + 3 + 3 + 1, crc); + } + DBUG_VOID_RETURN; +} + + +/* + Wait until all thread finish filling this buffer + + SYNOPSIS + translog_wait_for_writers() + buffer This buffer should be check + + NOTE + This buffer should be locked +*/ + +static void translog_wait_for_writers(struct st_translog_buffer *buffer) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("translog_wait_for_writers"); + DBUG_PRINT("enter", ("Buffer #%u 0x%lx copies in progress: %u", + (uint) buffer->buffer_no, (ulong) buffer, + (int) buffer->copy_to_buffer_in_progress)); + + while (buffer->copy_to_buffer_in_progress) + { + DBUG_PRINT("info", ("wait for writers... " + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + DBUG_ASSERT(buffer->file != -1); + wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, + &buffer->mutex); + DBUG_PRINT("info", ("wait for writers done " + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + } + + DBUG_VOID_RETURN; +} + + +/* + + Wait for buffer to become free + + SYNOPSIS + translog_wait_for_buffer_free() + buffer The buffer we are waiting for + + NOTE + - this buffer should be locked +*/ + +static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer) +{ + struct st_my_thread_var *thread= my_thread_var; + DBUG_ENTER("translog_wait_for_buffer_free"); + DBUG_PRINT("enter", ("Buffer: #%u 0x%lx copies in progress: %u " + "File: %d size: 0x%lu", + (uint) buffer->buffer_no, (ulong) buffer, + (int) buffer->copy_to_buffer_in_progress, + buffer->file, (ulong) buffer->size)); + + translog_wait_for_writers(buffer); + + while (buffer->file != -1) + { + DBUG_PRINT("info", ("wait for writers... " + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + wqueue_add_and_wait(&buffer->waiting_filling_buffer, thread, + &buffer->mutex); + DBUG_PRINT("info", ("wait for writers done. " + "buffer: #%u 0x%lx " + "mutex: 0x%lx", + (uint) buffer->buffer_no, (ulong) buffer, + (ulong) &buffer->mutex)); + } + DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0); + DBUG_VOID_RETURN; +} + + +/* + Initialize the cursor for a buffer + + SYNOPSIS + translog_cursor_init() + buffer The buffer + cursor It's cursor + buffer_no Number of buffer +*/ + +static void translog_cursor_init(struct st_buffer_cursor *cursor, + struct st_translog_buffer *buffer, + uint8 buffer_no) +{ + DBUG_ENTER("translog_cursor_init"); + cursor->ptr= buffer->buffer; + cursor->buffer= buffer; + cursor->buffer_no= buffer_no; + cursor->current_page_fill= 0; + cursor->chaser= (cursor != &log_descriptor.bc); + cursor->write_counter= 0; + cursor->previous_offset= 0; + cursor->protected= 0; + DBUG_VOID_RETURN; +} + + +/* + Initialize buffer for current file + + SYNOPSIS + translog_start_buffer() + buffer The buffer + cursor It's cursor + buffer_no Number of buffer +*/ + +static void translog_start_buffer(struct st_translog_buffer *buffer, + struct st_buffer_cursor *cursor, + uint buffer_no) +{ + DBUG_ENTER("translog_start_buffer"); + DBUG_PRINT("enter", + ("Assign buffer: #%u (0x%lx) to file: %d offset: 0x%lx(%lu)", + (uint) buffer->buffer_no, (ulong) buffer, + log_descriptor.log_file_num[0], + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + DBUG_ASSERT(buffer_no == buffer->buffer_no); + buffer->last_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + buffer->offset= log_descriptor.horizon; + buffer->file= log_descriptor.log_file_num[0]; + buffer->overlay= 0; + buffer->size= 0; + translog_cursor_init(cursor, buffer, buffer_no); + DBUG_PRINT("info", ("init cursor #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + DBUG_VOID_RETURN; +} + + +/* + Switch to the next buffer in a chain + + SYNOPSIS + translog_buffer_next() + horizon \ Pointers on current position in file and buffer + cursor / + next_file Also start new file + + NOTE: + - loghandler should be locked + - after return new and old buffer still are locked + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + my_bool new_file) +{ + uint old_buffer_no= cursor->buffer_no; + uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no; + my_bool chasing= cursor->chaser; + DBUG_ENTER("translog_buffer_next"); + + DBUG_PRINT("info", ("horizon: (%lu,0x%lx) chasing: %d", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), chasing)); + + DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0); + + translog_finish_page(horizon, cursor); + + if (!chasing) + { + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + } +#ifndef DBUG_OFF + else + DBUG_ASSERT(new_buffer->file != 0); +#endif + if (new_file) + { + /* move the horizon to the next file and its header page */ + (*horizon)+= LSN_ONE_FILE; + (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE); + if (!chasing && translog_create_new_file()) + { + DBUG_RETURN(1); + } + } + + /* prepare next page */ + if (chasing) + translog_cursor_init(cursor, new_buffer, new_buffer_no); + else + translog_start_buffer(new_buffer, cursor, new_buffer_no); + translog_new_page_header(horizon, cursor); + DBUG_RETURN(0); +} + + +/* + Set max LSN sent to file + + SYNOPSIS + translog_set_sent_to_file() + lsn LSN to assign +*/ + +static void translog_set_sent_to_file(LSN *lsn) +{ + DBUG_ENTER("translog_set_sent_to_file"); + pthread_mutex_lock(&log_descriptor.sent_to_file_lock); + DBUG_ASSERT(cmp_translog_addr(*lsn, log_descriptor.sent_to_file) >= 0); + log_descriptor.sent_to_file= *lsn; + pthread_mutex_unlock(&log_descriptor.sent_to_file_lock); + DBUG_VOID_RETURN; +} + + +/* + Get max LSN send to file + + SYNOPSIS + translog_get_sent_to_file() + lsn LSN to value +*/ + +static void translog_get_sent_to_file(LSN *lsn) +{ + DBUG_ENTER("translog_get_sent_to_file"); + pthread_mutex_lock(&log_descriptor.sent_to_file_lock); + *lsn= log_descriptor.sent_to_file; + pthread_mutex_unlock(&log_descriptor.sent_to_file_lock); + DBUG_VOID_RETURN; +} + + +/* + Get first chunk address on the given page + + SYNOPSIS + translog_get_first_chunk_offset() + page The page where to find first chunk + + RETURN + first chunk offset +*/ + +static my_bool translog_get_first_chunk_offset(byte *page) +{ + uint16 page_header= 7; + DBUG_ENTER("translog_get_first_chunk_offset"); + + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) + page_header+= 4; + if (page[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) + page_header+= (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + DBUG_RETURN(page_header); +} + + +/* + Write coded length of record + + SYNOPSIS + translog_write_variable_record_1group_code_len + dst Destination buffer pointer + length Length which should be coded + header_len Calculated total header length +*/ + +static void +translog_write_variable_record_1group_code_len(byte *dst, + translog_size_t length, + uint16 header_len) +{ + switch (header_len) { + case 6: /* (5 + 1) */ + DBUG_ASSERT(length <= 250); + *dst= (uint8) length; + return; + case 8: /* (5 + 3) */ + DBUG_ASSERT(length <= 0xFFFF); + *dst= 251; + int2store(dst + 1, length); + return; + case 9: /* (5 + 4) */ + DBUG_ASSERT(length <= (ulong) 0xFFFFFF); + *dst= 252; + int3store(dst + 1, length); + return; + case 10: /* (5 + 5) */ + *dst= 253; + int4store(dst + 1, length); + return; + default: + DBUG_ASSERT(0); + } + return; +} + + +/* + Decode record data length and advance given pointer to the next field + + SYNOPSIS + translog_variable_record_1group_decode_len() + src The pointer to the pointer to the length beginning + + RETURN + decoded length +*/ + +static translog_size_t translog_variable_record_1group_decode_len(byte **src) +{ + uint8 first= (uint8) (**src); + switch (first) { + case 251: + (*src)+= 3; + return (uint2korr((*src) - 2)); + case 252: + (*src)+= 4; + return (uint3korr((*src) - 3)); + case 253: + (*src)+= 5; + return (uint4korr((*src) - 4)); + case 254: + case 255: + DBUG_ASSERT(0); /* reserved for future use */ + return (0); + default: + (*src)++; + return (first); + } +} + + +/* + Get total length of this chunk (not only body) + + SYNOPSIS + translog_get_total_chunk_length() + page The page where chunk placed + offset Offset of the chunk on this place + + RETURN + total length of the chunk +*/ + +static uint16 translog_get_total_chunk_length(byte *page, uint16 offset) +{ + DBUG_ENTER("translog_get_total_chunk_length"); + switch (page[offset] & TRANSLOG_CHUNK_TYPE) { + case TRANSLOG_CHUNK_LSN: + { + /* 0 chunk referred as LSN (head or tail) */ + translog_size_t rec_len; + byte *start= page + offset; + byte *ptr= start + 1 + 2; + uint16 chunk_len, header_len, page_rest; + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); + rec_len= translog_variable_record_1group_decode_len(&ptr); + chunk_len= uint2korr(ptr); + header_len= (ptr -start) + 2; + DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u", + (ulong) rec_len, (uint) chunk_len, (uint) header_len)); + if (chunk_len) + { + DBUG_PRINT("info", ("chunk len: %u + %u = %u", + (uint) header_len, (uint) chunk_len, + (uint) (chunk_len + header_len))); + DBUG_RETURN(chunk_len + header_len); + } + page_rest= TRANSLOG_PAGE_SIZE - offset; + DBUG_PRINT("info", ("page_rest %u", (uint) page_rest)); + if (rec_len + header_len < page_rest) + DBUG_RETURN(rec_len + header_len); + DBUG_RETURN(page_rest); + } + case TRANSLOG_CHUNK_FIXED: + { + byte *ptr; + uint type= page[offset] & TRANSLOG_REC_TYPE; + uint length; + int i; + /* 1 (pseudo)fixed record (also LSN) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED")); + DBUG_ASSERT(log_record_type_descriptor[type].class == + LOGRECTYPE_FIXEDLENGTH || + log_record_type_descriptor[type].class == + LOGRECTYPE_PSEUDOFIXEDLENGTH); + if (log_record_type_descriptor[type].class == LOGRECTYPE_FIXEDLENGTH) + { + DBUG_PRINT("info", + ("Fixed length: %u", + (uint) (log_record_type_descriptor[type].fixed_length + 3))); + DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3); + } + + ptr= page + offset + 3; /* first compressed LSN */ + length= log_record_type_descriptor[type].fixed_length + 3; + for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++) + { + /* first 2 bits is length - 2 */ + uint len= ((((uint8) (*ptr)) & TRANSLOG_CLSN_LEN_BITS) >> 6) + 2; + if (ptr[0] == 0 && ((uint8) ptr[1]) == 1) + len+= LSN_STORE_SIZE; /* case of full LSN storing */ + ptr+= len; + /* subtract economized bytes */ + length-= (LSN_STORE_SIZE - len); + } + DBUG_PRINT("info", ("Pseudo-fixed length: %u", length)); + DBUG_RETURN(length); + } + case TRANSLOG_CHUNK_NOHDR: + /* 2 no header chunk (till page end) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u", + (uint) (TRANSLOG_PAGE_SIZE - offset))); + DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset); + case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH")); + DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3); + DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3)); + DBUG_RETURN(uint2korr(page + offset + 1) + 3); + default: + DBUG_ASSERT(0); + DBUG_RETURN(0); + } +} + + +/* + Flush given buffer + + SYNOPSIS + translog_buffer_flush() + buffer This buffer should be flushed + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_buffer_flush(struct st_translog_buffer *buffer) +{ + uint32 i; + PAGECACHE_FILE file; + DBUG_ENTER("translog_buffer_flush"); + DBUG_PRINT("enter", + ("Buffer: #%u 0x%lx: " + "file: %d offset: (%lu,0x%lx) size: %lu", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->file, + (ulong) LSN_FILE_NO(buffer->offset), + (ulong) LSN_OFFSET(buffer->offset), + (ulong) buffer->size)); + + DBUG_ASSERT(buffer->file != -1); + + translog_wait_for_writers(buffer); + if (buffer->overlay && buffer->overlay->file != -1) + { + struct st_translog_buffer *overlay= buffer->overlay; + translog_buffer_unlock(buffer); + translog_buffer_lock(overlay); + translog_wait_for_buffer_free(overlay); + translog_buffer_unlock(overlay); + translog_buffer_lock(buffer); + } + + file.file= buffer->file; + for (i= 0; i < buffer->size; i+= TRANSLOG_PAGE_SIZE) + { + DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size); + if (pagecache_write(log_descriptor.pagecache, + &file, + (LSN_OFFSET(buffer->offset) + i) / TRANSLOG_PAGE_SIZE, + 3, + buffer->buffer + i, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, PAGECACHE_WRITE_DONE, 0)) + { + UNRECOVERABLE_ERROR(("Can't write page (%lu,0x%lx) to pagecache", + (ulong) buffer->file, + (ulong) (LSN_OFFSET(buffer->offset)+ i))); + } + } + if (my_pwrite(buffer->file, (char*) buffer->buffer, + buffer->size, LSN_OFFSET(buffer->offset), + log_write_flags)) + { + UNRECOVERABLE_ERROR(("Can't write buffer (%lu,0x%lx) size %lu " + "to the disk (%d)", + (ulong) buffer->file, + (ulong) LSN_OFFSET(buffer->offset), + (ulong) buffer->size, errno)); + DBUG_RETURN(1); + } + if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */ + translog_set_sent_to_file(&buffer->last_lsn); + + /* Free buffer */ + buffer->file= -1; + buffer->overlay= 0; + if (buffer->waiting_filling_buffer.last_thread) + { + wqueue_release_queue(&buffer->waiting_filling_buffer); + } + DBUG_RETURN(0); +} + + +/* + Recover page with sector protection (wipe out failed chunks) + + SYNOPSYS + translog_recover_page_up_to_sector() + page reference on the page + offset offset of failed sector + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_recover_page_up_to_sector(byte *page, uint16 offset) +{ + uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end; + DBUG_ENTER("translog_recover_page_up_to_sector"); + DBUG_PRINT("enter", ("offset: %u first chunk: %u", + (uint) offset, (uint) chunk_offset)); + + while (page[chunk_offset] != '\0' && chunk_offset < offset) + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + { + UNRECOVERABLE_ERROR(("cant get chunk length (offset %u)", + (uint) chunk_offset)); + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("chunk: offset: %u length %u", + (uint) chunk_offset, (uint) chunk_length)); + if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE) + { + UNRECOVERABLE_ERROR(("damaged chunk (offset %u) in trusted area", + (uint) chunk_offset)); + DBUG_RETURN(1); + } + chunk_offset+= chunk_length; + } + + valid_chunk_end= chunk_offset; + /* end of trusted area - sector parsing */ + while (page[chunk_offset] != '\0') + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + break; + + DBUG_PRINT("info", ("chunk: offset: %u length %u", + (uint) chunk_offset, (uint) chunk_length)); + if (((ulong) chunk_offset) + ((ulong) chunk_length) > + (uint) (offset + DISK_DRIVE_SECTOR_SIZE)) + break; + + chunk_offset+= chunk_length; + valid_chunk_end= chunk_offset; + } + DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end)); + + bzero(page + valid_chunk_end, TRANSLOG_PAGE_SIZE - valid_chunk_end); + + DBUG_RETURN(0); +} + + +/* + Log page validator + + SYNOPSIS + translog_page_validator() + page_addr The page to check + data data, need for validation (address in this case) + + RETURN + 0 OK + 1 Error +*/ +static my_bool translog_page_validator(byte *page_addr, gptr data_ptr) +{ + uint this_page_page_overhead; + uint flags; + byte *page= (byte*) page_addr, *page_pos; + TRANSLOG_VALIDATOR_DATA *data= (TRANSLOG_VALIDATOR_DATA *) data_ptr; + TRANSLOG_ADDRESS addr= *(data->addr); + DBUG_ENTER("translog_page_validator"); + + data->was_recovered= 0; + + if (uint3korr(page) != LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE || + uint3korr(page + 3) != LSN_FILE_NO(addr)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "page address written in the page is incorrect: " + "File %lu instead of %lu or page %lu instead of %lu", + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), + (ulong) uint3korr(page + 3), (ulong) LSN_FILE_NO(addr), + (ulong) uint3korr(page), + (ulong) LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE)); + DBUG_RETURN(1); + } + flags= (uint)(page[TRANSLOG_PAGE_FLAGS]); + this_page_page_overhead= page_overhead[flags]; + if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | + TRANSLOG_RECORD_CRC)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "Garbage in the page flags field detected : %x", + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), + (uint) flags)); + DBUG_RETURN(1); + } + page_pos= page + (3 + 3 + 1); + if (flags & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_crc(page + this_page_page_overhead, + TRANSLOG_PAGE_SIZE - + this_page_page_overhead); + if (crc != uint4korr(page_pos)) + { + UNRECOVERABLE_ERROR(("Page (%lu,0x%lx): " + "CRC mismatch: calculated: %lx on the page %lx", + (ulong) LSN_FILE_NO(addr), (ulong) LSN_OFFSET(addr), + (ulong) crc, (ulong) uint4korr(page_pos))); + DBUG_RETURN(1); + } + page_pos+= CRC_LENGTH; /* Skip crc */ + } + if (flags & TRANSLOG_SECTOR_PROTECTION) + { + uint i, offset; + byte *table= page_pos; + uint16 current= uint2korr(table); + for (i= 2, offset= DISK_DRIVE_SECTOR_SIZE; + i < (TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE) * 2; + i+= 2, offset+= DISK_DRIVE_SECTOR_SIZE) + { + /* + TODO: add chunk counting for "suspecting" sectors (difference is + more than 1-2) + */ + uint16 test= uint2korr(page + offset); + DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx " + "read: 0x%x stored: 0x%x%x", + i / 2, offset, (ulong) current, + (uint) uint2korr(page + offset), (uint) table[i], + (uint) table[i + 1])); + if (((test < current) && + (LL(0xFFFF) - current + test > DISK_DRIVE_SECTOR_SIZE / 3)) || + ((test >= current) && + (test - current > DISK_DRIVE_SECTOR_SIZE / 3))) + { + if (translog_recover_page_up_to_sector(page, offset)) + DBUG_RETURN(1); + data->was_recovered= 1; + DBUG_RETURN(0); + } + + /* Return value on the page */ + page[offset]= table[i]; + page[offset + 1]= table[i + 1]; + current= test; + DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx " + "read: 0x%x stored: 0x%x%x", + i / 2, offset, (ulong) current, + (uint) uint2korr(page + offset), (uint) table[i], + (uint) table[i + 1])); + } + } + DBUG_RETURN(0); +} + + +/* + Get log page by file number and offset of the beginning of the page + + SYNOPSIS + translog_get_page() + data validator data, which contains the page address + buffer buffer for page placing + (might not be used in some cache implementations) + + RETURN + NULL - Error + # pointer to the page cache which should be used to read this page +*/ + +static byte *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, byte *buffer) +{ + TRANSLOG_ADDRESS addr= *(data->addr); + uint cache_index; + uint32 file_no= LSN_FILE_NO(addr); + DBUG_ENTER("translog_get_page"); + DBUG_PRINT("enter", ("File: %lu Offset: %lu(0x%lx)", + (ulong) file_no, + (ulong) LSN_OFFSET(addr), + (ulong) LSN_OFFSET(addr))); + + /* it is really page address */ + DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0); + + if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - file_no) < + OPENED_FILES_NUM) + { + PAGECACHE_FILE file; + /* file in the cache */ + if (log_descriptor.log_file_num[cache_index] == -1) + { + if ((log_descriptor.log_file_num[cache_index]= + open_logfile_by_number_no_cache(file_no)) == -1) + DBUG_RETURN(NULL); + } + file.file= log_descriptor.log_file_num[cache_index]; + + buffer= (byte*) + pagecache_valid_read(log_descriptor.pagecache, &file, + LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, + 3, (char*) buffer, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0, + &translog_page_validator, (gptr) data); + } + else + { + /* + TODO: WE KEEP THE LAST OPENED_FILES_NUM FILES IN THE LOG CACHE, NOT + THE LAST USED FILES. THIS WILL BE A NOTABLE PROBLEM IF WE ARE + FOLLOWING AN UNDO CHAIN THAT GOES OVER MANY OLD LOG FILES. WE WILL + PROBABLY NEED SPECIAL HANDLING OF THIS OR HAVE A FILO FOR THE LOG + FILES. + */ + + File file= open_logfile_by_number_no_cache(file_no); + if (file == -1) + DBUG_RETURN(NULL); + if (my_pread(file, (char*) buffer, TRANSLOG_PAGE_SIZE, + LSN_OFFSET(addr), MYF(MY_FNABP | MY_WME))) + buffer= NULL; + else if (translog_page_validator((byte*) buffer, (gptr) data)) + buffer= NULL; + my_close(file, MYF(MY_WME)); + } + DBUG_RETURN(buffer); +} + + +/* + Finds last page of the given log file + + SYNOPSIS + translog_get_last_page_addr() + addr address structure to fill with data, which contain + file number of the log file + last_page_ok assigned 1 if last page was OK + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr, + my_bool *last_page_ok) +{ + MY_STAT stat_buff, *stat; + char path[FN_REFLEN]; + uint32 rec_offset; + uint32 file_no= LSN_FILE_NO(*addr); + DBUG_ENTER("translog_get_last_page_addr"); + + if (!(stat= my_stat(translog_filename_by_fileno(file_no, path), + &stat_buff, MYF(MY_WME)))) + DBUG_RETURN(1); + DBUG_PRINT("info", ("File size: %lu", (ulong) stat->st_size)); + if (stat->st_size > TRANSLOG_PAGE_SIZE) + { + rec_offset= (((stat->st_size / TRANSLOG_PAGE_SIZE) - 1) * + TRANSLOG_PAGE_SIZE); + *last_page_ok= (stat->st_size == rec_offset + TRANSLOG_PAGE_SIZE); + } + else + { + *last_page_ok= 0; + rec_offset= 0; + } + *addr= MAKE_LSN(file_no, rec_offset); + DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset, + *last_page_ok)); + DBUG_RETURN(0); +} + + +/* + Get number bytes for record length storing + + SYNOPSIS + translog_variable_record_length_bytes() + length Record length wich will be codded + + RETURN + 1,3,4,5 - number of bytes to store given length +*/ + +static uint translog_variable_record_length_bytes(translog_size_t length) +{ + if (length < 250) + return 1; + if (length < 0xFFFF) + return 3; + if (length < (ulong) 0xFFFFFF) + return 4; + return 5; +} + + +/* + Get header of this chunk + + SYNOPSIS + translog_get_chunk_header_length() + page The page where chunk placed + offset Offset of the chunk on this place + + RETURN + # total length of the chunk + 0 Error +*/ + +static uint16 translog_get_chunk_header_length(byte *page, uint16 offset) +{ + DBUG_ENTER("translog_get_chunk_header_length"); + page+= offset; + switch (*page & TRANSLOG_CHUNK_TYPE) { + case TRANSLOG_CHUNK_LSN: + { + /* 0 chunk referred as LSN (head or tail) */ + translog_size_t rec_len; + byte *start= page; + byte *ptr= start + 1 + 2; + uint16 chunk_len, header_len; + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN")); + rec_len= translog_variable_record_1group_decode_len(&ptr); + chunk_len= uint2korr(ptr); + header_len= (ptr - start) +2; + DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u", + (ulong) rec_len, (uint) chunk_len, (uint) header_len)); + if (chunk_len) + { + /* TODO: fine header end */ + DBUG_ASSERT(0); + DBUG_RETURN(0); /* Keep compiler happy */ + } + DBUG_RETURN(header_len); + } + case TRANSLOG_CHUNK_FIXED: + { + /* 1 (pseudo)fixed record (also LSN) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3")); + DBUG_RETURN(3); + } + case TRANSLOG_CHUNK_NOHDR: + /* 2 no header chunk (till page end) */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1")); + DBUG_RETURN(1); + break; + case TRANSLOG_CHUNK_LNGTH: + /* 3 chunk with chunk length */ + DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3")); + DBUG_RETURN(3); + break; + default: + DBUG_ASSERT(0); + DBUG_RETURN(0); /* Keep compiler happy */ + } +} + + +/* + Initialize transaction log + + SYNOPSIS + translog_init() + directory Directory where log files are put + log_file_max_size max size of one log size (for new logs creation) + server_version version of MySQL server (MYSQL_VERSION_ID) + server_id server ID (replication & Co) + pagecache Page cache for the log reads + flags flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION + TRANSLOG_RECORD_CRC) + + TODO + Free used resources in case of error. + + RETURN + 0 OK + 1 Error +*/ + +my_bool translog_init(const char *directory, + uint32 log_file_max_size, + uint32 server_version, + uint32 server_id, PAGECACHE *pagecache, uint flags) +{ + int i; + int old_log_was_recovered= 0, logs_found= 0; + uint old_flags= flags; + TRANSLOG_ADDRESS sure_page, last_page, last_valid_page; + DBUG_ENTER("translog_init"); + + loghandler_init(); /* Safe to do many times */ + + if (pthread_mutex_init(&log_descriptor.sent_to_file_lock, + MY_MUTEX_INIT_FAST)) + DBUG_RETURN(1); + + /* Directory to store files */ + unpack_dirname(log_descriptor.directory, directory); + + if ((log_descriptor.directory_fd= my_open(log_descriptor.directory, + O_RDONLY, MYF(MY_WME))) < 0) + { + UNRECOVERABLE_ERROR(("Error %d during opening directory '%s'", + errno, log_descriptor.directory)); + DBUG_RETURN(1); + } + + /* max size of one log size (for new logs creation) */ + log_descriptor.log_file_max_size= + log_file_max_size - (log_file_max_size % TRANSLOG_PAGE_SIZE); + /* server version */ + log_descriptor.server_version= server_version; + /* server ID */ + log_descriptor.server_id= server_id; + /* Page cache for the log reads */ + log_descriptor.pagecache= pagecache; + /* Flags */ + DBUG_ASSERT((flags & + ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | + TRANSLOG_RECORD_CRC)) == 0); + log_descriptor.flags= flags; + for (i= 0; i < TRANSLOG_FLAGS_NUM; i++) + { + page_overhead[i]= 7; + if (i & TRANSLOG_PAGE_CRC) + page_overhead[i]+= CRC_LENGTH; + if (i & TRANSLOG_SECTOR_PROTECTION) + page_overhead[i]+= (TRANSLOG_PAGE_SIZE / + DISK_DRIVE_SECTOR_SIZE) * 2; + } + log_descriptor.page_overhead= page_overhead[flags]; + log_descriptor.page_capacity_chunk_2= + TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1; + DBUG_ASSERT(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0); + log_descriptor.buffer_capacity_chunk_2= + (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) * + log_descriptor.page_capacity_chunk_2; + log_descriptor.half_buffer_capacity_chunk_2= + log_descriptor.buffer_capacity_chunk_2 / 2; + DBUG_PRINT("info", + ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u", + log_descriptor.page_overhead, + log_descriptor.page_capacity_chunk_2, + log_descriptor.buffer_capacity_chunk_2, + log_descriptor.half_buffer_capacity_chunk_2)); + + /* *** Current state of the log handler *** */ + + /* Init log handler file handlers cache */ + for (i= 0; i < OPENED_FILES_NUM; i++) + log_descriptor.log_file_num[i]= -1; + + /* just to init it somehow */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + + /* Buffers for log writing */ + for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) + { + if (translog_buffer_init(log_descriptor.buffers + i)) + DBUG_RETURN(1); +#ifndef DBUG_OFF + log_descriptor.buffers[i].buffer_no= (uint8) i; +#endif + DBUG_PRINT("info", ("translog_buffer buffer #%u: 0x%lx", + i, (ulong) log_descriptor.buffers + i)); + } + + logs_found= (last_logno != CONTROL_FILE_IMPOSSIBLE_FILENO); + + if (logs_found) + { + my_bool pageok; + /* + TODO: scan directory for maria_log.XXXXXXXX files and find + highest XXXXXXXX & set logs_found + TODO: check that last checkpoint within present log addresses space + + find the log end + */ + if (LSN_FILE_NO(last_checkpoint_lsn) == CONTROL_FILE_IMPOSSIBLE_FILENO) + { + DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0); + /* there was no checkpoints we will read from the beginning */ + sure_page= (LSN_ONE_FILE | TRANSLOG_PAGE_SIZE); + } + else + { + sure_page= last_checkpoint_lsn; + DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0); + sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE; + } + log_descriptor.horizon= last_page= MAKE_LSN(last_logno,0); + if (translog_get_last_page_addr(&last_page, &pageok)) + DBUG_RETURN(1); + if (LSN_OFFSET(last_page) == 0) + { + if (LSN_FILE_NO(last_page) == 1) + { + logs_found= 0; /* file #1 has no pages */ + } + else + { + last_page-= LSN_ONE_FILE; + if (translog_get_last_page_addr(&last_page, &pageok)) + DBUG_RETURN(1); + } + } + } + if (logs_found) + { + TRANSLOG_ADDRESS current_page= sure_page; + my_bool pageok; + + DBUG_ASSERT(sure_page <= last_page); + + /* TODO: check page size */ + + last_valid_page= CONTROL_FILE_IMPOSSIBLE_LSN; + /* scan and validate pages */ + do + { + TRANSLOG_ADDRESS current_file_last_page; + current_file_last_page= current_page; + if (translog_get_last_page_addr(¤t_file_last_page, &pageok)) + DBUG_RETURN(1); + if (!pageok) + { + DBUG_PRINT("error", ("File %lu have no complete last page", + (ulong) LSN_FILE_NO(current_file_last_page))); + old_log_was_recovered= 1; + /* This file is not written till the end so it should be last */ + last_page= current_file_last_page; + /* TODO: issue warning */ + } + do + { + TRANSLOG_VALIDATOR_DATA data; + byte buffer[TRANSLOG_PAGE_SIZE], *page; + data.addr= ¤t_page; + if ((page= translog_get_page(&data, buffer)) == NULL) + DBUG_RETURN(1); + if (data.was_recovered) + { + DBUG_PRINT("error", ("file no: %lu (%d) " + "rec_offset: 0x%lx (%lu) (%d)", + (ulong) LSN_FILE_NO(current_page), + (uint3korr(page + 3) != + LSN_FILE_NO(current_page)), + (ulong) LSN_OFFSET(current_page), + (ulong) (LSN_OFFSET(current_page) / + TRANSLOG_PAGE_SIZE), + (uint3korr(page) != + LSN_OFFSET(current_page) / + TRANSLOG_PAGE_SIZE))); + old_log_was_recovered= 1; + break; + } + old_flags= page[TRANSLOG_PAGE_FLAGS]; + last_valid_page= current_page; + current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */ + } while (current_page <= current_file_last_page); + current_page+= LSN_ONE_FILE; + current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE); + } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) && + !old_log_was_recovered); + if (last_valid_page == CONTROL_FILE_IMPOSSIBLE_LSN) + { + /* Panic!!! Even page which should be valid is invalid */ + /* TODO: issue error */ + DBUG_RETURN(1); + } + DBUG_PRINT("info", ("Last valid page is in file: %lu " + "offset: %lu (0x%lx) " + "Logs found: %d was recovered: %d " + "flags match: %d", + (ulong) LSN_FILE_NO(last_valid_page), + (ulong) LSN_OFFSET(last_valid_page), + (ulong) LSN_OFFSET(last_valid_page), + logs_found, old_log_was_recovered, + (old_flags == flags))); + + /* TODO: check server ID */ + if (logs_found && !old_log_was_recovered && old_flags == flags) + { + TRANSLOG_VALIDATOR_DATA data; + byte buffer[TRANSLOG_PAGE_SIZE], *page; + uint16 chunk_offset; + data.addr= &last_valid_page; + /* continue old log */ + DBUG_ASSERT(LSN_FILE_NO(last_valid_page)== + LSN_FILE_NO(log_descriptor.horizon)); + if ((page= translog_get_page(&data, buffer)) == NULL || + (chunk_offset= translog_get_first_chunk_offset(page)) == 0) + DBUG_RETURN(1); + + /* Puts filled part of old page in the buffer */ + log_descriptor.horizon= last_valid_page; + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + /* + Free space if filled with 0 and first byte of + real chunk can't be 0 + */ + while (chunk_offset < TRANSLOG_PAGE_SIZE && page[chunk_offset] != '\0') + { + uint16 chunk_length; + if ((chunk_length= + translog_get_total_chunk_length(page, chunk_offset)) == 0) + DBUG_RETURN(1); + DBUG_PRINT("info", ("chunk: offset: %u length: %u", + (uint) chunk_offset, (uint) chunk_length)); + chunk_offset+= chunk_length; + + /* chunk can't cross the page border */ + DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE); + } + memcpy(log_descriptor.buffers->buffer, page, chunk_offset); + log_descriptor.bc.buffer->size+= chunk_offset; + log_descriptor.bc.ptr+= chunk_offset; + log_descriptor.bc.current_page_fill= chunk_offset; + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + (chunk_offset + + LSN_OFFSET(last_valid_page))); + DBUG_PRINT("info", ("Move Page #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr - log_descriptor.bc. + buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); + } + } + DBUG_PRINT("info", ("Logs found: %d was recovered: %d", + logs_found, old_log_was_recovered)); + if (!logs_found) + { + /* Start new log system from scratch */ + /* Used space */ + log_descriptor.horizon= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* header page */ + /* Current logs file number in page cache */ + if ((log_descriptor.log_file_num[0]= + open_logfile_by_number_no_cache(1)) == -1 || + translog_write_file_header()) + DBUG_RETURN(1); + if (ma_control_file_write_and_force(CONTROL_FILE_IMPOSSIBLE_LSN, 1, + CONTROL_FILE_UPDATE_ONLY_LOGNO)) + DBUG_RETURN(1); + /* assign buffer 0 */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + } + else if (old_log_was_recovered || old_flags != flags) + { + /* leave the damaged file untouched */ + log_descriptor.horizon+= LSN_ONE_FILE; + /* header page */ + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + TRANSLOG_PAGE_SIZE); + if (translog_create_new_file()) + DBUG_RETURN(1); + /* + Buffer system left untouched after recovery => we should init it + (starting from buffer 0) + */ + translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0); + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + } + + /* all LSNs that are on disk are flushed */ + log_descriptor.sent_to_file= log_descriptor.flushed= log_descriptor.horizon; + /* + horizon is (potentially) address of the next LSN we need decrease + it to signal that all LSNs before it are flushed + */ + log_descriptor.flushed--; /* offset decreased */ + log_descriptor.sent_to_file--; /* offset decreased */ + /* + Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up + structures for generating 2-byte ids: + */ + my_atomic_rwlock_init(&LOCK_id_to_share); + id_to_share= (MARIA_SHARE **) my_malloc(SHARE_ID_MAX*sizeof(MARIA_SHARE*), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!id_to_share)) + DBUG_RETURN(1); + id_to_share--; /* min id is 1 */ + translog_inited= 1; + DBUG_RETURN(0); +} + + +/* + Free transaction log file buffer + + SYNOPSIS + translog_buffer_destroy() + buffer_no The buffer to free + + NOTE + This buffer should be locked +*/ + +static void translog_buffer_destroy(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_destroy"); + DBUG_PRINT("enter", + ("Buffer #%u: 0x%lx file: %d offset: (%lu,0x%lx) size: %lu", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->file, + (ulong) LSN_FILE_NO(buffer->offset), + (ulong) LSN_OFFSET(buffer->offset), + (ulong) buffer->size)); + DBUG_ASSERT(buffer->waiting_filling_buffer.last_thread == 0); + if (buffer->file != -1) + { + /* + We ignore errors here, because we can't do something about it + (it is shutting down) + */ + translog_buffer_flush(buffer); + } + DBUG_PRINT("info", ("Destroy mutex: 0x%lx", (ulong) &buffer->mutex)); + pthread_mutex_destroy(&buffer->mutex); + DBUG_VOID_RETURN; +} + + +/* + Free log handler resources + + SYNOPSIS + translog_destroy() +*/ + +void translog_destroy() +{ + uint i; + DBUG_ENTER("translog_destroy"); + + if (translog_inited) + { + if (log_descriptor.bc.buffer->file != -1) + translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc); + + for (i= 0; i < TRANSLOG_BUFFERS_NO; i++) + { + struct st_translog_buffer *buffer= log_descriptor.buffers + i; + translog_buffer_destroy(buffer); + } + + /* close files */ + for (i= 0; i < OPENED_FILES_NUM; i++) + { + if (log_descriptor.log_file_num[i] != -1) + translog_close_log_file(log_descriptor.log_file_num[i]); + } + pthread_mutex_destroy(&log_descriptor.sent_to_file_lock); + my_close(log_descriptor.directory_fd, MYF(MY_WME)); + my_atomic_rwlock_destroy(&LOCK_id_to_share); + my_free((gptr)(id_to_share + 1), MYF(MY_ALLOW_ZERO_PTR)); + translog_inited= 0; + } + DBUG_VOID_RETURN; +} + + +/* + Lock the loghandler + + SYNOPSIS + translog_lock() + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_lock() +{ + struct st_translog_buffer *current_buffer; + DBUG_ENTER("translog_lock"); + + /* + Locking the loghandler mean locking current buffer, but it can change + during locking, so we should check it + */ + for (;;) + { + current_buffer= log_descriptor.bc.buffer; + if (translog_buffer_lock(current_buffer)) + DBUG_RETURN(1); + if (log_descriptor.bc.buffer == current_buffer) + break; + translog_buffer_unlock(current_buffer); + } + DBUG_RETURN(0); +} + + +/* + Unlock the loghandler + + SYNOPSIS + translog_unlock() + + RETURN + 0 OK + 1 Error +*/ + +static inline my_bool translog_unlock() +{ + DBUG_ENTER("translog_unlock"); + translog_buffer_unlock(log_descriptor.bc.buffer); + + DBUG_RETURN(0); +} + + +#define translog_buffer_lock_assert_owner(B) \ + safe_mutex_assert_owner(&B->mutex); +void translog_lock_assert_owner() +{ + translog_buffer_lock_assert_owner(log_descriptor.bc.buffer); +} + + +/* + Start new page + + SYNOPSIS + translog_page_next() + horizon \ Position in file and buffer where we are + cursor / + prev_buffer Buffer which should be flushed will be assigned + here if it is need. This is always set. + + NOTE + handler should be locked + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + struct st_translog_buffer **prev_buffer) +{ + struct st_translog_buffer *buffer= cursor->buffer; + DBUG_ENTER("translog_page_next"); + + if ((cursor->ptr +TRANSLOG_PAGE_SIZE > + cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) || + (LSN_OFFSET(*horizon) > + log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE)) + { + DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d " + "File size: %lu max: %lu => %d", + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer), + (cursor->ptr + TRANSLOG_PAGE_SIZE > + cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER), + (ulong) LSN_OFFSET(*horizon), + (ulong) log_descriptor.log_file_max_size, + (LSN_OFFSET(*horizon) > + (log_descriptor.log_file_max_size - + TRANSLOG_PAGE_SIZE)))); + if (translog_buffer_next(horizon, cursor, + LSN_OFFSET(*horizon) > + (log_descriptor.log_file_max_size - + TRANSLOG_PAGE_SIZE))) + DBUG_RETURN(1); + *prev_buffer= buffer; + DBUG_PRINT("info", ("Buffer #%u (0x%lu): have to be flushed", + (uint) buffer->buffer_no, (ulong) buffer)); + } + else + { + DBUG_PRINT("info", ("Use the same buffer #%u (0x%lu): " + "Buffer Size: %lu (%lu)", + (uint) buffer->buffer_no, + (ulong) buffer, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + translog_finish_page(horizon, cursor); + translog_new_page_header(horizon, cursor); + *prev_buffer= NULL; + } + DBUG_RETURN(0); +} + + +/* + Write data of given length to the current page + + SYNOPSIS + translog_write_data_on_page() + horizon \ Pointers on file and buffer + cursor / + length IN length of the chunk + buffer buffer with data + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, + byte *buffer) +{ + DBUG_ENTER("translog_write_data_on_page"); + DBUG_PRINT("enter", ("Chunk length: %lu Page size %u", + (ulong) length, (uint) cursor->current_page_fill)); + DBUG_ASSERT(length > 0); + DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + + TRANSLOG_WRITE_BUFFER); + + memcpy(cursor->ptr, buffer, length); + cursor->ptr+= length; + (*horizon)+= length; /* adds offset */ + cursor->current_page_fill+= length; + if (!cursor->chaser) + cursor->buffer->size+= length; + DBUG_PRINT("info", ("Write data buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu)", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + + DBUG_RETURN(0); +} + + +/* + Write data from parts of given length to the current page + + SYNOPSIS + translog_write_parts_on_page() + horizon \ Pointers on file and buffer + cursor / + length IN length of the chunk + parts IN/OUT chunk source + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor, + translog_size_t length, + struct st_translog_parts *parts) +{ + translog_size_t left= length; + uint cur= (uint) parts->current; + DBUG_ENTER("translog_write_parts_on_page"); + DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u " + "Buffer size: %lu (%lu)", + (ulong) length, + (uint) (cur + 1), (uint) parts->elements, + (uint) cursor->current_page_fill, + (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer))); + DBUG_ASSERT(length > 0); + DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_ASSERT(length + cursor->ptr <=cursor->buffer->buffer + + TRANSLOG_WRITE_BUFFER); + + do + { + translog_size_t len; + LEX_STRING *part; + byte *buff; + + DBUG_ASSERT(cur < parts->elements); + part= parts->parts + cur; + buff= (byte*) part->str; + DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: 0x%lx", + (uint) (cur + 1), (ulong) part->length, (ulong) left, + (ulong) buff)); + + if (part->length > left) + { + /* we should write less then the current part */ + len= left; + part->length-= len; + part->str+= len; + DBUG_PRINT("info", ("Set new part: %u Length: %lu", + (uint) (cur + 1), (ulong) part->length)); + } + else + { + len= part->length; + cur++; + DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len)); + } + DBUG_PRINT("info", ("copy: 0x%lx <- 0x%lx %u", + (ulong) cursor->ptr, (ulong)buff, (uint)len)); + if (likely(len)) + { + memcpy(cursor->ptr, buff, len); + left-= len; + cursor->ptr+= len; + } + } while (left); + + DBUG_PRINT("info", ("Horizon: (%lu,0x%lx) Length %lu(0x%lx)", + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon), + (ulong) length, (ulong) length)); + parts->current= cur; + (*horizon)+= length; /* offset increasing */ + cursor->current_page_fill+= length; + if (!cursor->chaser) + cursor->buffer->size+= length; + DBUG_PRINT("info", ("Write parts buffer #%u: 0x%lx " + "chaser: %d Size: %lu (%lu) " + "Horizon: (%lu,0x%lx) buff offset: 0x%lx", + (uint) cursor->buffer->buffer_no, (ulong) cursor->buffer, + cursor->chaser, (ulong) cursor->buffer->size, + (ulong) (cursor->ptr - cursor->buffer->buffer), + (ulong) LSN_FILE_NO(*horizon), + (ulong) LSN_OFFSET(*horizon), + (ulong) (LSN_OFFSET(cursor->buffer->offset) + + cursor->buffer->size))); + DBUG_EXECUTE("info", translog_check_cursor(cursor);); + + DBUG_RETURN(0); +} + + +/* + Put 1 group chunk type 0 header into parts array + + SYNOPSIS + translog_write_variable_record_1group_header() + parts Descriptor of record source parts + type The log record type + short_trid Short transaction ID or 0 if it has no sense + header_length Calculated header length of chunk type 0 + chunk0_header Buffer for the chunk header writing +*/ + +static void +translog_write_variable_record_1group_header(struct st_translog_parts *parts, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + uint16 header_length, + byte *chunk0_header) +{ + LEX_STRING *part; + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ + part= parts->parts + (--parts->current); + parts->total_record_length+= (part->length= header_length); + part->str= (char*)chunk0_header; + /* puts chunk type */ + *chunk0_header= (byte) (type | TRANSLOG_CHUNK_LSN); + int2store(chunk0_header + 1, short_trid); + /* puts record length */ + translog_write_variable_record_1group_code_len(chunk0_header + 3, + parts->record_length, + header_length); + /* puts 0 as chunk length which indicate 1 group record */ + int2store(chunk0_header + header_length - 2, 0); +} + + +/* + Increase number of writers for this buffer + + SYNOPSIS + translog_buffer_increase_writers() + buffer target buffer +*/ + +static inline void +translog_buffer_increase_writers(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_increase_writers"); + buffer->copy_to_buffer_in_progress++; + DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->copy_to_buffer_in_progress)); + DBUG_VOID_RETURN; +} + + +/* + Decrease number of writers for this buffer + + SYNOPSIS + translog_buffer_decrease_writers() + buffer target buffer +*/ + + +static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer) +{ + DBUG_ENTER("translog_buffer_decrease_writers"); + buffer->copy_to_buffer_in_progress--; + DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u 0x%lx: %d", + (uint) buffer->buffer_no, (ulong) buffer, + buffer->copy_to_buffer_in_progress)); + if (buffer->copy_to_buffer_in_progress == 0 && + buffer->waiting_filling_buffer.last_thread != NULL) + wqueue_release_queue(&buffer->waiting_filling_buffer); + DBUG_VOID_RETURN; +} + + +/* + Put chunk 2 from new page beginning + + SYNOPSIS + translog_write_variable_record_chunk2_page() + parts Descriptor of record source parts + horizon \ Pointers on file position and buffer + cursor / + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_chunk2_page(struct st_translog_parts *parts, + TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + struct st_translog_buffer *buffer_to_flush; + int rc; + byte chunk2_header[1]; + DBUG_ENTER("translog_write_variable_record_chunk2_page"); + chunk2_header[0]= TRANSLOG_CHUNK_NOHDR; + + LINT_INIT(buffer_to_flush); + rc= translog_page_next(horizon, cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + + /* Puts chunk type */ + translog_write_data_on_page(horizon, cursor, 1, chunk2_header); + /* Puts chunk body */ + translog_write_parts_on_page(horizon, cursor, + log_descriptor.page_capacity_chunk_2, parts); + DBUG_RETURN(0); +} + + +/* + Put chunk 3 of requested length in the buffer from new page beginning + + SYNOPSIS + translog_write_variable_record_chunk3_page() + parts Descriptor of record source parts + length Length of this chunk + horizon \ Pointers on file position and buffer + cursor / + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_chunk3_page(struct st_translog_parts *parts, + uint16 length, + TRANSLOG_ADDRESS *horizon, + struct st_buffer_cursor *cursor) +{ + struct st_translog_buffer *buffer_to_flush; + LEX_STRING *part; + int rc; + byte chunk3_header[1 + 2]; + DBUG_ENTER("translog_write_variable_record_chunk3_page"); + + LINT_INIT(buffer_to_flush); + rc= translog_page_next(horizon, cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + if (length == 0) + { + /* It was call to write page header only (no data for chunk 3) */ + DBUG_PRINT("info", ("It is a call to make page header only")); + DBUG_RETURN(0); + } + + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ + part= parts->parts + (--parts->current); + parts->total_record_length+= (part->length= 1 + 2); + part->str= (char*)chunk3_header; + /* Puts chunk type */ + *chunk3_header= (byte) (TRANSLOG_CHUNK_LNGTH); + /* Puts chunk length */ + int2store(chunk3_header + 1, length); + + translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts); + DBUG_RETURN(0); +} + +/* + Move log pointer (horizon) on given number pages starting from next page, + and given offset on the last page + + SYNOPSIS + translog_advance_pointer() + pages Number of full pages starting from the next one + last_page_data Plus this data on the last page + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_advance_pointer(uint pages, uint16 last_page_data) +{ + translog_size_t last_page_offset= (log_descriptor.page_overhead + + last_page_data); + translog_size_t offset= (TRANSLOG_PAGE_SIZE - + log_descriptor.bc.current_page_fill + + pages * TRANSLOG_PAGE_SIZE + last_page_offset); + translog_size_t buffer_end_offset, file_end_offset, min_offset; + DBUG_ENTER("translog_advance_pointer"); + DBUG_PRINT("enter", ("Pointer: (%lu, 0x%lx) + %u + %u pages + %u + %u", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (uint) (TRANSLOG_PAGE_SIZE - + log_descriptor.bc.current_page_fill), + pages, (uint) log_descriptor.page_overhead, + (uint) last_page_data)); + + for (;;) + { + uint8 new_buffer_no; + struct st_translog_buffer *new_buffer; + struct st_translog_buffer *old_buffer; + buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size; + file_end_offset= (log_descriptor.log_file_max_size - + LSN_OFFSET(log_descriptor.horizon)); + DBUG_PRINT("info", ("offset: %lu buffer_end_offs: %lu, " + "file_end_offs: %lu", + (ulong) offset, (ulong) buffer_end_offset, + (ulong) file_end_offset)); + DBUG_PRINT("info", ("Buff #%u %u (0x%lx) offset 0x%lx + size 0x%lx = " + "0x%lx (0x%lx)", + (uint) log_descriptor.bc.buffer->buffer_no, + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset), + (ulong) log_descriptor.bc.buffer->size, + (ulong) (LSN_OFFSET(log_descriptor.bc.buffer->offset) + + log_descriptor.bc.buffer->size), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) + + log_descriptor.bc.buffer->size == + LSN_OFFSET(log_descriptor.horizon)); + + if (offset <= buffer_end_offset && offset <= file_end_offset) + break; + old_buffer= log_descriptor.bc.buffer; + new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO; + new_buffer= log_descriptor.buffers + new_buffer_no; + + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + + min_offset= min(buffer_end_offset, file_end_offset); + /* TODO: check is it ptr or size enough */ + log_descriptor.bc.buffer->size+= min_offset; + log_descriptor.bc.ptr+= min_offset; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu)", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. + buffer->buffer))); + DBUG_ASSERT((ulong) (log_descriptor.bc.ptr - + log_descriptor.bc.buffer->buffer) == + log_descriptor.bc.buffer->size); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + translog_buffer_increase_writers(log_descriptor.bc.buffer); + + if (file_end_offset <= buffer_end_offset) + { + log_descriptor.horizon+= LSN_ONE_FILE; + log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon, + TRANSLOG_PAGE_SIZE); + DBUG_PRINT("info", ("New file: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon))); + if (translog_create_new_file()) + { + DBUG_RETURN(1); + } + } + else + { + DBUG_PRINT("info", ("The same file")); + log_descriptor.horizon+= min_offset; /* offset increasing */ + } + translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); + if (translog_buffer_unlock(old_buffer)) + DBUG_RETURN(1); + offset-= min_offset; + } + log_descriptor.bc.ptr+= offset; + log_descriptor.bc.buffer->size+= offset; + translog_buffer_increase_writers(log_descriptor.bc.buffer); + log_descriptor.horizon+= offset; /* offset increasing */ + log_descriptor.bc.current_page_fill= last_page_offset; + DBUG_PRINT("info", ("drop write_counter")); + log_descriptor.bc.write_counter= 0; + log_descriptor.bc.previous_offset= 0; + DBUG_PRINT("info", ("NewP buffer #%u: 0x%lx chaser: %d Size: %lu (%lu) " + "offset: %u last page: %u", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + log_descriptor.bc.chaser, + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr - + log_descriptor.bc.buffer-> + buffer), (uint) offset, + (uint) last_page_offset)); + DBUG_PRINT("info", + ("pointer moved to: (%lu, 0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); + log_descriptor.bc.protected= 0; + DBUG_RETURN(0); +} + + + +/* + Get page rest + + SYNOPSIS + translog_get_current_page_rest() + + NOTE loghandler should be locked + + RETURN + number of bytes left on the current page +*/ + +#define translog_get_current_page_rest() \ + (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill) + +/* + Get buffer rest in full pages + + SYNOPSIS + translog_get_current_buffer_rest() + + NOTE loghandler should be locked + + RETURN + number of full pages left on the current buffer +*/ + +#define translog_get_current_buffer_rest() \ + ((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER - \ + log_descriptor.bc.ptr) / \ + TRANSLOG_PAGE_SIZE) + +/* + Calculate possible group size without first (current) page + + SYNOPSIS + translog_get_current_group_size() + + NOTE loghandler should be locked + + RETURN + group size without first (current) page +*/ + +static translog_size_t translog_get_current_group_size() +{ + /* buffer rest in full pages */ + translog_size_t buffer_rest= translog_get_current_buffer_rest(); + DBUG_ENTER("translog_get_current_group_size"); + DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest)); + + buffer_rest*= log_descriptor.page_capacity_chunk_2; + /* in case of only half of buffer free we can write this and next buffer */ + if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2) + { + DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu", + (ulong) buffer_rest, + (ulong) log_descriptor.buffer_capacity_chunk_2)); + buffer_rest+= log_descriptor.buffer_capacity_chunk_2; + } + + DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest)); + + DBUG_RETURN(buffer_rest); +} + + +/* + Write variable record in 1 group + + SYNOPSIS + translog_write_variable_record_1group() + lsn LSN of the record will be written here + type the log record type + short_trid Short transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Calculated header length of chunk type 0 + trn Transaction structure pointer for hooks by + record log type, for short_id + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_1group(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, uint16 header_length, + TRN *trn) +{ + TRANSLOG_ADDRESS horizon; + struct st_buffer_cursor cursor; + int rc= 0; + uint i; + translog_size_t record_rest, full_pages, first_page; + uint additional_chunk3_page= 0; + byte chunk0_header[1 + 2 + 5 + 2]; + DBUG_ENTER("translog_write_variable_record_1group"); + + *lsn= horizon= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook)(type, trn, lsn, parts)) + { + translog_unlock(); + DBUG_RETURN(1); + } + cursor= log_descriptor.bc; + cursor.chaser= 1; + + /* Advance pointer To be able unlock the loghandler */ + first_page= translog_get_current_page_rest(); + record_rest= parts->record_length - (first_page - header_length); + full_pages= record_rest / log_descriptor.page_capacity_chunk_2; + record_rest= (record_rest % log_descriptor.page_capacity_chunk_2); + + if (record_rest + 1 == log_descriptor.page_capacity_chunk_2) + { + DBUG_PRINT("info", ("2 chunks type 3 is needed")); + /* We will write 2 chunks type 3 at the end of this group */ + additional_chunk3_page= 1; + record_rest= 1; + } + + DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) " + "additional: %u (%u) rest %u = %u", + first_page, first_page - header_length, + full_pages, + (ulong) full_pages * + log_descriptor.page_capacity_chunk_2, + additional_chunk3_page, + additional_chunk3_page * + (log_descriptor.page_capacity_chunk_2 - 1), + record_rest, parts->record_length)); + /* record_rest + 3 is chunk type 3 overhead + record_rest */ + rc|= translog_advance_pointer(full_pages + additional_chunk3_page, + (record_rest ? record_rest + 3 : 0)); + log_descriptor.bc.buffer->last_lsn= *lsn; + + rc|= translog_unlock(); + + /* + Check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + if (rc) + DBUG_RETURN(1); + + translog_write_variable_record_1group_header(parts, type, short_trid, + header_length, chunk0_header); + + /* fill the pages */ + translog_write_parts_on_page(&horizon, &cursor, first_page, parts); + + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + + for (i= 0; i < full_pages; i++) + { + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + DBUG_RETURN(1); + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + } + + if (additional_chunk3_page) + { + if (translog_write_variable_record_chunk3_page(parts, + log_descriptor. + page_capacity_chunk_2 - 2, + &horizon, &cursor)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE); + } + + if (translog_write_variable_record_chunk3_page(parts, + record_rest, + &horizon, &cursor)) + DBUG_RETURN(1); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + + if (!(rc= translog_buffer_lock(cursor.buffer))) + { + /* + Check if we wrote something on 1:st not full page and need to reconstruct + CRC and sector protection + */ + translog_buffer_decrease_writers(cursor.buffer); + } + rc|= translog_buffer_unlock(cursor.buffer); + DBUG_RETURN(rc); +} + + +/* + Write variable record in 1 chunk + + SYNOPSIS + translog_write_variable_record_1chunk() + lsn LSN of the record will be written here + type the log record type + short_trid Short transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Calculated header length of chunk type 0 + trn Transaction structure pointer for hooks by + record log type, for short_id + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_1chunk(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, uint16 header_length, + TRN *trn) +{ + int rc; + byte chunk0_header[1 + 2 + 5 + 2]; + DBUG_ENTER("translog_write_variable_record_1chunk"); + + translog_write_variable_record_1group_header(parts, type, short_trid, + header_length, chunk0_header); + + *lsn= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook)(type, trn, + lsn, parts)) + { + translog_unlock(); + DBUG_RETURN(1); + } + + rc= translog_write_parts_on_page(&log_descriptor.horizon, + &log_descriptor.bc, + parts->total_record_length, parts); + log_descriptor.bc.buffer->last_lsn= *lsn; + rc|= translog_unlock(); + + /* + check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + + DBUG_RETURN(rc); +} + + +/* + Calculate and write LSN difference (compressed LSN) + + SYNOPSIS + translog_put_LSN_diff() + base_lsn LSN from which we calculate difference + lsn LSN for codding + dst Result will be written to dst[-pack_length] .. dst[-1] + + NOTE: + To store an LSN in a compact way we will use the following compression: + + If a log record has LSN1, and it contains the lSN2 as a back reference, + Instead of LSN2 we write LSN1-LSN2, encoded as: + + two bits the number N (see below) + 14 bits + N bytes + + That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + is stored in the first two bits. + + RETURN + # pointer on coded LSN + NULL Error +*/ + +static byte *translog_put_LSN_diff(LSN base_lsn, LSN lsn, byte *dst) +{ + DBUG_ENTER("translog_put_LSN_diff"); + DBUG_PRINT("enter", ("Base: (0x%lu,0x%lx) val: (0x%lu,0x%lx) dst: 0x%lx", + (ulong) LSN_FILE_NO(base_lsn), + (ulong) LSN_OFFSET(base_lsn), + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn), (ulong) dst)); + if (LSN_FILE_NO(base_lsn) == LSN_FILE_NO(lsn)) + { + uint32 diff; + DBUG_ASSERT(base_lsn > lsn); + diff= base_lsn - lsn; + DBUG_PRINT("info", ("File is the same. Diff: 0x%lx", (ulong) diff)); + if (diff <= 0x3FFF) + { + dst-= 2; + /* + Note we store this high byte first to ensure that first byte has + 0 in the 3 upper bits. + */ + dst[0]= diff >> 8; + dst[1]= (diff & 0xFF); + } + else if (diff <= 0x3FFFFF) + { + dst-= 3; + dst[0]= 0x40 | (diff >> 16); + int2store(dst + 1, diff & 0xFFFF); + } + else if (diff <= 0x3FFFFFFF) + { + dst-= 4; + dst[0]= 0x80 | (diff >> 24); + int3store(dst + 1, diff & 0xFFFFFF); + } + else + { + dst-= 5; + dst[0]= 0xC0; + int4store(dst + 1, diff); + } + } + else + { + uint32 diff; + uint32 offset_diff; + ulonglong base_offset= LSN_OFFSET(base_lsn); + DBUG_ASSERT(base_lsn > lsn); + diff= LSN_FILE_NO(base_lsn) - LSN_FILE_NO(lsn); + DBUG_PRINT("info", ("File is different. Diff: 0x%lx", (ulong) diff)); + + if (base_offset < LSN_OFFSET(lsn)) + { + /* take 1 from file offset */ + diff--; + base_offset+= LL(0x100000000); + } + offset_diff= base_offset - LSN_OFFSET(lsn); + if (diff > 0x3f) + { + /* + It is full LSN after special 1 diff (which is impossible + in real life) + */ + dst-= 2 + LSN_STORE_SIZE; + dst[0]= 0; + dst[1]= 1; + lsn_store(dst + 2, lsn); + } + else + { + dst-= 5; + *dst= (0xC0 | diff); + int4store(dst + 1, offset_diff); + } + } + DBUG_PRINT("info", ("new dst: 0x%lx", (ulong) dst)); + DBUG_RETURN(dst); +} + + +/* + Get LSN from LSN-difference (compressed LSN) + + SYNOPSIS + translog_get_LSN_from_diff() + base_lsn LSN from which we calculate difference + src pointer to coded lsn + dst pointer to buffer where to write 7byte LSN + + NOTE: + To store an LSN in a compact way we will use the following compression: + + If a log record has LSN1, and it contains the lSN2 as a back reference, + Instead of LSN2 we write LSN1-LSN2, encoded as: + + two bits the number N (see below) + 14 bits + N bytes + + That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2 + is stored in the first two bits. + + RETURN + pointer to buffer after decoded LSN +*/ + +static byte *translog_get_LSN_from_diff(LSN base_lsn, byte *src, byte *dst) +{ + LSN lsn; + uint32 diff; + uint32 first_byte; + uint32 file_no, rec_offset; + uint8 code; + DBUG_ENTER("translog_get_LSN_from_diff"); + DBUG_PRINT("enter", ("Base: (0x%lx,0x%lx) src: 0x%lx dst 0x%lx", + (ulong) LSN_FILE_NO(base_lsn), + (ulong) LSN_OFFSET(base_lsn), + (ulong) src, (ulong) dst)); + first_byte= *((uint8*) src); + code= first_byte >> 6; /* Length is in 2 most significant bits */ + first_byte&= 0x3F; + src++; /* Skip length + encode */ + file_no= LSN_FILE_NO(base_lsn); /* Assume relative */ + DBUG_PRINT("info", ("code: %u first byte: %lu", + (uint) code, (ulong) first_byte)); + switch (code) { + case 0: + if (first_byte == 0 && *((uint8*)src) == 1) + { + /* + It is full LSN after special 1 diff (which is impossible + in real life) + */ + memcpy(dst, src + 1, LSN_STORE_SIZE); + DBUG_PRINT("info", ("Special case of full LSN, new src: 0x%lx", + (ulong) (src + 1 + LSN_STORE_SIZE))); + DBUG_RETURN(src + 1 + LSN_STORE_SIZE); + } + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) + *((uint8*)src)); + break; + case 1: + diff= uint2korr(src); + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) + diff); + break; + case 2: + diff= uint3korr(src); + rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) + diff); + break; + case 3: + { + ulonglong base_offset= LSN_OFFSET(base_lsn); + diff= uint4korr(src); + if (diff > LSN_OFFSET(base_lsn)) + { + /* take 1 from file offset */ + first_byte++; + base_offset+= LL(0x100000000); + } + file_no= LSN_FILE_NO(base_lsn) - first_byte; + rec_offset= base_offset - diff; + break; + } + default: + DBUG_ASSERT(0); + DBUG_RETURN(NULL); + } + lsn= MAKE_LSN(file_no, rec_offset); + src+= code + 1; + lsn_store(dst, lsn); + DBUG_PRINT("info", ("new src: 0x%lx", (ulong) src)); + DBUG_RETURN(src); +} + + +/* + Encode relative LSNs listed in the parameters + + SYNOPSIS + translog_relative_LSN_encode() + parts Parts list with encoded LSN(s) + base_lsn LSN which is base for encoding + lsns number of LSN(s) to encode + compressed_LSNs buffer which can be used for storing compressed LSN(s) + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, + LSN base_lsn, + uint lsns, byte *compressed_LSNs) +{ + LEX_STRING *part; + uint lsns_len= lsns * LSN_STORE_SIZE; + char buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE]; + char *buffer= buffer_src; + + DBUG_ENTER("translog_relative_LSN_encode"); + + DBUG_ASSERT(parts->current != 0); + part= parts->parts + parts->current; + + /* collect all LSN(s) in one chunk if it (they) is (are) divided */ + if (part->length < lsns_len) + { + uint copied= part->length; + LEX_STRING *next_part; + DBUG_PRINT("info", ("Using buffer: 0x%lx", (ulong) compressed_LSNs)); + memcpy(buffer, (byte*)part->str, part->length); + next_part= parts->parts + parts->current + 1; + do + { + DBUG_ASSERT(next_part < parts->parts + parts->elements); + if ((next_part->length + copied) < lsns_len) + { + memcpy(buffer + copied, (byte*)next_part->str, + next_part->length); + copied+= next_part->length; + next_part->length= 0; next_part->str= 0; + /* delete_dynamic_element(&parts->parts, parts->current + 1); */ + next_part++; + parts->current++; + part= parts->parts + parts->current; + } + else + { + uint len= lsns_len - copied; + memcpy(buffer + copied, (byte*)next_part->str, len); + copied= lsns_len; + next_part->str+= len; + next_part->length-= len; + } + } while (copied < lsns_len); + } + else + { + buffer= part->str; + part->str+= lsns_len; + part->length-= lsns_len; + parts->current--; + part= parts->parts + parts->current; + } + + { + /* Compress */ + LSN ref; + int economy; + byte *src_ptr; + byte *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD * + COMPRESSED_LSN_MAX_STORE_SIZE); + for (src_ptr= buffer + lsns_len - LSN_STORE_SIZE; + src_ptr >= buffer; + src_ptr-= LSN_STORE_SIZE) + { + ref= lsn_korr(src_ptr); + if ((dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr)) == NULL) + DBUG_RETURN(1); + } + part->length= (uint)((compressed_LSNs + + (MAX_NUMBER_OF_LSNS_PER_RECORD * + COMPRESSED_LSN_MAX_STORE_SIZE)) - + dst_ptr); + parts->record_length-= (economy= lsns_len - part->length); + DBUG_PRINT("info", ("new length of LSNs: %u economy: %d", + part->length, economy)); + parts->total_record_length-= economy; + part->str= (char*)dst_ptr; + } + DBUG_RETURN(0); +} + + +/* + Write multi-group variable-size record + + SYNOPSIS + translog_write_variable_record_mgroup() + lsn LSN of the record will be written here + type the log record type + short_trid Short transaction ID or 0 if it has no sense + parts Descriptor of record source parts + buffer_to_flush Buffer which have to be flushed if it is not 0 + header_length Header length calculated for 1 group + buffer_rest Beginning from which we plan to write in full pages + trn Transaction structure pointer for hooks by + record log type, for short_id + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_write_variable_record_mgroup(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + struct st_translog_buffer + *buffer_to_flush, + uint16 header_length, + translog_size_t buffer_rest, + TRN *trn) +{ + TRANSLOG_ADDRESS horizon; + struct st_buffer_cursor cursor; + int rc= 0; + uint i, chunk2_page, full_pages; + uint curr_group= 0; + translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1; + translog_size_t done= 0; + struct st_translog_group_descriptor group; + DYNAMIC_ARRAY groups; + uint16 chunk3_size; + uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1; + uint16 last_page_capacity; + my_bool new_page_before_chunk0= 1, first_chunk0= 1; + byte chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1]; + byte chunk2_header[1]; + uint header_fixed_part= header_length + 2; + uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1); + DBUG_ENTER("translog_write_variable_record_mgroup"); + + chunk2_header[0]= TRANSLOG_CHUNK_NOHDR; + + if (init_dynamic_array(&groups, sizeof(struct st_translog_group_descriptor), + 10, 10 CALLER_INFO)) + { + translog_unlock(); + UNRECOVERABLE_ERROR(("init array failed")); + DBUG_RETURN(1); + } + + first_page= translog_get_current_page_rest(); + record_rest= parts->record_length - (first_page - 1); + DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest)); + + if (record_rest < buffer_rest) + { + DBUG_PRINT("info", ("too many free space because changing header")); + buffer_rest-= log_descriptor.page_capacity_chunk_2; + DBUG_ASSERT(record_rest >= buffer_rest); + } + + do + { + group.addr= horizon= log_descriptor.horizon; + cursor= log_descriptor.bc; + cursor.chaser= 1; + if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255) + { + /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */ + full_pages= 255; + buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2; + } + /* + group chunks = + full pages + first page (which actually can be full, too). + But here we assign number of chunks - 1 + */ + group.num= full_pages; + if (insert_dynamic(&groups, (gptr) &group)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + goto err_unlock; + } + + DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) " + "full_pages: %lu (%lu) " + "Left %lu", + groups.elements, + first_page, first_page - 1, + (ulong) full_pages, + (ulong) (full_pages * + log_descriptor.page_capacity_chunk_2), + (ulong)(parts->record_length - (first_page - 1 + + buffer_rest) - + done))); + rc|= translog_advance_pointer(full_pages, 0); + + rc|= translog_unlock(); + + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + goto err; + } + + translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); + translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + done))); + + for (i= 0; i < full_pages; i++) + { + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + goto err; + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) " + "local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + i * log_descriptor.page_capacity_chunk_2 - + done))); + } + + done+= (first_page - 1 + buffer_rest); + + /* TODO: make separate function for following */ + rc= translog_page_next(&horizon, &cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + goto err; + } + rc= translog_buffer_lock(cursor.buffer); + if (!rc) + translog_buffer_decrease_writers(cursor.buffer); + rc|= translog_buffer_unlock(cursor.buffer); + if (rc) + goto err; + + translog_lock(); + + first_page= translog_get_current_page_rest(); + buffer_rest= translog_get_current_group_size(); + } while (first_page + buffer_rest < (uint) (parts->record_length - done)); + + group.addr= horizon= log_descriptor.horizon; + cursor= log_descriptor.bc; + cursor.chaser= 1; + group.num= 0; /* 0 because it does not matter */ + if (insert_dynamic(&groups, (gptr) &group)) + { + UNRECOVERABLE_ERROR(("insert into array failed")); + goto err_unlock; + } + record_rest= parts->record_length - done; + DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest)); + if (first_page <= record_rest + 1) + { + chunk2_page= 1; + record_rest-= (first_page - 1); + full_pages= record_rest / log_descriptor.page_capacity_chunk_2; + record_rest= (record_rest % log_descriptor.page_capacity_chunk_2); + last_page_capacity= page_capacity; + } + else + { + chunk2_page= full_pages= 0; + last_page_capacity= first_page; + } + chunk3_size= 0; + chunk3_pages= 0; + if (last_page_capacity > record_rest + 1 && record_rest != 0) + { + if (last_page_capacity > + record_rest + header_fixed_part + groups.elements * (7 + 1)) + { + /* 1 record of type 0 */ + chunk3_pages= 0; + } + else + { + chunk3_pages= 1; + if (record_rest + 2 == last_page_capacity) + { + chunk3_size= record_rest - 1; + record_rest= 1; + } + else + { + chunk3_size= record_rest; + record_rest= 0; + } + } + } + /* + A first non-full page will hold type 0 chunk only if it fit in it with + all its headers + */ + while (page_capacity < + record_rest + header_fixed_part + + (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1)) + chunk0_pages++; + DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u " + "Group on last page: %u", + chunk0_pages, groups.elements, + groups_per_page, + (groups.elements - + ((page_capacity - header_fixed_part) / (7 + 1)) * + (chunk0_pages - 1)))); + DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) " + "chunk3: %u (%u) rest: %u", + first_page, + chunk2_page, full_pages, + (ulong) full_pages * + log_descriptor.page_capacity_chunk_2, + chunk3_pages, (uint) chunk3_size, (uint) record_rest)); + rc= translog_advance_pointer(full_pages + chunk3_pages + + (chunk0_pages - 1), + record_rest + header_fixed_part + + (groups.elements - + ((page_capacity - + header_fixed_part) / (7 + 1)) * + (chunk0_pages - 1)) * (7 + 1)); + rc|= translog_unlock(); + if (rc) + goto err; + + if (chunk2_page) + { + DBUG_PRINT("info", ("chunk 2 to finish first page")); + translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header); + translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + done))); + } + else if (chunk3_pages) + { + DBUG_PRINT("info", ("chunk 3")); + DBUG_ASSERT(full_pages == 0); + byte chunk3_header[3]; + chunk3_pages= 0; + chunk3_header[0]= TRANSLOG_CHUNK_LNGTH; + int2store(chunk3_header + 1, chunk3_size); + translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header); + translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts); + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - chunk3_size - done))); + } + else + { + DBUG_PRINT("info", ("no new_page_before_chunk0")); + new_page_before_chunk0= 0; + } + + for (i= 0; i < full_pages; i++) + { + DBUG_ASSERT(chunk2_page != 0); + if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor)) + goto err; + + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx) " + "Left: %lu", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon), + (ulong) (parts->record_length - (first_page - 1) - + i * log_descriptor.page_capacity_chunk_2 - + done))); + } + + if (chunk3_pages && + translog_write_variable_record_chunk3_page(parts, + chunk3_size, + &horizon, &cursor)) + goto err; + DBUG_PRINT("info", ("absolute horizon: (%lu,0x%lx) local: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon), + (ulong) LSN_FILE_NO(horizon), + (ulong) LSN_OFFSET(horizon))); + + + *chunk0_header= (byte) (type |TRANSLOG_CHUNK_LSN); + int2store(chunk0_header + 1, short_trid); + translog_write_variable_record_1group_code_len(chunk0_header + 3, + parts->record_length, + header_length); + do + { + int limit; + if (new_page_before_chunk0) + { + rc= translog_page_next(&horizon, &cursor, &buffer_to_flush); + if (buffer_to_flush != NULL) + { + rc|= translog_buffer_lock(buffer_to_flush); + translog_buffer_decrease_writers(buffer_to_flush); + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + buffer_to_flush= NULL; + } + if (rc) + { + UNRECOVERABLE_ERROR(("flush of unlock buffer failed")); + goto err; + } + } + new_page_before_chunk0= 1; + + if (first_chunk0) + { + first_chunk0= 0; + *lsn= horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook) (type, trn, + lsn, parts)) + goto err; + } + + /* + A first non-full page will hold type 0 chunk only if it fit in it with + all its headers => the fist page is full or number of groups less then + possible number of full page. + */ + limit= (groups_per_page < groups.elements - curr_group ? + groups_per_page : groups.elements - curr_group); + DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u", + (uint) groups.elements, (uint) curr_group, + (uint) limit)); + + if (chunk0_pages == 1) + { + DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u", + (uint) limit, (uint) record_rest, + (uint) (2 + limit * (7 + 1) + record_rest))); + int2store(chunk0_header + header_length - 2, + 2 + limit * (7 + 1) + record_rest); + } + else + { + DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u", + (uint) limit, (uint) (2 + limit * (7 + 1)))); + int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1)); + } + int2store(chunk0_header + header_length, groups.elements - curr_group); + translog_write_data_on_page(&horizon, &cursor, header_fixed_part, + chunk0_header); + for (i= curr_group; i < limit + curr_group; i++) + { + struct st_translog_group_descriptor *grp_ptr; + grp_ptr= dynamic_element(&groups, i, + struct st_translog_group_descriptor *); + lsn_store(group_desc, grp_ptr->addr); + group_desc[7]= grp_ptr->num; + translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc); + } + + if (chunk0_pages == 1 && record_rest != 0) + translog_write_parts_on_page(&horizon, &cursor, record_rest, parts); + + chunk0_pages--; + curr_group+= limit; + + } while (chunk0_pages != 0); + rc= translog_buffer_lock(cursor.buffer); + if (cmp_translog_addr(cursor.buffer->last_lsn, *lsn) < 0) + cursor.buffer->last_lsn= *lsn; + translog_buffer_decrease_writers(cursor.buffer); + rc|= translog_buffer_unlock(cursor.buffer); + + delete_dynamic(&groups); + DBUG_RETURN(rc); + +err_unlock: + translog_unlock(); +err: + delete_dynamic(&groups); + DBUG_RETURN(1); +} + + +/* + Write the variable length log record + + SYNOPSIS + translog_write_variable_record() + lsn LSN of the record will be written here + type the log record type + short_trid Short transaction ID or 0 if it has no sense + parts Descriptor of record source parts + trn Transaction structure pointer for hooks by + record log type, for short_id + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_variable_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + TRN *trn) +{ + struct st_translog_buffer *buffer_to_flush= NULL; + uint header_length1= 1 + 2 + 2 + + translog_variable_record_length_bytes(parts->record_length); + ulong buffer_rest; + uint page_rest; + /* Max number of such LSNs per record is 2 */ + byte compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD * + COMPRESSED_LSN_MAX_STORE_SIZE]; + DBUG_ENTER("translog_write_variable_record"); + + translog_lock(); + DBUG_PRINT("info", ("horizon: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill; + DBUG_PRINT("info", ("header length: %u page_rest: %u", + header_length1, page_rest)); + + /* + header and part which we should read have to fit in one chunk + TODO: allow to divide readable header + */ + if (page_rest < + (header_length1 + log_record_type_descriptor[type].read_header_len)) + { + DBUG_PRINT("info", + ("Next page, size: %u header: %u + %u", + log_descriptor.bc.current_page_fill, + header_length1, + log_record_type_descriptor[type].read_header_len)); + translog_page_next(&log_descriptor.horizon, &log_descriptor.bc, + &buffer_to_flush); + /* Chunk 2 header is 1 byte, so full page capacity will be one byte more */ + page_rest= log_descriptor.page_capacity_chunk_2 + 1; + DBUG_PRINT("info", ("page_rest: %u", page_rest)); + } + + /* + To minimize compressed size we will compress always relative to + very first chunk address (log_descriptor.horizon for now) + */ + if (log_record_type_descriptor[type].compressed_LSN > 0) + { + if (translog_relative_LSN_encode(parts, log_descriptor.horizon, + log_record_type_descriptor[type]. + compressed_LSN, compressed_LSNs)) + { + translog_unlock(); + if (buffer_to_flush != NULL) + { + /* + It is just try to finish log in nice way in case of error, so we + do not check result of the following functions, because we are + going return error state in any case + */ + translog_buffer_flush(buffer_to_flush); + translog_buffer_unlock(buffer_to_flush); + } + DBUG_RETURN(1); + } + /* recalculate header length after compression */ + header_length1= 1 + 2 + 2 + + translog_variable_record_length_bytes(parts->record_length); + DBUG_PRINT("info", ("after compressing LSN(s) header length: %u " + "record length: %lu", + header_length1, (ulong)parts->record_length)); + } + + /* TODO: check space on current page for header + few bytes */ + if (page_rest >= parts->record_length + header_length1) + { + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_1chunk(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, trn)); + } + + buffer_rest= translog_get_current_group_size(); + + if (buffer_rest >= parts->record_length + header_length1 - page_rest) + { + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_1group(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, trn)); + } + /* following function makes translog_unlock(); */ + DBUG_RETURN(translog_write_variable_record_mgroup(lsn, type, short_trid, + parts, buffer_to_flush, + header_length1, + buffer_rest, trn)); +} + + +/* + Write the fixed and pseudo-fixed log record + + SYNOPSIS + translog_write_fixed_record() + lsn LSN of the record will be written here + type the log record type + short_trid Short transaction ID or 0 if it has no sense + parts Descriptor of record source parts + trn Transaction structure pointer for hooks by + record log type, for short_id + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_write_fixed_record(LSN *lsn, + enum translog_record_type type, + SHORT_TRANSACTION_ID short_trid, + struct st_translog_parts *parts, + TRN *trn) +{ + struct st_translog_buffer *buffer_to_flush= NULL; + byte chunk1_header[1 + 2]; + /* Max number of such LSNs per record is 2 */ + byte compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD * + COMPRESSED_LSN_MAX_STORE_SIZE]; + LEX_STRING *part; + int rc; + DBUG_ENTER("translog_write_fixed_record"); + DBUG_ASSERT((log_record_type_descriptor[type].class == + LOGRECTYPE_FIXEDLENGTH && + parts->record_length == + log_record_type_descriptor[type].fixed_length) || + (log_record_type_descriptor[type].class == + LOGRECTYPE_PSEUDOFIXEDLENGTH && + parts->record_length == + log_record_type_descriptor[type].fixed_length)); + + translog_lock(); + DBUG_PRINT("info", ("horizon: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) LSN_OFFSET(log_descriptor.horizon))); + + DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE); + DBUG_PRINT("info", + ("Page size: %u record: %u next cond: %d", + log_descriptor.bc.current_page_fill, + (parts->record_length + + log_record_type_descriptor[type].compressed_LSN * 2 + 3), + ((((uint) log_descriptor.bc.current_page_fill) + + (parts->record_length + + log_record_type_descriptor[type].compressed_LSN * 2 + 3)) > + TRANSLOG_PAGE_SIZE))); + /* + check that there is enough place on current page. + NOTE: compressing may increase page LSN size on two bytes for every LSN + */ + if ((((uint) log_descriptor.bc.current_page_fill) + + (parts->record_length + + log_record_type_descriptor[type].compressed_LSN * 2 + 3)) > + TRANSLOG_PAGE_SIZE) + { + DBUG_PRINT("info", ("Next page")); + translog_page_next(&log_descriptor.horizon, &log_descriptor.bc, + &buffer_to_flush); + } + + *lsn= log_descriptor.horizon; + if (log_record_type_descriptor[type].inwrite_hook && + (*log_record_type_descriptor[type].inwrite_hook) (type, trn, + lsn, parts)) + { + rc= 1; + goto err; + } + + /* compress LSNs */ + if (log_record_type_descriptor[type].class == LOGRECTYPE_PSEUDOFIXEDLENGTH) + { + DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0); + if (translog_relative_LSN_encode(parts, *lsn, + log_record_type_descriptor[type]. + compressed_LSN, compressed_LSNs)) + { + rc= 1; + goto err; + } + } + + /* + Write the whole record at once (we know that there is enough place on + the destination page) + */ + DBUG_ASSERT(parts->current != 0); /* first part is left for header */ + part= parts->parts + (--parts->current); + parts->total_record_length+= (part->length= 1 + 2); + part->str= (char*)chunk1_header; + *chunk1_header= (byte) (type | TRANSLOG_CHUNK_FIXED); + int2store(chunk1_header + 1, short_trid); + + rc= translog_write_parts_on_page(&log_descriptor.horizon, + &log_descriptor.bc, + parts->total_record_length, parts); + + log_descriptor.bc.buffer->last_lsn= *lsn; + +err: + rc|= translog_unlock(); + + /* + check if we switched buffer and need process it (current buffer is + unlocked already => we will not delay other threads + */ + if (buffer_to_flush != NULL) + { + if (!rc) + rc= translog_buffer_flush(buffer_to_flush); + rc|= translog_buffer_unlock(buffer_to_flush); + } + + DBUG_RETURN(rc); +} + + +/** + @brief Writes the log record + + If share has no 2-byte-id yet, gives an id to the share and logs + LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID + yet, logs it. + + @param lsn LSN of the record will be written here + @param type the log record type + @param trn Transaction structure pointer for hooks by + record log type, for short_id + @param share MARIA_SHARE of table or NULL + @param rec_len record length or 0 (count it) + @param part_no number of parts or 0 (count it) + @param parts_data zero ended (in case of number of parts is 0) + array of LEX_STRINGs (parts), first + TRANSLOG_INTERNAL_PARTS positions in the log + should be unused (need for loghandler) + @param store_share_id if share!=NULL then share's id will automatically + be stored in the two first bytes pointed (so + pointer is assumed to be !=NULL) + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +my_bool translog_write_record(LSN *lsn, + enum translog_record_type type, + TRN *trn, struct st_maria_share *share, + translog_size_t rec_len, + uint part_no, + LEX_STRING *parts_data, + uchar *store_share_id) +{ + struct st_translog_parts parts; + LEX_STRING *part; + int rc; + uint short_trid= trn->short_id; + DBUG_ENTER("translog_write_record"); + DBUG_PRINT("enter", ("type: %u ShortTrID: %u", + (uint) type, (uint)short_trid)); + + if (share) + { + if (!share->base.transactional) + { + DBUG_PRINT("info", ("It is not transactional table")); + DBUG_RETURN(0); + } + if (unlikely(share->id == 0)) + { + /* + First log write for this MARIA_SHARE; give it a short id. + When the lock manager is enabled and needs a short id, it should be + assigned in the lock manager (because row locks will be taken before + log records are written; for example SELECT FOR UPDATE takes locks but + writes no log record. + */ + if (unlikely(translog_assign_id_to_share(share, trn))) + DBUG_RETURN(1); + } + fileid_store(store_share_id, share->id); + } + if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID))) + { + LSN lsn; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; + uchar log_data[6]; + int6store(log_data, trn->trid); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */ + if (unlikely(translog_write_record(&lsn, LOGREC_LONG_TRANSACTION_ID, + trn, NULL, sizeof(log_data), + sizeof(log_array)/sizeof(log_array[0]), + log_array, NULL))) + DBUG_RETURN(1); + } + + parts.parts= parts_data; + + /* count parts if they are not counted by upper level */ + if (part_no == 0) + { + for (part_no= TRANSLOG_INTERNAL_PARTS; + parts_data[part_no].length != 0; + part_no++); + } + parts.elements= part_no; + parts.current= TRANSLOG_INTERNAL_PARTS; + + /* clear TRANSLOG_INTERNAL_PARTS */ + DBUG_ASSERT(TRANSLOG_INTERNAL_PARTS != 0); + parts_data[0].str= 0; + parts_data[0].length= 0; + + /* count length of the record */ + if (rec_len == 0) + { + for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\ + part < parts_data + part_no; + part++) + { + rec_len+= part->length; + } + } + parts.record_length= rec_len; + +#ifndef DBUG_OFF + { + uint i; + uint len= 0; +#ifdef HAVE_PURIFY + ha_checksum checksum= 0; +#endif + for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++) + { +#ifdef HAVE_PURIFY + /* Find unitialized bytes early */ + checksum+= my_checksum(checksum, parts_data[i].str, + parts_data[i].length); +#endif + len+= parts_data[i].length; + } + DBUG_ASSERT(len == rec_len); + } +#endif + /* + Start total_record_length from record_length then overhead will + be add + */ + parts.total_record_length= parts.record_length; + DBUG_PRINT("info", ("record length: %lu %lu", + (ulong) parts.record_length, + (ulong) parts.total_record_length)); + + /* process this parts */ + if (!(rc= (log_record_type_descriptor[type].prewrite_hook && + (*log_record_type_descriptor[type].prewrite_hook) (type, trn, + share, + &parts)))) + { + switch (log_record_type_descriptor[type].class) { + case LOGRECTYPE_VARIABLE_LENGTH: + rc= translog_write_variable_record(lsn, type, short_trid, &parts, trn); + break; + case LOGRECTYPE_PSEUDOFIXEDLENGTH: + case LOGRECTYPE_FIXEDLENGTH: + rc= translog_write_fixed_record(lsn, type, short_trid, &parts, trn); + break; + case LOGRECTYPE_NOT_ALLOWED: + default: + DBUG_ASSERT(0); + rc= 1; + } + } + + DBUG_RETURN(rc); +} + + +/* + Decode compressed (relative) LSN(s) + + SYNOPSIS + translog_relative_lsn_decode() + base_lsn LSN for encoding + src Decode LSN(s) from here + dst Put decoded LSNs here + lsns number of LSN(s) + + RETURN + position in sources after decoded LSN(s) +*/ + +static byte *translog_relative_LSN_decode(LSN base_lsn, + byte *src, byte *dst, uint lsns) +{ + uint i; + for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE) + { + src= translog_get_LSN_from_diff(base_lsn, src, dst); + } + return src; +} + +/* + Get header of fixed/pseudo length record and call hook for it processing + + SYNOPSIS + translog_fixed_length_header() + page Pointer to the buffer with page where LSN chunk is + placed + page_offset Offset of the first chunk in the page + buff Buffer to be filled with header data + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_fixed_length_header(byte *page, + translog_size_t page_offset, + TRANSLOG_HEADER_BUFFER *buff) +{ + struct st_log_record_type_descriptor *desc= + log_record_type_descriptor + buff->type; + byte *src= page + page_offset + 3; + byte *dst= buff->header; + byte *start= src; + uint lsns= desc->compressed_LSN; + uint length= desc->fixed_length; + + DBUG_ENTER("translog_fixed_length_header"); + + buff->record_length= length; + + if (desc->class == LOGRECTYPE_PSEUDOFIXEDLENGTH) + { + DBUG_ASSERT(lsns > 0); + src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns); + lsns*= LSN_STORE_SIZE; + dst+= lsns; + length-= lsns; + buff->compressed_LSN_economy= (lsns - (src - start)); + } + else + buff->compressed_LSN_economy= 0; + + memcpy(dst, src, length); + buff->non_header_data_start_offset= page_offset + + ((src + length) - (page + page_offset)); + buff->non_header_data_len= 0; + DBUG_RETURN(buff->record_length); +} + + +/* + Free resources used by TRANSLOG_HEADER_BUFFER + + SYNOPSIS + translog_free_record_header(); +*/ + +void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff) +{ + DBUG_ENTER("translog_free_record_header"); + if (buff->groups_no != 0) + { + my_free((gptr) buff->groups, MYF(0)); + buff->groups_no= 0; + } + DBUG_VOID_RETURN; +} + + +/** + @brief Returns the current horizon at the end of the current log + + @return Horizon +*/ + +TRANSLOG_ADDRESS translog_get_horizon() +{ + TRANSLOG_ADDRESS res; + translog_lock(); + res= log_descriptor.horizon; + translog_unlock(); + return res; +} + + +/* + Set last page in the scanner data structure + + SYNOPSIS + translog_scanner_set_last_page() + scanner Information about current chunk during scanning + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA + *scanner) +{ + my_bool page_ok; + scanner->last_file_page= scanner->page_addr; + return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok)); +} + + +/* + Initialize reader scanner + + SYNOPSIS + translog_init_scanner() + lsn LSN with which it have to be inited + fixed_horizon true if it is OK do not read records which was written + after scanning beginning + scanner scanner which have to be inited + + RETURN + 0 OK + 1 Error +*/ + +my_bool translog_init_scanner(LSN lsn, + my_bool fixed_horizon, + struct st_translog_scanner_data *scanner) +{ + TRANSLOG_VALIDATOR_DATA data; + DBUG_ENTER("translog_init_scanner"); + DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn)); + DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); + + data.addr= &scanner->page_addr; + data.was_recovered= 0; + + scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; + + scanner->fixed_horizon= fixed_horizon; + + scanner->horizon= translog_get_horizon(); + DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon))); + + /* lsn < horizon */ + DBUG_ASSERT(lsn < scanner->horizon)); + + scanner->page_addr= lsn; + scanner->page_addr-= scanner->page_offset; /*decrease offset */ + + if (translog_scanner_set_last_page(scanner)) + DBUG_RETURN(1); + + if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + DBUG_RETURN(1); + DBUG_RETURN(0); +} + + +/* + Checks End of the Log + + SYNOPSIS + translog_scanner_eol() + scanner Information about current chunk during scanning + + RETURN + 1 End of the Log + 0 OK +*/ +static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_scanner_eol"); + DBUG_PRINT("enter", + ("Horizon: (%lu, 0x%lx) Current: (%lu, 0x%lx+0x%x=0x%lx)", + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (uint) scanner->page_offset, + (ulong) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset))); + if (scanner->horizon > (scanner->page_addr + + scanner->page_offset)) + { + DBUG_PRINT("info", ("Horizon is not reached")); + DBUG_RETURN(0); + } + if (scanner->fixed_horizon) + { + DBUG_PRINT("info", ("Horizon is fixed and reached")); + DBUG_RETURN(1); + } + scanner->horizon= translog_get_horizon(); + DBUG_PRINT("info", + ("Horizon is re-read, EOL: %d", + scanner->horizon <= (scanner->page_addr + + scanner->page_offset))); + DBUG_RETURN(scanner->horizon <= (scanner->page_addr + + scanner->page_offset)); +} + + +/* + Cheks End of the Page + + SYNOPSIS + translog_scanner_eop() + scanner Information about current chunk during scanning + + RETURN + 1 End of the Page + 0 OK +*/ +static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_scanner_eop"); + DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE || + scanner->page[scanner->page_offset] == 0); +} + + +/* + Checks End of the File (I.e. we are scanning last page, which do not + mean end of this page) + + SYNOPSIS + translog_scanner_eof() + scanner Information about current chunk during scanning + + RETURN + 1 End of the File + 0 OK +*/ +static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_scanner_eof"); + DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) == + LSN_FILE_NO(scanner->last_file_page)); + DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx " + "normal EOF: %d", + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->last_file_page), + LSN_OFFSET(scanner->page_addr) == + LSN_OFFSET(scanner->last_file_page))); + /* + TODO: detect damaged file EOF, + TODO: issue warning if damaged file EOF detected + */ + DBUG_RETURN(scanner->page_addr == + scanner->last_file_page); +} + + +/* + Move scanner to the next chunk + + SYNOPSIS + translog_get_next_chunk() + scanner Information about current chunk during scanning + + RETURN + 0 OK + 1 Error +*/ + +static my_bool +translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner) +{ + uint16 len; + TRANSLOG_VALIDATOR_DATA data; + DBUG_ENTER("translog_get_next_chunk"); + + if ((len= translog_get_total_chunk_length(scanner->page, + scanner->page_offset)) == 0) + DBUG_RETURN(1); + scanner->page_offset+= len; + + if (translog_scanner_eol(scanner)) + { + scanner->page= &end_of_log; + scanner->page_offset= 0; + DBUG_RETURN(0); + } + if (translog_scanner_eop(scanner)) + { + if (translog_scanner_eof(scanner)) + { + DBUG_PRINT("info", ("horizon: (%lu,0x%lx) pageaddr: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr))); + /* if it is log end it have to be caught before */ + DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) > + LSN_FILE_NO(scanner->page_addr)); + scanner->page_addr+= LSN_ONE_FILE; + scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr, + TRANSLOG_PAGE_SIZE); + if (translog_scanner_set_last_page(scanner)) + DBUG_RETURN(1); + } + else + { + scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */ + } + + data.addr= &scanner->page_addr; + data.was_recovered= 0; + if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + DBUG_RETURN(1); + + scanner->page_offset= translog_get_first_chunk_offset(scanner->page); + if (translog_scanner_eol(scanner)) + { + scanner->page= &end_of_log; + scanner->page_offset= 0; + DBUG_RETURN(0); + } + DBUG_ASSERT(scanner->page[scanner->page_offset]); + } + DBUG_RETURN(0); +} + + +/* + Get header of variable length record and call hook for it processing + + SYNOPSIS + translog_variable_length_header() + page Pointer to the buffer with page where LSN chunk is + placed + page_offset Offset of the first chunk in the page + buff Buffer to be filled with header data + scanner If present should be moved to the header page if + it differ from LSN page + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_variable_length_header(byte *page, + translog_size_t page_offset, + TRANSLOG_HEADER_BUFFER *buff, + TRANSLOG_SCANNER_DATA + *scanner) +{ + struct st_log_record_type_descriptor *desc= (log_record_type_descriptor + + buff->type); + byte *src= page + page_offset + 1 + 2; + byte *dst= buff->header; + LSN base_lsn; + uint lsns= desc->compressed_LSN; + uint16 chunk_len; + uint16 length= desc->read_header_len; + uint16 buffer_length= length; + uint16 body_len; + TRANSLOG_SCANNER_DATA internal_scanner; + + DBUG_ENTER("translog_variable_length_header"); + + buff->record_length= translog_variable_record_1group_decode_len(&src); + chunk_len= uint2korr(src); + DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u", + (ulong) buff->record_length, (uint) chunk_len, + (uint) length, (uint) buffer_length)); + if (chunk_len == 0) + { + uint16 page_rest; + DBUG_PRINT("info", ("1 group")); + src+= 2; + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + + base_lsn= buff->lsn; + body_len= min(page_rest, buff->record_length); + } + else + { + uint grp_no, curr; + uint header_to_skip; + uint16 page_rest; + + DBUG_PRINT("info", ("multi-group")); + grp_no= buff->groups_no= uint2korr(src + 2); + if (!(buff->groups= + (TRANSLOG_GROUP*) my_malloc(sizeof(TRANSLOG_GROUP) * grp_no, + MYF(0)))) + DBUG_RETURN(0); + DBUG_PRINT("info", ("Groups: %u", (uint) grp_no)); + src+= (2 + 2); + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + curr= 0; + header_to_skip= src - (page + page_offset); + buff->chunk0_pages= 0; + + for (;;) + { + uint i, read= grp_no; + + buff->chunk0_pages++; + if (page_rest < grp_no * (7 + 1)) + read= page_rest / (7 + 1); + DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u " + "start from: %u", + buff->chunk0_pages, read, grp_no, curr)); + for (i= 0; i < read; i++, curr++) + { + DBUG_ASSERT(curr < buff->groups_no); + buff->groups[curr].addr= lsn_korr(src + i * (7 + 1)); + buff->groups[curr].num= src[i * (7 + 1) + 7]; + DBUG_PRINT("info", ("group #%u (%lu,0x%lx) chunks: %u", + curr, + (ulong) LSN_FILE_NO(buff->groups[curr].addr), + (ulong) LSN_OFFSET(buff->groups[curr].addr), + (uint) buff->groups[curr].num)); + } + grp_no-= read; + if (grp_no == 0) + { + if (scanner) + { + buff->chunk0_data_addr= scanner->page_addr; + buff->chunk0_data_addr+= (page_offset + header_to_skip + + read * (7 + 1)); /* offset increased */ + } + else + { + buff->chunk0_data_addr= buff->lsn; + /* offset increased */ + buff->chunk0_data_addr+= (header_to_skip + read * (7 + 1)); + } + buff->chunk0_data_len= chunk_len - 2 - read * (7 + 1); + DBUG_PRINT("info", ("Data address: (%lu,0x%lx) len: %u", + (ulong) LSN_FILE_NO(buff->chunk0_data_addr), + (ulong) LSN_OFFSET(buff->chunk0_data_addr), + buff->chunk0_data_len)); + break; + } + if (scanner == NULL) + { + DBUG_PRINT("info", ("use internal scanner for header reading")); + scanner= &internal_scanner; + translog_init_scanner(buff->lsn, 1, scanner); + } + translog_get_next_chunk(scanner); + page= scanner->page; + page_offset= scanner->page_offset; + src= page + page_offset + header_to_skip; + chunk_len= uint2korr(src - 2 - 2); + DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len)); + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + } + + if (scanner == NULL) + { + DBUG_PRINT("info", ("use internal scanner")); + scanner= &internal_scanner; + } + + base_lsn= buff->groups[0].addr; + translog_init_scanner(base_lsn, 1, scanner); + /* first group chunk is always chunk type 2 */ + page= scanner->page; + page_offset= scanner->page_offset; + src= page + page_offset + 1; + page_rest= TRANSLOG_PAGE_SIZE - (src - page); + body_len= page_rest; + } + if (lsns) + { + byte *start= src; + src= translog_relative_LSN_decode(base_lsn, src, dst, lsns); + lsns*= LSN_STORE_SIZE; + dst+= lsns; + length-= lsns; + buff->record_length+= (buff->compressed_LSN_economy= + (lsns - (src - start))); + DBUG_PRINT("info", ("lsns: %u length: %u economy: %d new length: %lu", + lsns / LSN_STORE_SIZE, (uint) length, + (int) buff->compressed_LSN_economy, + (ulong) buff->record_length)); + body_len-= (src - start); + } + else + buff->compressed_LSN_economy= 0; + + DBUG_ASSERT(body_len >= length); + body_len-= length; + memcpy(dst, src, length); + buff->non_header_data_start_offset= src + length - page; + buff->non_header_data_len= body_len; + DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u", + buff->non_header_data_start_offset, + buff->non_header_data_len, buffer_length)); + DBUG_RETURN(buffer_length); +} + + +/* + Read record header from the given buffer + + SYNOPSIS + translog_read_record_header_from_buffer() + page page content buffer + page_offset offset of the chunk in the page + buff destination buffer + scanner If this is set the scanner will be moved to the + record header page (differ from LSN page in case of + multi-group records) +*/ + +translog_size_t +translog_read_record_header_from_buffer(byte *page, + uint16 page_offset, + TRANSLOG_HEADER_BUFFER *buff, + TRANSLOG_SCANNER_DATA *scanner) +{ + DBUG_ENTER("translog_read_record_header_from_buffer"); + DBUG_ASSERT((page[page_offset] & TRANSLOG_CHUNK_TYPE) == + TRANSLOG_CHUNK_LSN || + (page[page_offset] & TRANSLOG_CHUNK_TYPE) == + TRANSLOG_CHUNK_FIXED); + buff->type= (page[page_offset] & TRANSLOG_REC_TYPE); + buff->short_trid= uint2korr(page + page_offset + 1); + DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN (%lu,0x%lx)", + (uint) buff->type, (uint)buff->short_trid, + (ulong) LSN_FILE_NO(buff->lsn), + (ulong) LSN_OFFSET(buff->lsn))); + /* Read required bytes from the header and call hook */ + switch (log_record_type_descriptor[buff->type].class) { + case LOGRECTYPE_VARIABLE_LENGTH: + DBUG_RETURN(translog_variable_length_header(page, page_offset, buff, + scanner)); + case LOGRECTYPE_PSEUDOFIXEDLENGTH: + case LOGRECTYPE_FIXEDLENGTH: + DBUG_RETURN(translog_fixed_length_header(page, page_offset, buff)); + default: + DBUG_ASSERT(0); + } + DBUG_RETURN(0); /* purecov: deadcode */ +} + + +/* + Read record header and some fixed part of a record (the part depend on + record type). + + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + buff log record header buffer + + NOTE + - Some type of record can be read completely by this call + - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative + LSN can be translated to absolute one), some fields can be added + (like actual header length in the record if the header has variable + length) + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t translog_read_record_header(LSN lsn, + TRANSLOG_HEADER_BUFFER *buff) +{ + byte buffer[TRANSLOG_PAGE_SIZE], *page; + translog_size_t page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; + TRANSLOG_ADDRESS addr; + TRANSLOG_VALIDATOR_DATA data; + DBUG_ENTER("translog_read_record_header"); + DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn))); + DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); + + buff->lsn= lsn; + buff->groups_no= 0; + data.addr= &addr; + data.was_recovered= 0; + addr= lsn; + addr-= page_offset; /* offset decreasing */ + if (!(page= translog_get_page(&data, buffer))) + DBUG_RETURN(0); + + DBUG_RETURN(translog_read_record_header_from_buffer(page, page_offset, + buff, 0)); +} + + +/* + Read record header and some fixed part of a record (the part depend on + record type). + + SYNOPSIS + translog_read_record_header_scan() + scan scanner position to read + buff log record header buffer + move_scanner request to move scanner to the header position + + NOTE + - Some type of record can be read completely by this call + - "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative + LSN can be translated to absolute one), some fields can be added + (like actual header length in the record if the header has variable + length) + + RETURN + 0 error + # number of bytes in TRANSLOG_HEADER_BUFFER::header where stored decoded + part of the header +*/ + +translog_size_t +translog_read_record_header_scan(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER *buff, + my_bool move_scanner) +{ + DBUG_ENTER("translog_read_record_header_scan"); + DBUG_PRINT("enter", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed %d", + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->last_file_page), + (ulong) LSN_OFFSET(scanner->last_file_page), + (uint) scanner->page_offset, + (uint) scanner->page_offset, scanner->fixed_horizon)); + buff->groups_no= 0; + buff->lsn= scanner->page_addr; + buff->lsn+= scanner->page_offset; /* offset increasing */ + DBUG_RETURN(translog_read_record_header_from_buffer(scanner->page, + scanner->page_offset, + buff, + (move_scanner ? + scanner : 0))); +} + + +/* + Read record header and some fixed part of the next record (the part + depend on record type). + + SYNOPSIS + translog_read_next_record_header() + scanner data for scanning if lsn is NULL scanner data + will be used for continue scanning. + The scanner can be NULL. + buff log record header buffer + + NOTE + - it is like translog_read_record_header, but read next record, so see + its NOTES. + - in case of end of the log buff->lsn will be set to + (CONTROL_FILE_IMPOSSIBLE_LSN) + + RETURN + 0 error + TRANSLOG_RECORD_HEADER_MAX_SIZE + 1 End of the log + # number of bytes in + TRANSLOG_HEADER_BUFFER::header + where stored decoded + part of the header +*/ + +translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER *buff) +{ + uint8 chunk_type; + + buff->groups_no= 0; /* to be sure that we will free it right */ + + DBUG_ENTER("translog_read_next_record_header"); + DBUG_PRINT("enter", ("scanner: 0x%lx", (ulong) scanner)); + DBUG_PRINT("info", ("Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d", + (ulong) LSN_FILE_NO(scanner->page_addr), + (ulong) LSN_OFFSET(scanner->page_addr), + (ulong) LSN_FILE_NO(scanner->horizon), + (ulong) LSN_OFFSET(scanner->horizon), + (ulong) LSN_FILE_NO(scanner->last_file_page), + (ulong) LSN_OFFSET(scanner->last_file_page), + (uint) scanner->page_offset, + (uint) scanner->page_offset, scanner->fixed_horizon)); + + do + { + if (translog_get_next_chunk(scanner)) + DBUG_RETURN(0); + chunk_type= scanner->page[scanner->page_offset] & TRANSLOG_CHUNK_TYPE; + DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type, + (uint) scanner->page[scanner->page_offset])); + } while (chunk_type != TRANSLOG_CHUNK_LSN && chunk_type != + TRANSLOG_CHUNK_FIXED && scanner->page[scanner->page_offset] != 0); + + if (scanner->page[scanner->page_offset] == 0) + { + /* Last record was read */ + buff->lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + /* Return 'end of log' marker */ + DBUG_RETURN(TRANSLOG_RECORD_HEADER_MAX_SIZE + 1); + } + DBUG_RETURN(translog_read_record_header_scan(scanner, buff, 0)); +} + + +/* + Moves record data reader to the next chunk and fill the data reader + information about that chunk. + + SYNOPSIS + translog_record_read_next_chunk() + data data cursor + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_record_read_next_chunk(struct st_translog_reader_data + *data) +{ + translog_size_t new_current_offset= data->current_offset + data->chunk_size; + uint16 chunk_header_len, chunk_len; + uint8 type; + DBUG_ENTER("translog_record_read_next_chunk"); + + if (data->eor) + { + DBUG_PRINT("info", ("end of the record flag set")); + DBUG_RETURN(1); + } + + if (data->header.groups_no && + data->header.groups_no - 1 != data->current_group && + data->header.groups[data->current_group].num == data->current_chunk) + { + /* Goto next group */ + data->current_group++; + data->current_chunk= 0; + DBUG_PRINT("info", ("skip to group: #%u", data->current_group)); + translog_init_scanner(data->header.groups[data->current_group].addr, + 1, &data->scanner); + } + else + { + data->current_chunk++; + if (translog_get_next_chunk(&data->scanner)) + DBUG_RETURN(1); + } + type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE; + + if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no) + { + DBUG_PRINT("info", + ("Last chunk: data len: %u offset: %u group: %u of %u", + data->header.chunk0_data_len, data->scanner.page_offset, + data->current_group, data->header.groups_no - 1)); + DBUG_ASSERT(data->header.groups_no - 1 == data->current_group); + DBUG_ASSERT(data->header.lsn == + data->scanner.page_addr + data->scanner.page_offset); + translog_init_scanner(data->header.chunk0_data_addr, 1, &data->scanner); + data->chunk_size= data->header.chunk0_data_len; + data->body_offset= data->scanner.page_offset; + data->current_offset= new_current_offset; + data->eor= 1; + DBUG_RETURN(0); + } + + if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED) + { + data->eor= 1; + DBUG_RETURN(1); /* End of record */ + } + + chunk_header_len= + translog_get_chunk_header_length(data->scanner.page, + data->scanner.page_offset); + chunk_len= translog_get_total_chunk_length(data->scanner.page, + data->scanner.page_offset); + data->chunk_size= chunk_len - chunk_header_len; + data->body_offset= data->scanner.page_offset + chunk_header_len; + data->current_offset= new_current_offset; + DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u " + "current_offset: %lu", + (uint) data->current_group, + (uint) data->current_chunk, + (uint) data->body_offset, + (uint) data->chunk_size, (ulong) data->current_offset)); + DBUG_RETURN(0); +} + + +/* + Initialize record reader data from LSN + + SYNOPSIS + translog_init_reader_data() + lsn reference to LSN we should start from + data reader data to initialize + + RETURN + 0 OK + 1 Error +*/ + +static my_bool translog_init_reader_data(LSN lsn, + struct st_translog_reader_data *data) +{ + DBUG_ENTER("translog_init_reader_data"); + if (translog_init_scanner(lsn, 1, &data->scanner) || + !(data->read_header= + translog_read_record_header_scan(&data->scanner, &data->header, 1))) + { + DBUG_RETURN(1); + } + data->body_offset= data->header.non_header_data_start_offset; + data->chunk_size= data->header.non_header_data_len; + data->current_offset= data->read_header; + data->current_group= 0; + data->current_chunk= 0; + data->eor= 0; + DBUG_PRINT("info", ("read_header: %u " + "body_offset: %u chunk_size: %u current_offset: %lu", + (uint) data->read_header, + (uint) data->body_offset, + (uint) data->chunk_size, (ulong) data->current_offset)); + DBUG_RETURN(0); +} + + +/* + Read a part of the record. + + SYNOPSIS + translog_read_record_header() + lsn log record serial number (address of the record) + offset From the beginning of the record beginning (read§ + by translog_read_record_header). + length Length of record part which have to be read. + buffer Buffer where to read the record part (have to be at + least 'length' bytes length) + + RETURN + length of data actually read +*/ + +translog_size_t translog_read_record(LSN lsn, + translog_size_t offset, + translog_size_t length, + byte *buffer, + struct st_translog_reader_data *data) +{ + translog_size_t requested_length= length; + translog_size_t end= offset + length; + struct st_translog_reader_data internal_data; + DBUG_ENTER("translog_read_record"); + + if (data == NULL) + { + DBUG_ASSERT(lsn != CONTROL_FILE_IMPOSSIBLE_LSN); + data= &internal_data; + } + if (lsn || + (offset < data->current_offset && + !(offset < data->read_header && offset + length < data->read_header))) + { + if (translog_init_reader_data(lsn, data)) + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Offset: %lu length: %lu " + "Scanner: Cur: (%lu,0x%lx) Hrz: (%lu,0x%lx) " + "Lst: (%lu,0x%lx) Offset: %u(%x) fixed: %d", + (ulong) offset, (ulong) length, + (ulong) LSN_FILE_NO(data->scanner.page_addr), + (ulong) LSN_OFFSET(data->scanner.page_addr), + (ulong) LSN_FILE_NO(data->scanner.horizon), + (ulong) LSN_OFFSET(data->scanner.horizon), + (ulong) LSN_FILE_NO(data->scanner.last_file_page), + (ulong) LSN_OFFSET(data->scanner.last_file_page), + (uint) data->scanner.page_offset, + (uint) data->scanner.page_offset, + data->scanner.fixed_horizon)); + if (offset < data->read_header) + { + uint16 len= min(data->read_header, end) - offset; + DBUG_PRINT("info", + ("enter header offset: %lu length: %lu", + (ulong) offset, (ulong) length)); + memcpy(buffer, data->header.header + offset, len); + length-= len; + if (length == 0) + DBUG_RETURN(requested_length); + offset+= len; + buffer+= len; + DBUG_PRINT("info", + ("len: %u offset: %lu curr: %lu length: %lu", + len, (ulong) offset, (ulong) data->current_offset, + (ulong) length)); + } + /* TODO: find first page which we should read by offset */ + + /* read the record chunk by chunk */ + for(;;) + { + uint page_end= data->current_offset + data->chunk_size; + DBUG_PRINT("info", + ("enter body offset: %lu curr: %lu " + "length: %lu page_end: %lu", + (ulong) offset, (ulong) data->current_offset, (ulong) length, + (ulong) page_end)); + if (offset < page_end) + { + uint len= page_end - offset; + DBUG_ASSERT(offset >= data->current_offset); + memcpy(buffer, + data->scanner.page + data->body_offset + + (offset - data->current_offset), len); + length-= len; + if (length == 0) + DBUG_RETURN(requested_length); + offset+= len; + buffer+= len; + DBUG_PRINT("info", + ("len: %u offset: %lu curr: %lu length: %lu", + len, (ulong) offset, (ulong) data->current_offset, + (ulong) length)); + } + if (translog_record_read_next_chunk(data)) + DBUG_RETURN(requested_length - length); + } +} + + +/* + Force skipping to the next buffer + + SYNOPSIS + translog_force_current_buffer_to_finish() +*/ + +static void translog_force_current_buffer_to_finish() +{ + TRANSLOG_ADDRESS new_buff_beginning; + uint16 old_buffer_no= log_descriptor.bc.buffer_no; + uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO; + struct st_translog_buffer *new_buffer= (log_descriptor.buffers + + new_buffer_no); + struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer; + byte *data= log_descriptor.bc.ptr -log_descriptor.bc.current_page_fill; + uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill; + uint16 current_page_fill, write_counter, previous_offset; + DBUG_ENTER("translog_force_current_buffer_to_finish"); + DBUG_PRINT("enter", ("Buffer #%u 0x%lx " + "Buffer addr: (%lu,0x%lx) " + "Page addr: (%lu,0x%lx) " + "size: %lu (%lu) Pg: %u left: %u", + (uint) log_descriptor.bc.buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) LSN_FILE_NO(log_descriptor.bc.buffer->offset), + (ulong) LSN_OFFSET(log_descriptor.bc.buffer->offset), + (ulong) LSN_FILE_NO(log_descriptor.horizon), + (ulong) (LSN_OFFSET(log_descriptor.horizon) - + log_descriptor.bc.current_page_fill), + (ulong) log_descriptor.bc.buffer->size, + (ulong) (log_descriptor.bc.ptr -log_descriptor.bc. + buffer->buffer), + (uint) log_descriptor.bc.current_page_fill, + (uint) left)); + + LINT_INIT(current_page_fill); + new_buff_beginning= log_descriptor.bc.buffer->offset; + new_buff_beginning+= log_descriptor.bc.buffer->size; /* increase offset */ + + DBUG_ASSERT(log_descriptor.bc.ptr !=NULL); + DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) == + LSN_FILE_NO(log_descriptor.bc.buffer->offset)); + DBUG_EXECUTE("info", translog_check_cursor(&log_descriptor.bc);); + DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE); + if (left != 0) + { + /* + TODO: if 'left' is so small that can't hold any other record + then do not move the page + */ + DBUG_PRINT("info", ("left: %u", (uint) left)); + + /* decrease offset */ + new_buff_beginning-= log_descriptor.bc.current_page_fill; + current_page_fill= log_descriptor.bc.current_page_fill; + + bzero(log_descriptor.bc.ptr, left); + log_descriptor.bc.buffer->size+= left; + DBUG_PRINT("info", ("Finish Page buffer #%u: 0x%lx " + "Size: %lu", + (uint) log_descriptor.bc.buffer->buffer_no, + (ulong) log_descriptor.bc.buffer, + (ulong) log_descriptor.bc.buffer->size)); + DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no == + log_descriptor.bc.buffer_no); + } + else + { + log_descriptor.bc.current_page_fill= 0; + } + + translog_buffer_lock(new_buffer); + translog_wait_for_buffer_free(new_buffer); + + write_counter= log_descriptor.bc.write_counter; + previous_offset= log_descriptor.bc.previous_offset; + translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no); + log_descriptor.bc.buffer->offset= new_buff_beginning; + log_descriptor.bc.write_counter= write_counter; + log_descriptor.bc.previous_offset= previous_offset; + + if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) + { + translog_put_sector_protection(data, &log_descriptor.bc); + if (left) + { + log_descriptor.bc.write_counter++; + log_descriptor.bc.previous_offset= current_page_fill; + } + else + { + DBUG_PRINT("info", ("drop write_counter")); + log_descriptor.bc.write_counter= 0; + log_descriptor.bc.previous_offset= 0; + } + } + + if (data[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC) + { + uint32 crc= translog_crc(data + log_descriptor.page_overhead, + TRANSLOG_PAGE_SIZE - + log_descriptor.page_overhead); + DBUG_PRINT("info", ("CRC: 0x%lx", (ulong) crc)); + int4store(data + 3 + 3 + 1, crc); + } + + if (left) + { + memcpy(new_buffer->buffer, data, current_page_fill); + log_descriptor.bc.ptr +=current_page_fill; + log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill= + current_page_fill; + new_buffer->overlay= old_buffer; + } + else + translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc); + + DBUG_VOID_RETURN; +} + + +/** + @brief Flush the log up to given LSN (included) + + @param lsn log record serial number up to which (inclusive) + the log has to be flushed + + @return Operation status + @retval 0 OK + @retval 1 Error + + @todo LOG: when a log write fails, we should not write to this log anymore + (if we add more log records to this log they will be unreadable: we will hit + the broken log record): all translog_flush() should be made to fail (because + translog_flush() is when a a transaction wants something durable and we + cannot make anything durable as log is corrupted). For that, a "my_bool + st_translog_descriptor::write_error" could be set to 1 when a + translog_write_record() or translog_flush() fails, and translog_flush() + would test this var (and translog_write_record() could also test this var if + it wants, though it's not absolutely needed). + Then, either shut Maria down immediately, or switch to a new log (but if we + get write error after write error, that would create too many logs). + A popular open-source transactional engine intentionally crashes as soon as + a log flush fails (we however don't want to crash the entire mysqld, but + stopping all engine's operations immediately would make sense). + Same applies to translog_write_record(). +*/ + +my_bool translog_flush(LSN lsn) +{ + LSN old_flushed, sent_to_file; + int rc= 0; + uint i; + my_bool full_circle= 0; + DBUG_ENTER("translog_flush"); + DBUG_PRINT("enter", ("Flush up to LSN: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(lsn), + (ulong) LSN_OFFSET(lsn))); + + translog_lock(); + old_flushed= log_descriptor.flushed; + for (;;) + { + uint16 buffer_no= log_descriptor.bc.buffer_no; + uint16 buffer_start= buffer_no; + struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer; + struct st_translog_buffer *buffer= log_descriptor.bc.buffer; + /* we can't flush in future */ + DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, lsn) >= 0); + if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0) + { + DBUG_PRINT("info", ("already flushed: (%lu,0x%lx)", + (ulong) LSN_FILE_NO(log_descriptor.flushed), + (ulong) LSN_OFFSET(log_descriptor.flushed))); + translog_unlock(); + DBUG_RETURN(0); + } + /* send to the file if it is not sent */ + translog_get_sent_to_file(&sent_to_file); + if (cmp_translog_addr(sent_to_file, lsn) >= 0) + break; + + do + { + buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO; + buffer= log_descriptor.buffers + buffer_no; + translog_buffer_lock(buffer); + translog_buffer_unlock(buffer_unlock); + buffer_unlock= buffer; + if (buffer->file != -1) + { + buffer_unlock= NULL; + if (buffer_start == buffer_no) + { + /* we made a circle */ + full_circle= 1; + translog_force_current_buffer_to_finish(); + } + break; + } + } while ((buffer_start != buffer_no) && + cmp_translog_addr(log_descriptor.flushed, lsn) < 0); + if (buffer_unlock != NULL) + translog_buffer_unlock(buffer_unlock); + rc= translog_buffer_flush(buffer); + translog_buffer_unlock(buffer); + if (rc) + DBUG_RETURN(1); + if (!full_circle) + translog_lock(); + } + + for (i= LSN_FILE_NO(old_flushed); i <= LSN_FILE_NO(lsn); i++) + { + uint cache_index; + File file; + + if ((cache_index= LSN_FILE_NO(log_descriptor.horizon) - i) < + OPENED_FILES_NUM) + { + /* file in the cache */ + if (log_descriptor.log_file_num[cache_index] == -1) + { + if ((log_descriptor.log_file_num[cache_index]= + open_logfile_by_number_no_cache(i)) == -1) + { + translog_unlock(); + DBUG_RETURN(1); + } + } + file= log_descriptor.log_file_num[cache_index]; + rc|= my_sync(file, MYF(MY_WME)); + } + /* We sync file when we are closing it => do nothing if file closed */ + } + log_descriptor.flushed= sent_to_file; + /** @todo LOG decide if syncing of directory is needed */ + rc|= my_sync(log_descriptor.directory_fd, MYF(MY_WME)); + translog_unlock(); + DBUG_RETURN(rc); +} + + +/** + @brief Sets transaction's rec_lsn if needed + + A transaction sometimes writes a REDO even before the page is in the + pagecache (example: brand new head or tail pages; full pages). So, if + Checkpoint happens just after the REDO write, it needs to know that the + REDO phase must start before this REDO. Scanning the pagecache cannot + tell that as the page is not in the cache. So, transaction sets its rec_lsn + to the REDO's LSN or somewhere before, and Checkpoint reads the + transaction's rec_lsn. + + @todo move it to a separate file + + @return Operation status, always 0 (success) +*/ + +static my_bool write_hook_for_redo(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, LSN *lsn, + struct st_translog_parts *parts + __attribute__ ((unused))) +{ + /* + If the hook stays so simple, it would be faster to pass + !trn->rec_lsn ? trn->rec_lsn : some_dummy_lsn + to translog_write_record(), like Monty did in his original code, and not + have a hook. For now we keep it like this. + */ + if (trn->rec_lsn == 0) + trn->rec_lsn= *lsn; + return 0; +} + + +/** + @brief Sets transaction's undo_lsn, first_undo_lsn if needed + + @todo move it to a separate file + + @return Operation status, always 0 (success) +*/ + +static my_bool write_hook_for_undo(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, LSN *lsn, + struct st_translog_parts *parts + __attribute__ ((unused))) +{ + trn->undo_lsn= *lsn; + if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0)) + trn->first_undo_lsn= + trn->undo_lsn | LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn); + return 0; + /* + when we implement purging, we will specialize this hook: UNDO_PURGE + records will additionally set trn->undo_purge_lsn + */ +} + + +/** + @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact + + If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently + open MARIA_SHAREs), give it one and record this assignment in the log + (LOGREC_FILE_ID log record). + + @param share table + @param trn calling transaction + + @return Operation status + @retval 0 OK + @retval 1 Error + + @note Can be called even if share already has an id (then will do nothing) +*/ + +int translog_assign_id_to_share(MARIA_SHARE *share, TRN *trn) +{ + /* + If you give an id to a non-BLOCK_RECORD table, you also need to release + this id somewhere. Then you can change the assertion. + */ + DBUG_ASSERT(share->data_file_type == BLOCK_RECORD); + /* re-check under mutex to avoid having 2 ids for the same share */ + pthread_mutex_lock(&share->intern_lock); + if (likely(share->id == 0)) + { + /* Inspired by set_short_trid() of trnman.c */ + int i= share->kfile.file % SHARE_ID_MAX + 1; + my_atomic_rwlock_wrlock(&LOCK_id_to_share); + /** + @todo RECOVERY BUG: if all slots are used, and we're using rwlocks + above, we will never exit the loop. To be discussed with Serg. + */ + for ( ; ; i= i % SHARE_ID_MAX + 1) /* the range is [1..SHARE_ID_MAX] */ + { + void *tmp= NULL; + if (id_to_share[i] == NULL && + my_atomic_casptr((void **)&id_to_share[i], &tmp, share)) + break; + } + my_atomic_rwlock_wrunlock(&LOCK_id_to_share); + share->id= (uint16)i; + DBUG_PRINT("info", ("id_to_share: 0x%lx -> %u", (ulong)share, i)); + LSN lsn; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 2]; + uchar log_data[FILEID_STORE_SIZE]; + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + /* + open_file_name is an unresolved name (symlinks are not resolved, datadir + is not realpath-ed, etc) which is good: the log can be moved to another + directory and continue working. + */ + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= share->open_file_name; + /** + @todo if we had the name's length in MARIA_SHARE we could avoid this + strlen() + */ + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= + strlen(share->open_file_name); + if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, share, + sizeof(log_data) + + log_array[TRANSLOG_INTERNAL_PARTS + + 1].length, + sizeof(log_array)/sizeof(log_array[0]), + log_array, log_data))) + return 1; + } + pthread_mutex_unlock(&share->intern_lock); + return 0; +} + + +/** + @brief Recycles a MARIA_SHARE's short id. + + @param share table + + @note Must be called only if share has an id (i.e. id != 0) +*/ + +void translog_deassign_id_from_share(MARIA_SHARE *share) +{ + DBUG_PRINT("info", ("id_to_share: 0x%lx id %u -> 0", + (ulong)share, share->id)); + /* + We don't need any mutex as we are called only when closing the last + instance of the table: no writes can be happening. + */ + my_atomic_rwlock_rdlock(&LOCK_id_to_share); + my_atomic_storeptr((void **)&id_to_share[share->id], 0); + my_atomic_rwlock_rdunlock(&LOCK_id_to_share); +} diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h new file mode 100644 index 00000000000..0a160a9bc53 --- /dev/null +++ b/storage/maria/ma_loghandler.h @@ -0,0 +1,237 @@ +/* transaction log default cache size (TODO: make it global variable) */ +#define TRANSLOG_PAGECACHE_SIZE 1024*1024*2 +/* transaction log default file size (TODO: make it global variable) */ +#define TRANSLOG_FILE_SIZE 1024*1024*1024 +/* transaction log default flags (TODO: make it global variable) */ +#define TRANSLOG_DEFAULT_FLAGS 0 + +/* Transaction log flags */ +#define TRANSLOG_PAGE_CRC 1 +#define TRANSLOG_SECTOR_PROTECTION (1<<1) +#define TRANSLOG_RECORD_CRC (1<<2) +#define TRANSLOG_FLAGS_NUM ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | \ + TRANSLOG_RECORD_CRC) + 1) + +/* + Page size in transaction log + It should be Power of 2 and multiple of DISK_DRIVE_SECTOR_SIZE + (DISK_DRIVE_SECTOR_SIZE * 2^N) +*/ +#define TRANSLOG_PAGE_SIZE (8*1024) + +#include "ma_loghandler_lsn.h" + +/* short transaction ID type */ +typedef uint16 SHORT_TRANSACTION_ID; + +struct st_maria_share; + +/* Length of CRC at end of pages */ +#define CRC_LENGTH 4 +/* Size of file id in logs */ +#define FILEID_STORE_SIZE 2 +/* Size of page reference in log */ +#define PAGE_STORE_SIZE ROW_EXTENT_PAGE_SIZE +/* Size of page ranges in log */ +#define PAGERANGE_STORE_SIZE ROW_EXTENT_COUNT_SIZE +#define DIRPOS_STORE_SIZE 1 + +/* Store methods to match the above sizes */ +#define fileid_store(T,A) int2store(T,A) +#define page_store(T,A) int5store(T,A) +#define dirpos_store(T,A) ((*(uchar*) (T)) = A) +#define pagerange_store(T,A) int2store(T,A) + +/* + Length of disk drive sector size (we assume that writing it + to disk is atomic operation) +*/ +#define DISK_DRIVE_SECTOR_SIZE 512 + +/* + Number of empty entries we need to have in LEX_STRING for + translog_write_record() +*/ +#define LOG_INTERNAL_PARTS 1 + +/* position reserved in an array of parts of a log record */ +#define TRANSLOG_INTERNAL_PARTS 2 + +/* types of records in the transaction log */ +/* Todo: Set numbers for these when we have all entries figured out */ + +enum translog_record_type +{ + LOGREC_RESERVED_FOR_CHUNKS23= 0, + LOGREC_REDO_INSERT_ROW_HEAD, + LOGREC_REDO_INSERT_ROW_TAIL, + LOGREC_REDO_INSERT_ROW_BLOB, + LOGREC_REDO_INSERT_ROW_BLOBS, + LOGREC_REDO_PURGE_ROW_HEAD, + LOGREC_REDO_PURGE_ROW_TAIL, + LOGREC_REDO_PURGE_BLOCKS, + LOGREC_REDO_DELETE_ROW, + LOGREC_REDO_UPDATE_ROW_HEAD, + LOGREC_REDO_INDEX, + LOGREC_REDO_UNDELETE_ROW, + LOGREC_CLR_END, + LOGREC_PURGE_END, + LOGREC_UNDO_ROW_INSERT, + LOGREC_UNDO_ROW_DELETE, + LOGREC_UNDO_ROW_UPDATE, + LOGREC_UNDO_ROW_PURGE, + LOGREC_UNDO_KEY_INSERT, + LOGREC_UNDO_KEY_DELETE, + LOGREC_PREPARE, + LOGREC_PREPARE_WITH_UNDO_PURGE, + LOGREC_COMMIT, + LOGREC_COMMIT_WITH_UNDO_PURGE, + LOGREC_CHECKPOINT, + LOGREC_REDO_CREATE_TABLE, + LOGREC_REDO_RENAME_TABLE, + LOGREC_REDO_DROP_TABLE, + LOGREC_REDO_DELETE_ALL, + LOGREC_REDO_REPAIR_TABLE, + LOGREC_FILE_ID, + LOGREC_LONG_TRANSACTION_ID, + LOGREC_RESERVED_FUTURE_EXTENSION= 63 +}; +#define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */ + +/* Size of log file; One log file is restricted to 4G */ +typedef uint32 translog_size_t; + +#define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024 + +typedef struct st_translog_group_descriptor +{ + TRANSLOG_ADDRESS addr; + uint8 num; +} TRANSLOG_GROUP; + + +typedef struct st_translog_header_buffer +{ + /* LSN of the read record */ + LSN lsn; + /* array of groups descriptors, can be used only if groups_no > 0 */ + TRANSLOG_GROUP *groups; + /* short transaction ID or 0 if it has no sense for the record */ + SHORT_TRANSACTION_ID short_trid; + /* + The Record length in buffer (including read header, but excluding + hidden part of record (type, short TrID, length) + */ + translog_size_t record_length; + /* + Buffer for write decoded header of the record (depend on the record + type) + */ + byte header[TRANSLOG_RECORD_HEADER_MAX_SIZE]; + /* number of groups listed in */ + uint groups_no; + /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */ + uint chunk0_pages; + /* type of the read record */ + enum translog_record_type type; + /* chunk 0 data address (valid only if groups_no > 0) */ + TRANSLOG_ADDRESS chunk0_data_addr; + /* + Real compressed LSN(s) size economy (*7 - ) + */ + int16 compressed_LSN_economy; + /* short transaction ID or 0 if it has no sense for the record */ + uint16 non_header_data_start_offset; + /* non read body data length in this first chunk */ + uint16 non_header_data_len; + /* chunk 0 data size (valid only if groups_no > 0) */ + uint16 chunk0_data_len; +} TRANSLOG_HEADER_BUFFER; + + +typedef struct st_translog_scanner_data +{ + byte buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */ + TRANSLOG_ADDRESS page_addr; /* current page address */ + /* end of the log which we saw last time */ + TRANSLOG_ADDRESS horizon; + TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */ + byte *page; /* page content pointer */ + /* offset of the chunk in the page */ + translog_size_t page_offset; + /* set horizon only once at init */ + my_bool fixed_horizon; +} TRANSLOG_SCANNER_DATA; + + +struct st_translog_reader_data +{ + TRANSLOG_HEADER_BUFFER header; /* Header */ + TRANSLOG_SCANNER_DATA scanner; /* chunks scanner */ + translog_size_t body_offset; /* current chunk body offset */ + /* data offset from the record beginning */ + translog_size_t current_offset; + /* number of bytes read in header */ + uint16 read_header; + uint16 chunk_size; /* current chunk size */ + uint current_group; /* current group */ + uint current_chunk; /* current chunk in the group */ + my_bool eor; /* end of the record */ +}; + +struct st_transaction; +C_MODE_START + +/* Records types for unittests */ +#define LOGREC_FIXED_RECORD_0LSN_EXAMPLE 1 +#define LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE 2 +#define LOGREC_FIXED_RECORD_1LSN_EXAMPLE 3 +#define LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE 4 +#define LOGREC_FIXED_RECORD_2LSN_EXAMPLE 5 +#define LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE 6 + +extern void example_loghandler_init(); + +extern my_bool translog_init(const char *directory, uint32 log_file_max_size, + uint32 server_version, uint32 server_id, + PAGECACHE *pagecache, uint flags); + +extern my_bool +translog_write_record(LSN *lsn, enum translog_record_type type, + struct st_transaction *trn, + struct st_maria_share *share, + translog_size_t rec_len, uint part_no, + LEX_STRING *parts_data, uchar *store_share_id); + +extern void translog_destroy(); + +extern translog_size_t translog_read_record_header(LSN lsn, + TRANSLOG_HEADER_BUFFER + *buff); + +extern void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff); + +extern translog_size_t translog_read_record(LSN lsn, + translog_size_t offset, + translog_size_t length, + byte *buffer, + struct st_translog_reader_data + *data); + +extern my_bool translog_flush(LSN lsn); + +extern my_bool translog_init_scanner(LSN lsn, + my_bool fixed_horizon, + struct st_translog_scanner_data *scanner); + +extern translog_size_t translog_read_next_record_header(TRANSLOG_SCANNER_DATA + *scanner, + TRANSLOG_HEADER_BUFFER + *buff); +extern void translog_lock_assert_owner(); +extern TRANSLOG_ADDRESS translog_get_horizon(); +extern int translog_assign_id_to_share(struct st_maria_share *share, + struct st_transaction *trn); +extern void translog_deassign_id_from_share(struct st_maria_share *share); +extern my_bool translog_inited; +C_MODE_END diff --git a/storage/maria/ma_loghandler_lsn.h b/storage/maria/ma_loghandler_lsn.h new file mode 100644 index 00000000000..c641337e8ba --- /dev/null +++ b/storage/maria/ma_loghandler_lsn.h @@ -0,0 +1,64 @@ +#ifndef _ma_loghandler_lsn_h +#define _ma_loghandler_lsn_h + +/* + Transaction log record address: + file_no << 32 | offset + file_no is only 3 bytes so we can use signed integer to make + comparison more simple. +*/ +typedef int64 TRANSLOG_ADDRESS; + +/* + Compare addresses + A1 > A2 -> result > 0 + A1 == A2 -> 0 + A1 < A2 -> result < 0 +*/ +#define cmp_translog_addr(A1,A2) ((A1) - (A2)) + +/* LSN type (address of certain log record chank */ +typedef TRANSLOG_ADDRESS LSN; + +/* Gets file number part of a LSN/log address */ +#define LSN_FILE_NO(L) ((L) >> 32) + +/* Gets raw file number part of a LSN/log address */ +#define LSN_FINE_NO_PART(L) ((L) & ((int64)0xFFFFFF00000000LL)) + +/* Gets record offset of a LSN/log address */ +#define LSN_OFFSET(L) ((L) & 0xFFFFFFFFL) + +/* Makes lsn/log address from file number and record offset */ +#define MAKE_LSN(F,S) ((((uint64)(F)) << 32) | (S)) + +/* checks LSN */ +#define LSN_VALID(L) DBUG_ASSERT((L) >= 0 && (L) < (uint64)0xFFFFFFFFFFFFFFLL) + +/* size of stored LSN on a disk, don't change it! */ +#define LSN_STORE_SIZE 7 + +/* Puts LSN into buffer (dst) */ +#define lsn_store(dst, lsn) \ + do { \ + int3store((dst), LSN_FILE_NO(lsn)); \ + int4store((dst) + 3, LSN_OFFSET(lsn)); \ + } while (0) + +/* Unpacks LSN from the buffer (P) */ +#define lsn_korr(P) MAKE_LSN(uint3korr(P), uint4korr((P) + 3)) + +/* what we need to add to LSN to increase it on one file */ +#define LSN_ONE_FILE ((int64)0x100000000LL) + +#define LSN_REPLACE_OFFSET(L, S) (LSN_FINE_NO_PART(L) | (S)) + +/* + an 8-byte type whose most significant byte is used for "flags"; 7 + other bytes are a LSN. +*/ +typedef LSN LSN_WITH_FLAGS; +#define LSN_WITH_FLAGS_TO_LSN(x) (x & ULL(0x00FFFFFFFFFFFFFF)) +#define LSN_WITH_FLAGS_TO_FLAGS(x) (x & ULL(0xFF00000000000000)) + +#endif diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c new file mode 100644 index 00000000000..4e72adf3b7e --- /dev/null +++ b/storage/maria/ma_open.c @@ -0,0 +1,1454 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* open a isam-database */ + +#include "ma_fulltext.h" +#include "ma_sp_defs.h" +#include "ma_rt_index.h" +#include "ma_blockrec.h" +#include "trnman.h" +#include + +#if defined(MSDOS) || defined(__WIN__) +#ifdef __WIN__ +#include +#else +#include /* Prototype for getpid */ +#endif +#endif +#ifdef VMS +#include "static.c" +#endif + +static void setup_key_functions(MARIA_KEYDEF *keyinfo); +static my_bool maria_scan_init_dummy(MARIA_HA *info); +static void maria_scan_end_dummy(MARIA_HA *info); +static my_bool maria_once_init_dummy(MARIA_SHARE *, File); +static my_bool maria_once_end_dummy(MARIA_SHARE *); +static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base); + +#define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \ + pos+=size;} + + +#define disk_pos_assert(pos, end_pos) \ +if (pos > end_pos) \ +{ \ + my_errno=HA_ERR_CRASHED; \ + goto err; \ +} + + +/****************************************************************************** +** Return the shared struct if the table is already open. +** In MySQL the server will handle version issues. +******************************************************************************/ + +MARIA_HA *_ma_test_if_reopen(char *filename) +{ + LIST *pos; + + for (pos=maria_open_list ; pos ; pos=pos->next) + { + MARIA_HA *info=(MARIA_HA*) pos->data; + MARIA_SHARE *share=info->s; + if (!strcmp(share->unique_file_name,filename) && share->last_version) + return info; + } + return 0; +} + + +/* + Open a new instance of an already opened Maria table + + SYNOPSIS + maria_clone_internal() + share Share of already open table + mode Mode of table (O_RDONLY | O_RDWR) + data_file Filedescriptor of data file to use < 0 if one should open + open it. + + RETURN + # Maria handler + 0 Error +*/ + + +static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, int mode, + File data_file) +{ + int save_errno; + uint errpos; + MARIA_HA info,*m_info; + DBUG_ENTER("maria_clone_internal"); + + errpos= 0; + bzero((byte*) &info,sizeof(info)); + + if (mode == O_RDWR && share->mode == O_RDONLY) + { + my_errno=EACCES; /* Can't open in write mode */ + goto err; + } + if (data_file >= 0) + info.dfile.file= data_file; + else if (_ma_open_datafile(&info, share, -1)) + goto err; + errpos= 5; + + /* alloc and set up private structure parts */ + if (!my_multi_malloc(MY_WME, + &m_info,sizeof(MARIA_HA), + &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs, + &info.buff,(share->base.max_key_block_length*2+ + share->base.max_key_length), + &info.lastkey,share->base.max_key_length*3+1, + &info.first_mbr_key, share->base.max_key_length, + &info.maria_rtree_recursion_state, + share->have_rtree ? 1024 : 0, + NullS)) + goto err; + errpos= 6; + + memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs); + info.lastkey2=info.lastkey+share->base.max_key_length; + + info.s=share; + info.cur_row.lastpos= HA_OFFSET_ERROR; + info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND); + info.opt_flag=READ_CHECK_USED; + info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */ + if (share->data_file_type == COMPRESSED_RECORD) + info.this_unique= share->state.unique; + info.this_loop=0; /* Update counter */ + info.last_unique= share->state.unique; + info.last_loop= share->state.update_count; + info.lock_type=F_UNLCK; + info.quick_mode=0; + info.bulk_insert=0; + info.ft1_to_ft2=0; + info.errkey= -1; + info.page_changed=1; + info.keyread_buff= info.buff + share->base.max_key_block_length; + if ((*share->init)(&info)) + goto err; + + pthread_mutex_lock(&share->intern_lock); + info.read_record= share->read_record; + share->reopen++; + share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL); + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + info.lock_type=F_RDLCK; + share->r_locks++; + share->tot_locks++; + } + if (share->options & HA_OPTION_TMP_TABLE) + { + share->temporary= share->delay_key_write= 1; + + share->write_flag=MYF(MY_NABP); + share->w_locks++; /* We don't have to update status */ + share->tot_locks++; + info.lock_type=F_WRLCK; + } + if ((share->options & HA_OPTION_DELAY_KEY_WRITE) && + maria_delay_key_write) + share->delay_key_write=1; + + info.state= &share->state.state; /* Change global values by default */ + info.trn= &dummy_transaction_object; + pthread_mutex_unlock(&share->intern_lock); + + /* Allocate buffer for one record */ + /* prerequisites: info->rec_buffer == 0 && info->rec_buff_size == 0 */ + if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size, + share->base.default_rec_buff_size)) + goto err; + + bzero(info.rec_buff, share->base.default_rec_buff_size); + + *m_info=info; +#ifdef THREAD + thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info); +#endif + m_info->open_list.data=(void*) m_info; + maria_open_list=list_add(maria_open_list,&m_info->open_list); + + DBUG_RETURN(m_info); + +err: + save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE; + if ((save_errno == HA_ERR_CRASHED) || + (save_errno == HA_ERR_CRASHED_ON_USAGE) || + (save_errno == HA_ERR_CRASHED_ON_REPAIR)) + _ma_report_error(save_errno, share->open_file_name); + switch (errpos) { + case 6: + (*share->end)(&info); + my_free((gptr) m_info,MYF(0)); + /* fall through */ + case 5: + if (data_file < 0) + VOID(my_close(info.dfile.file, MYF(0))); + break; + } + my_errno=save_errno; + DBUG_RETURN (NULL); +} /* maria_clone_internal */ + + +/* Make a clone of a maria table */ + +MARIA_HA *maria_clone(MARIA_SHARE *share, int mode) +{ + MARIA_HA *new_info; + pthread_mutex_lock(&THR_LOCK_maria); + new_info= maria_clone_internal(share, mode, + share->data_file_type == BLOCK_RECORD ? + share->bitmap.file.file : -1); + pthread_mutex_unlock(&THR_LOCK_maria); + return new_info; +} + + +/****************************************************************************** + open a MARIA table + + See my_base.h for the handle_locking argument + if handle_locking and HA_OPEN_ABORT_IF_CRASHED then abort if the table + is marked crashed or if we are not using locking and the table doesn't + have an open count of 0. +******************************************************************************/ + +MARIA_HA *maria_open(const char *name, int mode, uint open_flags) +{ + int kfile,open_mode,save_errno; + uint i,j,len,errpos,head_length,base_pos,info_length,keys, + key_parts,unique_key_parts,fulltext_keys,uniques; + char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN], + data_name[FN_REFLEN]; + char *disk_cache, *disk_pos, *end_pos; + MARIA_HA info,*m_info,*old_info; + MARIA_SHARE share_buff,*share; + ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY]; + ulonglong max_key_file_length, max_data_file_length; + File data_file= -1; + DBUG_ENTER("maria_open"); + + LINT_INIT(m_info); + kfile= -1; + errpos= 0; + head_length=sizeof(share_buff.state.header); + bzero((byte*) &info,sizeof(info)); + + my_realpath(name_buff, fn_format(org_name,name,"",MARIA_NAME_IEXT, + MY_UNPACK_FILENAME),MYF(0)); + pthread_mutex_lock(&THR_LOCK_maria); + if (!(old_info=_ma_test_if_reopen(name_buff))) + { + share= &share_buff; + bzero((gptr) &share_buff,sizeof(share_buff)); + share_buff.state.rec_per_key_part=rec_per_key_part; + share_buff.state.key_root=key_root; + share_buff.pagecache= multi_pagecache_search(name_buff, strlen(name_buff), + maria_pagecache); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open", + if (strstr(name, "/t1")) + { + my_errno= HA_ERR_CRASHED; + goto err; + }); + if ((kfile=my_open(name_buff,(open_mode=O_RDWR) | O_SHARE,MYF(0))) < 0) + { + if ((errno != EROFS && errno != EACCES) || + mode != O_RDONLY || + (kfile=my_open(name_buff,(open_mode=O_RDONLY) | O_SHARE,MYF(0))) < 0) + goto err; + } + share->mode=open_mode; + errpos= 1; + if (my_read(kfile,(char*) share->state.header.file_version,head_length, + MYF(MY_NABP))) + { + my_errno= HA_ERR_NOT_A_TABLE; + goto err; + } + if (memcmp((byte*) share->state.header.file_version, + (byte*) maria_file_magic, 4)) + { + DBUG_PRINT("error",("Wrong header in %s",name_buff)); + DBUG_DUMP("error_dump",(char*) share->state.header.file_version, + head_length); + my_errno=HA_ERR_NOT_A_TABLE; + goto err; + } + share->options= mi_uint2korr(share->state.header.options); + if (share->options & + ~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS | + HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA | + HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM | + HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE | + HA_OPTION_RELIES_ON_SQL_LAYER)) + { + DBUG_PRINT("error",("wrong options: 0x%lx", share->options)); + my_errno=HA_ERR_OLD_FILE; + goto err; + } + if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) && + ! (open_flags & HA_OPEN_FROM_SQL_LAYER)) + { + DBUG_PRINT("error", ("table cannot be openned from non-sql layer")); + my_errno= HA_ERR_UNSUPPORTED; + goto err; + } + /* Don't call realpath() if the name can't be a link */ + if (!strcmp(name_buff, org_name) || + my_readlink(index_name, org_name, MYF(0)) == -1) + (void) strmov(index_name, org_name); + *strrchr(org_name, '.')= '\0'; + (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT, + MY_APPEND_EXT|MY_UNPACK_FILENAME|MY_RESOLVE_SYMLINKS); + + info_length=mi_uint2korr(share->state.header.header_length); + base_pos= mi_uint2korr(share->state.header.base_pos); + if (!(disk_cache=(char*) my_alloca(info_length+128))) + { + my_errno=ENOMEM; + goto err; + } + end_pos=disk_cache+info_length; + errpos= 2; + + VOID(my_seek(kfile,0L,MY_SEEK_SET,MYF(0))); + errpos= 3; + if (my_read(kfile,disk_cache,info_length,MYF(MY_NABP))) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + len=mi_uint2korr(share->state.header.state_info_length); + keys= (uint) share->state.header.keys; + uniques= (uint) share->state.header.uniques; + fulltext_keys= (uint) share->state.header.fulltext_keys; + key_parts= mi_uint2korr(share->state.header.key_parts); + unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts); + if (len != MARIA_STATE_INFO_SIZE) + { + DBUG_PRINT("warning", + ("saved_state_info_length: %d state_info_length: %d", + len,MARIA_STATE_INFO_SIZE)); + } + share->state_diff_length=len-MARIA_STATE_INFO_SIZE; + + _ma_state_info_read(disk_cache, &share->state); + len= mi_uint2korr(share->state.header.base_info_length); + if (len != MARIA_BASE_INFO_SIZE) + { + DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d", + len,MARIA_BASE_INFO_SIZE)); + } + disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base); + share->state.state_length=base_pos; + + if (!(open_flags & HA_OPEN_FOR_REPAIR) && + ((share->state.changed & STATE_CRASHED) || + ((open_flags & HA_OPEN_ABORT_IF_CRASHED) && + (my_disable_locking && share->state.open_count)))) + { + DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u " + "changed: %u open_count: %u !locking: %d", + open_flags, share->state.changed, + share->state.open_count, my_disable_locking)); + my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? + HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); + goto err; + } + + /* sanity check */ + if (share->base.keystart > 65535 || share->base.rec_reflength > 8) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + + key_parts+=fulltext_keys*FT_SEGS; + if (share->base.max_key_length > maria_max_key_length() || + keys > MARIA_MAX_KEY || key_parts >= MARIA_MAX_KEY * HA_MAX_KEY_SEG) + { + DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts)); + my_errno=HA_ERR_UNSUPPORTED; + goto err; + } + /* + If page cache is not initialized, then assume we will create it + after the table is opened! + */ + if (share->base.block_size != maria_block_size && + share_buff.pagecache->inited != 0) + { + DBUG_PRINT("error", ("Wrong block size %u; Expected %u", + (uint) share->base.block_size, + (uint) maria_block_size)); + my_errno=HA_ERR_UNSUPPORTED; + goto err; + } + + /* Correct max_file_length based on length of sizeof(off_t) */ + max_data_file_length= + (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ? + (((ulonglong) 1 << (share->base.rec_reflength*8))-1) : + (_ma_safe_mul(share->base.pack_reclength, + (ulonglong) 1 << (share->base.rec_reflength*8))-1); + + max_key_file_length= + _ma_safe_mul(MARIA_MIN_KEY_BLOCK_LENGTH, + ((ulonglong) 1 << (share->base.key_reflength*8))-1); +#if SIZEOF_OFF_T == 4 + set_if_smaller(max_data_file_length, INT_MAX32); + set_if_smaller(max_key_file_length, INT_MAX32); +#endif + share->base.max_data_file_length=(my_off_t) max_data_file_length; + share->base.max_key_file_length=(my_off_t) max_key_file_length; + + if (share->options & HA_OPTION_COMPRESS_RECORD) + share->base.max_key_length+=2; /* For safety */ + + if (!my_multi_malloc(MY_WME, + &share,sizeof(*share), + &share->state.rec_per_key_part,sizeof(long)*key_parts, + &share->keyinfo,keys*sizeof(MARIA_KEYDEF), + &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF), + &share->keyparts, + (key_parts+unique_key_parts+keys+uniques) * + sizeof(HA_KEYSEG), + &share->columndef, + (share->base.fields+1)*sizeof(MARIA_COLUMNDEF), + &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs, + &share->unique_file_name,strlen(name_buff)+1, + &share->index_file_name,strlen(index_name)+1, + &share->data_file_name,strlen(data_name)+1, + &share->open_file_name,strlen(name)+1, + &share->state.key_root,keys*sizeof(my_off_t), +#ifdef THREAD + &share->key_root_lock,sizeof(rw_lock_t)*keys, +#endif + &share->mmap_lock,sizeof(rw_lock_t), + NullS)) + goto err; + errpos= 4; + + *share=share_buff; + memcpy((char*) share->state.rec_per_key_part, + (char*) rec_per_key_part, sizeof(long)*key_parts); + memcpy((char*) share->state.key_root, + (char*) key_root, sizeof(my_off_t)*keys); + strmov(share->unique_file_name, name_buff); + share->unique_name_length= strlen(name_buff); + strmov(share->index_file_name, index_name); + strmov(share->data_file_name, data_name); + strmov(share->open_file_name, name); + + share->block_size= share->base.block_size; + { + HA_KEYSEG *pos=share->keyparts; + for (i=0 ; i < keys ; i++) + { + share->keyinfo[i].share= share; + disk_pos=_ma_keydef_read(disk_pos, &share->keyinfo[i]); + disk_pos_assert(disk_pos + share->keyinfo[i].keysegs * HA_KEYSEG_SIZE, + end_pos); + if (share->keyinfo[i].key_alg == HA_KEY_ALG_RTREE) + share->have_rtree= 1; + share->keyinfo[i].seg=pos; + for (j=0 ; j < share->keyinfo[i].keysegs; j++,pos++) + { + disk_pos=_ma_keyseg_read(disk_pos, pos); + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) + { + if (!pos->language) + pos->charset=default_charset_info; + else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME)))) + { + my_errno=HA_ERR_UNKNOWN_CHARSET; + goto err; + } + } + else if (pos->type == HA_KEYTYPE_BINARY) + pos->charset= &my_charset_bin; + } + if (share->keyinfo[i].flag & HA_SPATIAL) + { +#ifdef HAVE_SPATIAL + uint sp_segs=SPDIMS*2; + share->keyinfo[i].seg=pos-sp_segs; + share->keyinfo[i].keysegs--; +#else + my_errno=HA_ERR_UNSUPPORTED; + goto err; +#endif + } + else if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if (!fulltext_keys) + { /* 4.0 compatibility code, to be removed in 5.0 */ + share->keyinfo[i].seg=pos-FT_SEGS; + share->keyinfo[i].keysegs-=FT_SEGS; + } + else + { + uint k; + share->keyinfo[i].seg=pos; + for (k=0; k < FT_SEGS; k++) + { + *pos= ft_keysegs[k]; + pos[0].language= pos[-1].language; + if (!(pos[0].charset= pos[-1].charset)) + { + my_errno=HA_ERR_CRASHED; + goto err; + } + pos++; + } + } + if (!share->ft2_keyinfo.seg) + { + memcpy(& share->ft2_keyinfo, & share->keyinfo[i], sizeof(MARIA_KEYDEF)); + share->ft2_keyinfo.keysegs=1; + share->ft2_keyinfo.flag=0; + share->ft2_keyinfo.keylength= + share->ft2_keyinfo.minlength= + share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength; + share->ft2_keyinfo.seg=pos-1; + share->ft2_keyinfo.end=pos; + setup_key_functions(& share->ft2_keyinfo); + } + } + setup_key_functions(share->keyinfo+i); + share->keyinfo[i].end=pos; + pos->type=HA_KEYTYPE_END; /* End */ + pos->length=share->base.rec_reflength; + pos->null_bit=0; + pos->flag=0; /* For purify */ + pos++; + } + for (i=0 ; i < uniques ; i++) + { + disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]); + disk_pos_assert(disk_pos + share->uniqueinfo[i].keysegs * + HA_KEYSEG_SIZE, end_pos); + share->uniqueinfo[i].seg=pos; + for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++) + { + disk_pos=_ma_keyseg_read(disk_pos, pos); + if (pos->type == HA_KEYTYPE_TEXT || + pos->type == HA_KEYTYPE_VARTEXT1 || + pos->type == HA_KEYTYPE_VARTEXT2) + { + if (!pos->language) + pos->charset=default_charset_info; + else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME)))) + { + my_errno=HA_ERR_UNKNOWN_CHARSET; + goto err; + } + } + } + share->uniqueinfo[i].end=pos; + pos->type=HA_KEYTYPE_END; /* End */ + pos->null_bit=0; + pos->flag=0; + pos++; + } + share->ftparsers= 0; + } + share->data_file_type= share->state.header.data_file_type; + share->base_length= (BASE_ROW_HEADER_SIZE + + share->base.is_nulls_extended + + share->base.null_bytes + + share->base.pack_bytes + + test(share->options & HA_OPTION_CHECKSUM)); + if (share->base.transactional) + share->base_length+= TRANS_ROW_EXTRA_HEADER_SIZE; + share->base.default_rec_buff_size= max(share->base.pack_reclength, + share->base.max_key_length); + share->page_type= (share->base.transactional ? PAGECACHE_LSN_PAGE : + PAGECACHE_PLAIN_PAGE); + + if (share->data_file_type == DYNAMIC_RECORD) + { + share->base.extra_rec_buff_size= + (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH + + MARIA_REC_BUFF_OFFSET); + share->base.default_rec_buff_size+= share->base.extra_rec_buff_size; + } + disk_pos_assert(disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE, + end_pos); + for (i= j= 0 ; i < share->base.fields ; i++) + { + disk_pos=_ma_columndef_read(disk_pos,&share->columndef[i]); + share->columndef[i].pack_type=0; + share->columndef[i].huff_tree=0; + if (share->columndef[i].type == (int) FIELD_BLOB) + { + share->blobs[j].pack_length= + share->columndef[i].length-portable_sizeof_char_ptr;; + share->blobs[j].offset= share->columndef[i].offset; + j++; + } + } + share->columndef[i].type=(int) FIELD_LAST; /* End marker */ + + if ((share->data_file_type == BLOCK_RECORD || + share->data_file_type == COMPRESSED_RECORD)) + { + if (_ma_open_datafile(&info, share, -1)) + goto err; + data_file= info.dfile.file; + } + errpos= 5; + + share->kfile.file= kfile; + share->this_process=(ulong) getpid(); + share->last_process= share->state.process; + share->base.key_parts=key_parts; + share->base.all_key_parts=key_parts+unique_key_parts; + if (!(share->last_version=share->state.version)) + share->last_version=1; /* Safety */ + share->rec_reflength=share->base.rec_reflength; /* May be changed */ + share->base.margin_key_file_length=(share->base.max_key_file_length - + (keys ? MARIA_INDEX_BLOCK_MARGIN * + share->block_size * keys : 0)); + share->block_size= share->base.block_size; + my_afree((gptr) disk_cache); + _ma_setup_functions(share); + if ((*share->once_init)(share, info.dfile.file)) + goto err; + if (open_flags & HA_OPEN_MMAP) + { + info.s= share; + if (_ma_dynmap_file(&info, share->state.state.data_file_length)) + { + /* purecov: begin inspected */ + /* Ignore if mmap fails. Use file I/O instead. */ + DBUG_PRINT("warning", ("mmap failed: errno: %d", errno)); + /* purecov: end */ + } + else + { + share->file_read= _ma_mmap_pread; + share->file_write= _ma_mmap_pwrite; + } + } + share->is_log_table= FALSE; + if (open_flags & HA_OPEN_TMP_TABLE) + share->options|= HA_OPTION_TMP_TABLE; + if (open_flags & HA_OPEN_DELAY_KEY_WRITE) + share->options|= HA_OPTION_DELAY_KEY_WRITE; + if (mode == O_RDONLY) + share->options|= HA_OPTION_READ_ONLY_DATA; + +#ifdef THREAD + thr_lock_init(&share->lock); + VOID(pthread_mutex_init(&share->intern_lock,MY_MUTEX_INIT_FAST)); + for (i=0; ikey_root_lock[i], NULL)); + VOID(my_rwlock_init(&share->mmap_lock, NULL)); + if (!thr_lock_inited) + { + /* Probably a single threaded program; Don't use concurrent inserts */ + maria_concurrent_insert=0; + } + else if (maria_concurrent_insert) + { + share->concurrent_insert= + ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE | + HA_OPTION_COMPRESS_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD)) || + (open_flags & HA_OPEN_TMP_TABLE) || + share->data_file_type == BLOCK_RECORD || + share->have_rtree) ? 0 : 1; + if (share->concurrent_insert) + { + share->lock.get_status=_ma_get_status; + share->lock.copy_status=_ma_copy_status; + share->lock.update_status=_ma_update_status; + share->lock.restore_status=_ma_restore_status; + share->lock.check_status=_ma_check_status; + } + } +#endif + } + else + { + share= old_info->s; + if (share->data_file_type == BLOCK_RECORD) + data_file= share->bitmap.file.file; /* Only opened once */ + } + + if (!(m_info= maria_clone_internal(share, mode, data_file))) + goto err; + pthread_mutex_unlock(&THR_LOCK_maria); + DBUG_RETURN(m_info); + +err: + save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE; + if ((save_errno == HA_ERR_CRASHED) || + (save_errno == HA_ERR_CRASHED_ON_USAGE) || + (save_errno == HA_ERR_CRASHED_ON_REPAIR)) + _ma_report_error(save_errno, name); + switch (errpos) { + case 5: + if (data_file >= 0) + VOID(my_close(data_file, MYF(0))); + if (old_info) + break; /* Don't remove open table */ + (*share->once_end)(share); + /* fall through */ + case 4: + my_free((gptr) share,MYF(0)); + /* fall through */ + case 3: + /* fall through */ + case 2: + my_afree((gptr) disk_cache); + /* fall through */ + case 1: + VOID(my_close(kfile,MYF(0))); + /* fall through */ + case 0: + default: + break; + } + pthread_mutex_unlock(&THR_LOCK_maria); + my_errno= save_errno; + DBUG_RETURN (NULL); +} /* maria_open */ + + +/* + Reallocate a buffer, if the current buffer is not large enough +*/ + +my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size, + my_size_t new_size) +{ + if (*old_size < new_size) + { + byte *addr; + if (!(addr= (byte*) my_realloc((gptr) *old_addr, new_size, + MYF(MY_ALLOW_ZERO_PTR)))) + return 1; + *old_addr= addr; + *old_size= new_size; + } + return 0; +} + + +ulonglong _ma_safe_mul(ulonglong a, ulonglong b) +{ + ulonglong max_val= ~ (ulonglong) 0; /* my_off_t is unsigned */ + + if (!a || max_val / a < b) + return max_val; + return a*b; +} + + /* Set up functions in structs */ + +void _ma_setup_functions(register MARIA_SHARE *share) +{ + share->once_init= maria_once_init_dummy; + share->once_end= maria_once_end_dummy; + share->init= maria_scan_init_dummy; + share->end= maria_scan_end_dummy; + share->scan_init= maria_scan_init_dummy;/* Compat. dummy function */ + share->scan_end= maria_scan_end_dummy;/* Compat. dummy function */ + share->write_record_init= _ma_write_init_default; + share->write_record_abort= _ma_write_abort_default; + + switch (share->data_file_type) { + case COMPRESSED_RECORD: + share->read_record= _ma_read_pack_record; + share->scan= _ma_read_rnd_pack_record; + share->once_init= _ma_once_init_pack_row; + share->once_end= _ma_once_end_pack_row; + /* + Calculate checksum according to data in the original, not compressed, + row. + */ + if (share->state.header.org_data_file_type == STATIC_RECORD) + share->calc_checksum= _ma_static_checksum; + else + share->calc_checksum= _ma_checksum; + share->calc_write_checksum= share->calc_checksum; + break; + case DYNAMIC_RECORD: + share->read_record= _ma_read_dynamic_record; + share->scan= _ma_read_rnd_dynamic_record; + share->delete_record= _ma_delete_dynamic_record; + share->compare_record= _ma_cmp_dynamic_record; + share->compare_unique= _ma_cmp_dynamic_unique; + share->calc_checksum= share->calc_write_checksum= _ma_checksum; + /* add bits used to pack data to pack_reclength for faster allocation */ + share->base.pack_reclength+= share->base.pack_bytes; + if (share->base.blobs) + { + share->update_record= _ma_update_blob_record; + share->write_record= _ma_write_blob_record; + } + else + { + share->write_record= _ma_write_dynamic_record; + share->update_record= _ma_update_dynamic_record; + } + break; + case STATIC_RECORD: + share->read_record= _ma_read_static_record; + share->scan= _ma_read_rnd_static_record; + share->delete_record= _ma_delete_static_record; + share->compare_record= _ma_cmp_static_record; + share->update_record= _ma_update_static_record; + share->write_record= _ma_write_static_record; + share->compare_unique= _ma_cmp_static_unique; + share->calc_checksum= share->calc_write_checksum= _ma_static_checksum; + break; + case BLOCK_RECORD: + share->once_init= _ma_once_init_block_record; + share->once_end= _ma_once_end_block_record; + share->init= _ma_init_block_record; + share->end= _ma_end_block_record; + share->write_record_init= _ma_write_init_block_record; + share->write_record_abort= _ma_write_abort_block_record; + share->scan_init= _ma_scan_init_block_record; + share->scan_end= _ma_scan_end_block_record; + share->read_record= _ma_read_block_record; + share->scan= _ma_scan_block_record; + share->delete_record= _ma_delete_block_record; + share->compare_record= _ma_compare_block_record; + share->update_record= _ma_update_block_record; + share->write_record= _ma_write_block_record; + share->compare_unique= _ma_cmp_block_unique; + share->calc_checksum= _ma_checksum; + /* + write_block_record() will calculate the checksum; Tell maria_write() + that it doesn't have to do this. + */ + share->calc_write_checksum= 0; + break; + } + share->file_read= _ma_nommap_pread; + share->file_write= _ma_nommap_pwrite; + if (!(share->options & HA_OPTION_CHECKSUM) && + share->data_file_type != COMPRESSED_RECORD) + share->calc_checksum= share->calc_write_checksum= 0; + return; +} + + +static void setup_key_functions(register MARIA_KEYDEF *keyinfo) +{ + if (keyinfo->key_alg == HA_KEY_ALG_RTREE) + { +#ifdef HAVE_RTREE_KEYS + keyinfo->ck_insert = maria_rtree_insert; + keyinfo->ck_delete = maria_rtree_delete; +#else + DBUG_ASSERT(0); /* maria_open should check it never happens */ +#endif + } + else + { + keyinfo->ck_insert = _ma_ck_write; + keyinfo->ck_delete = _ma_ck_delete; + } + if (keyinfo->flag & HA_BINARY_PACK_KEY) + { /* Simple prefix compression */ + keyinfo->bin_search= _ma_seq_search; + keyinfo->get_key= _ma_get_binary_pack_key; + keyinfo->pack_key= _ma_calc_bin_pack_key_length; + keyinfo->store_key= _ma_store_bin_pack_key; + } + else if (keyinfo->flag & HA_VAR_LENGTH_KEY) + { + keyinfo->get_key= _ma_get_pack_key; + if (keyinfo->seg[0].flag & HA_PACK_KEY) + { /* Prefix compression */ + if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) || + (keyinfo->seg->flag & HA_NULL_PART)) + keyinfo->bin_search= _ma_seq_search; + else + keyinfo->bin_search= _ma_prefix_search; + keyinfo->pack_key= _ma_calc_var_pack_key_length; + keyinfo->store_key= _ma_store_var_pack_key; + } + else + { + keyinfo->bin_search= _ma_seq_search; + keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */ + keyinfo->store_key= _ma_store_static_key; + } + } + else + { + keyinfo->bin_search= _ma_bin_search; + keyinfo->get_key= _ma_get_static_key; + keyinfo->pack_key= _ma_calc_static_key_length; + keyinfo->store_key= _ma_store_static_key; + } + return; +} + + +/** + @brief Function to save and store the header in the index file (.MYI) + + @param file descriptor of the index file to write + @param state state information to write to the file + @param pWrite bitmap (determines the amount of information to + write, and if my_write() or my_pwrite() should be + used) + + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite) +{ + /** @todo RECOVERY write it only at checkpoint time */ + uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; + uchar *ptr=buff; + uint i, keys= (uint) state->header.keys; + DBUG_ENTER("_ma_state_info_write"); + + memcpy_fixed(ptr,&state->header,sizeof(state->header)); + ptr+=sizeof(state->header); + + /* open_count must be first because of _ma_mark_file_changed ! */ + mi_int2store(ptr,state->open_count); ptr+= 2; + /* + if you change the offset of this LSN inside the file, fix + ma_create + ma_rename + ma_delete_all + backward-compatibility. + */ + lsn_store(ptr, state->create_rename_lsn); ptr+= LSN_STORE_SIZE; + *ptr++= (uchar)state->changed; + *ptr++= state->sortkey; + mi_rowstore(ptr,state->state.records); ptr+= 8; + mi_rowstore(ptr,state->state.del); ptr+= 8; + mi_rowstore(ptr,state->split); ptr+= 8; + mi_sizestore(ptr,state->dellink); ptr+= 8; + mi_sizestore(ptr,state->first_bitmap_with_space); ptr+= 8; + mi_sizestore(ptr,state->state.key_file_length); ptr+= 8; + mi_sizestore(ptr,state->state.data_file_length); ptr+= 8; + mi_sizestore(ptr,state->state.empty); ptr+= 8; + mi_sizestore(ptr,state->state.key_empty); ptr+= 8; + mi_int8store(ptr,state->auto_increment); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->state.checksum); ptr+= 8; + mi_int4store(ptr,state->process); ptr+= 4; + mi_int4store(ptr,state->unique); ptr+= 4; + mi_int4store(ptr,state->status); ptr+= 4; + mi_int4store(ptr,state->update_count); ptr+= 4; + + ptr+= state->state_diff_length; + + for (i=0; i < keys; i++) + { + mi_sizestore(ptr,state->key_root[i]); ptr+= 8; + } + /** @todo RECOVERY key_del is a problem for recovery */ + mi_sizestore(ptr,state->key_del); ptr+= 8; + if (pWrite & 2) /* From maria_chk */ + { + uint key_parts= mi_uint2korr(state->header.key_parts); + mi_int4store(ptr,state->sec_index_changed); ptr+= 4; + mi_int4store(ptr,state->sec_index_used); ptr+= 4; + mi_int4store(ptr,state->version); ptr+= 4; + mi_int8store(ptr,state->key_map); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->create_time); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->recover_time); ptr+= 8; + mi_int8store(ptr,(ulonglong) state->check_time); ptr+= 8; + mi_sizestore(ptr,state->rec_per_key_rows); ptr+= 8; + for (i=0 ; i < key_parts ; i++) + { + mi_int4store(ptr,state->rec_per_key_part[i]); ptr+=4; + } + } + + if (pWrite & 1) + DBUG_RETURN(my_pwrite(file,(char*) buff, (uint) (ptr-buff), 0L, + MYF(MY_NABP | MY_THREADSAFE))); + DBUG_RETURN(my_write(file, (char*) buff, (uint) (ptr-buff), + MYF(MY_NABP))); +} + + +byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state) +{ + uint i,keys,key_parts; + memcpy_fixed(&state->header,ptr, sizeof(state->header)); + ptr+= sizeof(state->header); + keys= (uint) state->header.keys; + key_parts= mi_uint2korr(state->header.key_parts); + + state->open_count = mi_uint2korr(ptr); ptr+= 2; + state->create_rename_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE; + state->changed= (my_bool) *ptr++; + state->sortkey= (uint) *ptr++; + state->state.records= mi_rowkorr(ptr); ptr+= 8; + state->state.del = mi_rowkorr(ptr); ptr+= 8; + state->split = mi_rowkorr(ptr); ptr+= 8; + state->dellink= mi_sizekorr(ptr); ptr+= 8; + state->first_bitmap_with_space= mi_sizekorr(ptr); ptr+= 8; + state->state.key_file_length = mi_sizekorr(ptr); ptr+= 8; + state->state.data_file_length= mi_sizekorr(ptr); ptr+= 8; + state->state.empty = mi_sizekorr(ptr); ptr+= 8; + state->state.key_empty= mi_sizekorr(ptr); ptr+= 8; + state->auto_increment=mi_uint8korr(ptr); ptr+= 8; + state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8; + state->process= mi_uint4korr(ptr); ptr+= 4; + state->unique = mi_uint4korr(ptr); ptr+= 4; + state->status = mi_uint4korr(ptr); ptr+= 4; + state->update_count=mi_uint4korr(ptr); ptr+= 4; + + ptr+= state->state_diff_length; + + for (i=0; i < keys; i++) + { + state->key_root[i]= mi_sizekorr(ptr); ptr+= 8; + } + state->key_del= mi_sizekorr(ptr); ptr+= 8; + state->sec_index_changed = mi_uint4korr(ptr); ptr+= 4; + state->sec_index_used = mi_uint4korr(ptr); ptr+= 4; + state->version = mi_uint4korr(ptr); ptr+= 4; + state->key_map = mi_uint8korr(ptr); ptr+= 8; + state->create_time = (time_t) mi_sizekorr(ptr); ptr+= 8; + state->recover_time =(time_t) mi_sizekorr(ptr); ptr+= 8; + state->check_time = (time_t) mi_sizekorr(ptr); ptr+= 8; + state->rec_per_key_rows=mi_sizekorr(ptr); ptr+= 8; + for (i=0 ; i < key_parts ; i++) + { + state->rec_per_key_part[i]= mi_uint4korr(ptr); ptr+=4; + } + return ptr; +} + + +uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, my_bool pRead) +{ + char buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE]; + + if (!maria_single_user) + { + if (pRead) + { + if (my_pread(file, buff, state->state_length,0L, MYF(MY_NABP))) + return (MY_FILE_ERROR); + } + else if (my_read(file, buff, state->state_length,MYF(MY_NABP))) + return (MY_FILE_ERROR); + _ma_state_info_read(buff, state); + } + return 0; +} + + +/**************************************************************************** +** store and read of MARIA_BASE_INFO +****************************************************************************/ + +uint _ma_base_info_write(File file, MARIA_BASE_INFO *base) +{ + uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff; + + mi_sizestore(ptr,base->keystart); ptr+= 8; + mi_sizestore(ptr,base->max_data_file_length); ptr+= 8; + mi_sizestore(ptr,base->max_key_file_length); ptr+= 8; + mi_rowstore(ptr,base->records); ptr+= 8; + mi_rowstore(ptr,base->reloc); ptr+= 8; + mi_int4store(ptr,base->mean_row_length); ptr+= 4; + mi_int4store(ptr,base->reclength); ptr+= 4; + mi_int4store(ptr,base->pack_reclength); ptr+= 4; + mi_int4store(ptr,base->min_pack_length); ptr+= 4; + mi_int4store(ptr,base->max_pack_length); ptr+= 4; + mi_int4store(ptr,base->min_block_length); ptr+= 4; + mi_int2store(ptr,base->fields); ptr+= 2; + mi_int2store(ptr,base->fixed_not_null_fields); ptr+= 2; + mi_int2store(ptr,base->fixed_not_null_fields_length); ptr+= 2; + mi_int2store(ptr,base->max_field_lengths); ptr+= 2; + mi_int2store(ptr,base->pack_fields); ptr+= 2; + mi_int2store(ptr,0); ptr+= 2; + mi_int2store(ptr,base->null_bytes); ptr+= 2; + mi_int2store(ptr,base->original_null_bytes); ptr+= 2; + mi_int2store(ptr,base->field_offsets); ptr+= 2; + mi_int2store(ptr,base->min_row_length); ptr+= 2; + mi_int2store(ptr,base->block_size); ptr+= 2; + *ptr++= base->rec_reflength; + *ptr++= base->key_reflength; + *ptr++= base->keys; + *ptr++= base->auto_key; + *ptr++= base->transactional; + *ptr++= 0; /* Reserved */ + mi_int2store(ptr,base->pack_bytes); ptr+= 2; + mi_int2store(ptr,base->blobs); ptr+= 2; + mi_int2store(ptr,base->max_key_block_length); ptr+= 2; + mi_int2store(ptr,base->max_key_length); ptr+= 2; + mi_int2store(ptr,base->extra_alloc_bytes); ptr+= 2; + *ptr++= base->extra_alloc_procent; + bzero(ptr,16); ptr+= 16; /* extra */ + DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE); + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + + +static byte *_ma_base_info_read(byte *ptr, MARIA_BASE_INFO *base) +{ + base->keystart= mi_sizekorr(ptr); ptr+= 8; + base->max_data_file_length= mi_sizekorr(ptr); ptr+= 8; + base->max_key_file_length= mi_sizekorr(ptr); ptr+= 8; + base->records= (ha_rows) mi_sizekorr(ptr); ptr+= 8; + base->reloc= (ha_rows) mi_sizekorr(ptr); ptr+= 8; + base->mean_row_length= mi_uint4korr(ptr); ptr+= 4; + base->reclength= mi_uint4korr(ptr); ptr+= 4; + base->pack_reclength= mi_uint4korr(ptr); ptr+= 4; + base->min_pack_length= mi_uint4korr(ptr); ptr+= 4; + base->max_pack_length= mi_uint4korr(ptr); ptr+= 4; + base->min_block_length= mi_uint4korr(ptr); ptr+= 4; + base->fields= mi_uint2korr(ptr); ptr+= 2; + base->fixed_not_null_fields= mi_uint2korr(ptr); ptr+= 2; + base->fixed_not_null_fields_length= mi_uint2korr(ptr);ptr+= 2; + base->max_field_lengths= mi_uint2korr(ptr); ptr+= 2; + base->pack_fields= mi_uint2korr(ptr); ptr+= 2; + ptr+= 2; + base->null_bytes= mi_uint2korr(ptr); ptr+= 2; + base->original_null_bytes= mi_uint2korr(ptr); ptr+= 2; + base->field_offsets= mi_uint2korr(ptr); ptr+= 2; + base->min_row_length= mi_uint2korr(ptr); ptr+= 2; + base->block_size= mi_uint2korr(ptr); ptr+= 2; + + base->rec_reflength= *ptr++; + base->key_reflength= *ptr++; + base->keys= *ptr++; + base->auto_key= *ptr++; + base->transactional= *ptr++; + ptr++; + base->pack_bytes= mi_uint2korr(ptr); ptr+= 2; + base->blobs= mi_uint2korr(ptr); ptr+= 2; + base->max_key_block_length= mi_uint2korr(ptr); ptr+= 2; + base->max_key_length= mi_uint2korr(ptr); ptr+= 2; + base->extra_alloc_bytes= mi_uint2korr(ptr); ptr+= 2; + base->extra_alloc_procent= *ptr++; + ptr+= 16; + return ptr; +} + +/*-------------------------------------------------------------------------- + maria_keydef +---------------------------------------------------------------------------*/ + +uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef) +{ + uchar buff[MARIA_KEYDEF_SIZE]; + uchar *ptr=buff; + + *ptr++= (uchar) keydef->keysegs; + *ptr++= keydef->key_alg; /* Rtree or Btree */ + mi_int2store(ptr,keydef->flag); ptr+= 2; + mi_int2store(ptr,keydef->block_length); ptr+= 2; + mi_int2store(ptr,keydef->keylength); ptr+= 2; + mi_int2store(ptr,keydef->minlength); ptr+= 2; + mi_int2store(ptr,keydef->maxlength); ptr+= 2; + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef) +{ + keydef->keysegs = (uint) *ptr++; + keydef->key_alg = *ptr++; /* Rtree or Btree */ + + keydef->flag = mi_uint2korr(ptr); ptr+= 2; + keydef->block_length = mi_uint2korr(ptr); ptr+= 2; + keydef->keylength = mi_uint2korr(ptr); ptr+= 2; + keydef->minlength = mi_uint2korr(ptr); ptr+= 2; + keydef->maxlength = mi_uint2korr(ptr); ptr+= 2; + keydef->underflow_block_length=keydef->block_length/3; + keydef->version = 0; /* Not saved */ + keydef->parser = &ft_default_parser; + keydef->ftparser_nr = 0; + return ptr; +} + +/*************************************************************************** +** maria_keyseg +***************************************************************************/ + +int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg) +{ + uchar buff[HA_KEYSEG_SIZE]; + uchar *ptr=buff; + ulong pos; + + *ptr++= keyseg->type; + *ptr++= keyseg->language; + *ptr++= keyseg->null_bit; + *ptr++= keyseg->bit_start; + *ptr++= keyseg->bit_end; + *ptr++= keyseg->bit_length; + mi_int2store(ptr,keyseg->flag); ptr+= 2; + mi_int2store(ptr,keyseg->length); ptr+= 2; + mi_int4store(ptr,keyseg->start); ptr+= 4; + pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos; + mi_int4store(ptr, pos); + ptr+=4; + + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + + +char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg) +{ + keyseg->type = *ptr++; + keyseg->language = *ptr++; + keyseg->null_bit = *ptr++; + keyseg->bit_start = *ptr++; + keyseg->bit_end = *ptr++; + keyseg->bit_length = *ptr++; + keyseg->flag = mi_uint2korr(ptr); ptr+= 2; + keyseg->length = mi_uint2korr(ptr); ptr+= 2; + keyseg->start = mi_uint4korr(ptr); ptr+= 4; + keyseg->null_pos = mi_uint4korr(ptr); ptr+= 4; + keyseg->charset=0; /* Will be filled in later */ + if (keyseg->null_bit) + keyseg->bit_pos= (uint16)(keyseg->null_pos + (keyseg->null_bit == 7)); + else + { + keyseg->bit_pos= (uint16)keyseg->null_pos; + keyseg->null_pos= 0; + } + return ptr; +} + +/*-------------------------------------------------------------------------- + maria_uniquedef +---------------------------------------------------------------------------*/ + +uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def) +{ + uchar buff[MARIA_UNIQUEDEF_SIZE]; + uchar *ptr=buff; + + mi_int2store(ptr,def->keysegs); ptr+=2; + *ptr++= (uchar) def->key; + *ptr++ = (uchar) def->null_are_equal; + + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *def) +{ + def->keysegs = mi_uint2korr(ptr); + def->key = ptr[2]; + def->null_are_equal=ptr[3]; + return ptr+4; /* 1 extra byte */ +} + +/*************************************************************************** +** MARIA_COLUMNDEF +***************************************************************************/ + +uint _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef) +{ + uchar buff[MARIA_COLUMNDEF_SIZE]; + uchar *ptr=buff; + + mi_int6store(ptr,columndef->offset); ptr+= 6; + mi_int2store(ptr,columndef->type); ptr+= 2; + mi_int2store(ptr,columndef->length); ptr+= 2; + mi_int2store(ptr,columndef->fill_length); ptr+= 2; + mi_int2store(ptr,columndef->null_pos); ptr+= 2; + mi_int2store(ptr,columndef->empty_pos); ptr+= 2; + (*ptr++)= columndef->null_bit; + (*ptr++)= columndef->empty_bit; + return my_write(file,(char*) buff, (uint) (ptr-buff), MYF(MY_NABP)); +} + +char *_ma_columndef_read(char *ptr, MARIA_COLUMNDEF *columndef) +{ + columndef->offset= mi_uint6korr(ptr); ptr+= 6; + columndef->type= mi_sint2korr(ptr); ptr+= 2; + columndef->length= mi_uint2korr(ptr); ptr+= 2; + columndef->fill_length= mi_uint2korr(ptr); ptr+= 2; + columndef->null_pos= mi_uint2korr(ptr); ptr+= 2; + columndef->empty_pos= mi_uint2korr(ptr); ptr+= 2; + columndef->null_bit= (uint8) *ptr++; + columndef->empty_bit= (uint8) *ptr++; + return ptr; +} + +/************************************************************************** + Open data file + We can't use dup() here as the data file descriptors need to have different + active seek-positions. + + The argument file_to_dup is here for the future if there would on some OS + exist a dup()-like call that would give us two different file descriptors. +*************************************************************************/ + +int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, + File file_to_dup __attribute__((unused))) +{ + info->dfile.file= my_open(share->data_file_name, share->mode | O_SHARE, + MYF(MY_WME)); + return info->dfile.file >= 0 ? 0 : 1; +} + + +int _ma_open_keyfile(MARIA_SHARE *share) +{ + if ((share->kfile.file= my_open(share->unique_file_name, + share->mode | O_SHARE, + MYF(MY_WME))) < 0) + return 1; + return 0; +} + + +/* + Disable all indexes. + + SYNOPSIS + maria_disable_indexes() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Disable all indexes. + + RETURN + 0 ok +*/ + +int maria_disable_indexes(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + + maria_clear_all_keys_active(share->state.key_map); + return 0; +} + + +/* + Enable all indexes + + SYNOPSIS + maria_enable_indexes() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Enable all indexes. The indexes might have been disabled + by maria_disable_index() before. + The function works only if both data and indexes are empty, + otherwise a repair is required. + To be sure, call handler::delete_all_rows() before. + + RETURN + 0 ok + HA_ERR_CRASHED data or index is non-empty. +*/ + +int maria_enable_indexes(MARIA_HA *info) +{ + int error= 0; + MARIA_SHARE *share= info->s; + + if (share->state.state.data_file_length || + (share->state.state.key_file_length != share->base.keystart)) + { + maria_print_error(info->s, HA_ERR_CRASHED); + error= HA_ERR_CRASHED; + } + else + maria_set_all_keys_active(share->state.key_map, share->base.keys); + return error; +} + + +/* + Test if indexes are disabled. + + SYNOPSIS + maria_indexes_are_disabled() + info A pointer to the MARIA storage engine MARIA_HA struct. + + DESCRIPTION + Test if indexes are disabled. + + RETURN + 0 indexes are not disabled + 1 all indexes are disabled + 2 non-unique indexes are disabled +*/ + +int maria_indexes_are_disabled(MARIA_HA *info) +{ + MARIA_SHARE *share= info->s; + + /* + No keys or all are enabled. keys is the number of keys. Left shifted + gives us only one bit set. When decreased by one, gives us all all bits + up to this one set and it gets unset. + */ + if (!share->base.keys || + (maria_is_all_keys_active(share->state.key_map, share->base.keys))) + return 0; + + /* All are disabled */ + if (maria_is_any_key_active(share->state.key_map)) + return 1; + + /* + We have keys. Some enabled, some disabled. + Don't check for any non-unique disabled but return directly 2 + */ + return 2; +} + + +static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused))) +{ + return 0; +} + +static void maria_scan_end_dummy(MARIA_HA *info __attribute__((unused))) +{ +} + +static my_bool maria_once_init_dummy(MARIA_SHARE *share + __attribute__((unused)), + File dfile __attribute__((unused))) +{ + return 0; +} + +static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused))) +{ + return 0; +} diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c new file mode 100644 index 00000000000..f3187314f0e --- /dev/null +++ b/storage/maria/ma_packrec.c @@ -0,0 +1,1716 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + /* Functions to compressed records */ + +#include "maria_def.h" + +#define IS_CHAR ((uint) 32768) /* Bit if char (not offset) in tree */ + +/* Some definitions to keep in sync with maria_pack.c */ +#define HEAD_LENGTH 32 /* Length of fixed header */ + +#if INT_MAX > 32767 +#define BITS_SAVED 32 +#define MAX_QUICK_TABLE_BITS 9 /* Because we may shift in 24 bits */ +#else +#define BITS_SAVED 16 +#define MAX_QUICK_TABLE_BITS 6 +#endif + +#define get_bit(BU) ((BU)->bits ? \ + (BU)->current_byte & ((maria_bit_type) 1 << --(BU)->bits) :\ + (fill_buffer(BU), (BU)->bits= BITS_SAVED-1,\ + (BU)->current_byte & ((maria_bit_type) 1 << (BITS_SAVED-1)))) +#define skip_to_next_byte(BU) ((BU)->bits&=~7) +#define get_bits(BU,count) (((BU)->bits >= count) ? (((BU)->current_byte >> ((BU)->bits-=count)) & mask[count]) : fill_and_get_bits(BU,count)) + +#define decode_bytes_test_bit(bit) \ + if (low_byte & (1 << (7-bit))) \ + pos++; \ + if (*pos & IS_CHAR) \ + { bits-=(bit+1); break; } \ + pos+= *pos + +/* Size in uint16 of a Huffman tree for byte compression of 256 byte values. */ +#define OFFSET_TABLE_SIZE 512 + +static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, + pbool fix_keys); +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree, + uint16 **decode_table,byte **intervall_buff, + uint16 *tmp_buff); +static void make_quick_table(uint16 *to_table,uint16 *decode_table, + uint *next_free,uint value,uint bits, + uint max_bits); +static void fill_quick_table(uint16 *table,uint bits, uint max_bits, + uint value); +static uint copy_decode_table(uint16 *to_pos,uint offset, + uint16 *decode_table); +static uint find_longest_bitstream(uint16 *table, uint16 *end); +static void (*get_unpack_function(MARIA_COLUMNDEF *rec))(MARIA_COLUMNDEF *field, + MARIA_BIT_BUFF *buff, + byte *to, + byte *end); +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_skip_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void uf_endspace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_endspace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void uf_prespace_selected(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_space_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_prespace(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_zerofill_normal(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_constant(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_intervall(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_zero(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end); +static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to,byte *end); +static uint decode_pos(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree); +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff,uchar *buffer, + uint length); +static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff,uint count); +static void fill_buffer(MARIA_BIT_BUFF *bit_buff); +static uint max_bit(uint value); +static uint read_pack_length(uint version, const uchar *buf, ulong *length); +#ifdef HAVE_MMAP +static uchar *_ma_mempack_get_block_info(MARIA_HA *maria, + MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, + byte **rec_buff_p, + my_size_t *rec_buff_size_p, + uchar *header); +#endif + +static maria_bit_type mask[]= +{ + 0x00000000, + 0x00000001, 0x00000003, 0x00000007, 0x0000000f, + 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, + 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, + 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, +#if BITS_SAVED > 16 + 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, + 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, + 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, + 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff, +#endif +}; + + +my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile) +{ + share->options|= HA_OPTION_READ_ONLY_DATA; + return (_ma_read_pack_info(share, dfile, + (pbool) + test(!(share->options & + (HA_OPTION_PACK_RECORD | + HA_OPTION_TEMP_COMPRESS_RECORD))))); +} + + +my_bool _ma_once_end_pack_row(MARIA_SHARE *share) +{ + if (share->decode_trees) + { + my_free((gptr) share->decode_trees,MYF(0)); + my_free((gptr) share->decode_tables,MYF(0)); + } + return 0; +} + + +/* Read all packed info, allocate memory and fix field structs */ + +static my_bool _ma_read_pack_info(MARIA_SHARE *share, File file, + pbool fix_keys) +{ + int diff_length; + uint i,trees,huff_tree_bits,rec_reflength,length; + uint16 *decode_table,*tmp_buff; + ulong elements,intervall_length; + char *disk_cache,*intervall_buff; + uchar header[HEAD_LENGTH]; + MARIA_BIT_BUFF bit_buff; + DBUG_ENTER("_ma_read_pack_info"); + + if (maria_quick_table_bits < 4) + maria_quick_table_bits=4; + else if (maria_quick_table_bits > MAX_QUICK_TABLE_BITS) + maria_quick_table_bits=MAX_QUICK_TABLE_BITS; + + my_errno=0; + if (my_read(file,(byte*) header,sizeof(header),MYF(MY_NABP))) + { + if (!my_errno) + my_errno=HA_ERR_END_OF_FILE; + goto err0; + } + /* Only the first three bytes of magic number are independent of version. */ + if (memcmp((byte*) header, (byte*) maria_pack_file_magic, 3)) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err0; + } + share->pack.version= header[3]; /* fourth byte of magic number */ + share->pack.header_length= uint4korr(header+4); + share->min_pack_length=(uint) uint4korr(header+8); + share->max_pack_length=(uint) uint4korr(header+12); + set_if_bigger(share->base.pack_reclength,share->max_pack_length); + elements=uint4korr(header+16); + intervall_length=uint4korr(header+20); + trees=uint2korr(header+24); + share->pack.ref_length=header[26]; + rec_reflength=header[27]; + diff_length=(int) rec_reflength - (int) share->base.rec_reflength; + if (fix_keys) + share->rec_reflength=rec_reflength; + share->base.min_block_length=share->min_pack_length+1; + if (share->min_pack_length > 254) + share->base.min_block_length+=2; + DBUG_PRINT("info", ("fixed header length: %u", HEAD_LENGTH)); + DBUG_PRINT("info", ("total header length: %lu", share->pack.header_length)); + DBUG_PRINT("info", ("pack file version: %u", share->pack.version)); + DBUG_PRINT("info", ("min pack length: %lu", share->min_pack_length)); + DBUG_PRINT("info", ("max pack length: %lu", share->max_pack_length)); + DBUG_PRINT("info", ("elements of all trees: %lu", elements)); + DBUG_PRINT("info", ("distinct values bytes: %lu", intervall_length)); + DBUG_PRINT("info", ("number of code trees: %u", trees)); + DBUG_PRINT("info", ("bytes for record lgt: %u", share->pack.ref_length)); + DBUG_PRINT("info", ("record pointer length: %u", rec_reflength)); + + + /* + Memory segment #1: + - Decode tree heads + - Distinct column values + */ + if (!(share->decode_trees=(MARIA_DECODE_TREE*) + my_malloc((uint) (trees*sizeof(MARIA_DECODE_TREE)+ + intervall_length*sizeof(byte)), + MYF(MY_WME)))) + goto err0; + intervall_buff=(byte*) (share->decode_trees+trees); + + /* + Memory segment #2: + - Decode tables + - Quick decode tables + - Temporary decode table + - Compressed data file header cache + This segment will be reallocated after construction of the tables. + */ + length=(uint) (elements*2+trees*(1 << maria_quick_table_bits)); + if (!(share->decode_tables=(uint16*) + my_malloc((length+OFFSET_TABLE_SIZE)*sizeof(uint16)+ + (uint) (share->pack.header_length - sizeof(header)), + MYF(MY_WME | MY_ZEROFILL)))) + goto err1; + tmp_buff=share->decode_tables+length; + disk_cache=(byte*) (tmp_buff+OFFSET_TABLE_SIZE); + + if (my_read(file,disk_cache, + (uint) (share->pack.header_length-sizeof(header)), + MYF(MY_NABP))) + goto err2; + + huff_tree_bits=max_bit(trees ? trees-1 : 0); + init_bit_buffer(&bit_buff, (uchar*) disk_cache, + (uint) (share->pack.header_length-sizeof(header))); + /* Read new info for each field */ + for (i=0 ; i < share->base.fields ; i++) + { + share->columndef[i].base_type=(enum en_fieldtype) get_bits(&bit_buff,5); + share->columndef[i].pack_type=(uint) get_bits(&bit_buff,6); + share->columndef[i].space_length_bits=get_bits(&bit_buff,5); + share->columndef[i].huff_tree=share->decode_trees+(uint) get_bits(&bit_buff, + huff_tree_bits); + share->columndef[i].unpack= get_unpack_function(share->columndef + i); + DBUG_PRINT("info", ("col: %2u type: %2u pack: %u slbits: %2u", + i, share->columndef[i].base_type, + share->columndef[i].pack_type, + share->columndef[i].space_length_bits)); + } + skip_to_next_byte(&bit_buff); + /* + Construct the decoding tables from the file header. Keep track of + the used memory. + */ + decode_table=share->decode_tables; + for (i=0 ; i < trees ; i++) + if (read_huff_table(&bit_buff,share->decode_trees+i,&decode_table, + &intervall_buff,tmp_buff)) + goto err3; + /* Reallocate the decoding tables to the used size. */ + decode_table=(uint16*) + my_realloc((gptr) share->decode_tables, + (uint) ((byte*) decode_table - (byte*) share->decode_tables), + MYF(MY_HOLD_ON_ERROR)); + /* Fix the table addresses in the tree heads. */ + { + long diff=PTR_BYTE_DIFF(decode_table,share->decode_tables); + share->decode_tables=decode_table; + for (i=0 ; i < trees ; i++) + share->decode_trees[i].table=ADD_TO_PTR(share->decode_trees[i].table, + diff, uint16*); + } + + /* Fix record-ref-length for keys */ + if (fix_keys) + { + for (i=0 ; i < share->base.keys ; i++) + { + MARIA_KEYDEF *keyinfo= &share->keyinfo[i]; + keyinfo->keylength+= (uint16) diff_length; + keyinfo->minlength+= (uint16) diff_length; + keyinfo->maxlength+= (uint16) diff_length; + keyinfo->seg[keyinfo->flag & HA_FULLTEXT ? + FT_SEGS : keyinfo->keysegs].length= (uint16) rec_reflength; + } + if (share->ft2_keyinfo.seg) + { + MARIA_KEYDEF *ft2_keyinfo= &share->ft2_keyinfo; + ft2_keyinfo->keylength+= (uint16) diff_length; + ft2_keyinfo->minlength+= (uint16) diff_length; + ft2_keyinfo->maxlength+= (uint16) diff_length; + } + } + + if (bit_buff.error || bit_buff.pos < bit_buff.end) + goto err3; + + DBUG_RETURN(0); + +err3: + my_errno=HA_ERR_WRONG_IN_RECORD; +err2: + my_free((gptr) share->decode_tables,MYF(0)); +err1: + my_free((gptr) share->decode_trees,MYF(0)); +err0: + DBUG_RETURN(1); +} + + +/* + Read a huff-code-table from datafile. + + SYNOPSIS + read_huff_table() + bit_buff Bit buffer pointing at start of the + decoding table in the file header cache. + decode_tree Pointer to the decode tree head. + decode_table IN/OUT Address of a pointer to the next free space. + intervall_buff IN/OUT Address of a pointer to the next unused values. + tmp_buff Buffer for temporary extraction of a full + decoding table as read from bit_buff. + + RETURN + 0 OK. + 1 Error. +*/ +static uint read_huff_table(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree, + uint16 **decode_table, byte **intervall_buff, + uint16 *tmp_buff) +{ + uint min_chr,elements,char_bits,offset_bits,size,intervall_length,table_bits, + next_free_offset; + uint16 *ptr,*end; + DBUG_ENTER("read_huff_table"); + + if (!get_bits(bit_buff,1)) + { + /* Byte value compression. */ + min_chr=get_bits(bit_buff,8); + elements=get_bits(bit_buff,9); + char_bits=get_bits(bit_buff,5); + offset_bits=get_bits(bit_buff,5); + intervall_length=0; + ptr=tmp_buff; + ptr=tmp_buff; + DBUG_PRINT("info", ("byte value compression")); + DBUG_PRINT("info", ("minimum byte value: %u", min_chr)); + DBUG_PRINT("info", ("number of tree nodes: %u", elements)); + DBUG_PRINT("info", ("bits for values: %u", char_bits)); + DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits)); + if (elements > 256) + { + DBUG_PRINT("error", ("ERROR: illegal number of tree elements: %u", + elements)); + DBUG_RETURN(1); + } + } + else + { + /* Distinct column value compression. */ + min_chr=0; + elements=get_bits(bit_buff,15); + intervall_length=get_bits(bit_buff,16); + char_bits=get_bits(bit_buff,5); + offset_bits=get_bits(bit_buff,5); + decode_tree->quick_table_bits=0; + ptr= *decode_table; + DBUG_PRINT("info", ("distinct column value compression")); + DBUG_PRINT("info", ("number of tree nodes: %u", elements)); + DBUG_PRINT("info", ("value buffer length: %u", intervall_length)); + DBUG_PRINT("info", ("bits for value index: %u", char_bits)); + DBUG_PRINT("info", ("bits for tree offsets: %u", offset_bits)); + } + size=elements*2-2; + DBUG_PRINT("info", ("tree size in uint16: %u", size)); + DBUG_PRINT("info", ("tree size in bytes: %u", + size * (uint) sizeof(uint16))); + + for (end=ptr+size ; ptr < end ; ptr++) + { + if (get_bit(bit_buff)) + { + *ptr= (uint16) get_bits(bit_buff,offset_bits); + if ((ptr + *ptr >= end) || !*ptr) + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + DBUG_RETURN(1); + } + } + else + *ptr= (uint16) (IS_CHAR + (get_bits(bit_buff,char_bits) + min_chr)); + } + skip_to_next_byte(bit_buff); + + decode_tree->table= *decode_table; + decode_tree->intervalls= *intervall_buff; + if (! intervall_length) + { + /* Byte value compression. ptr started from tmp_buff. */ + /* Find longest Huffman code from begin to end of tree in bits. */ + table_bits= find_longest_bitstream(tmp_buff, ptr); + if (table_bits >= OFFSET_TABLE_SIZE) + DBUG_RETURN(1); + if (table_bits > maria_quick_table_bits) + table_bits=maria_quick_table_bits; + DBUG_PRINT("info", ("table bits: %u", table_bits)); + + next_free_offset= (1 << table_bits); + make_quick_table(*decode_table,tmp_buff,&next_free_offset,0,table_bits, + table_bits); + (*decode_table)+= next_free_offset; + decode_tree->quick_table_bits=table_bits; + } + else + { + /* Distinct column value compression. ptr started from *decode_table */ + (*decode_table)=end; + /* + get_bits() moves some bytes to a cache buffer in advance. May need + to step back. + */ + bit_buff->pos-= bit_buff->bits/8; + /* Copy the distinct column values from the buffer. */ + memcpy(*intervall_buff,bit_buff->pos,(size_t) intervall_length); + (*intervall_buff)+=intervall_length; + bit_buff->pos+=intervall_length; + bit_buff->bits=0; + } + DBUG_RETURN(0); +} + + +/* + Make a quick_table for faster decoding. + + SYNOPSIS + make_quick_table() + to_table Target quick_table and remaining decode table. + decode_table Source Huffman (sub-)tree within tmp_buff. + next_free_offset IN/OUT Next free offset from to_table. + Starts behind quick_table on the top-level. + value Huffman bits found so far. + bits Remaining bits to be collected. + max_bits Total number of bits to collect (table_bits). + + DESCRIPTION + + The quick table is an array of 16-bit values. There exists one value + for each possible code representable by max_bits (table_bits) bits. + In most cases table_bits is 9. So there are 512 16-bit values. + + If the high-order bit (16) is set (IS_CHAR) then the array slot for + this value is a valid Huffman code for a resulting byte value. + + The low-order 8 bits (1..8) are the resulting byte value. + + Bits 9..14 are the length of the Huffman code for this byte value. + This means so many bits from the input stream were needed to + represent this byte value. The remaining bits belong to later + Huffman codes. This also means that for every Huffman code shorter + than table_bits there are multiple entires in the array, which + differ just in the unused bits. + + If the high-order bit (16) is clear (0) then the remaining bits are + the position of the remaining Huffman decode tree segment behind the + quick table. + + RETURN + void +*/ + +static void make_quick_table(uint16 *to_table, uint16 *decode_table, + uint *next_free_offset, uint value, uint bits, + uint max_bits) +{ + DBUG_ENTER("make_quick_table"); + + /* + When down the table to the requested maximum, copy the rest of the + Huffman table. + */ + if (!bits--) + { + /* + Remaining left Huffman tree segment starts behind quick table. + Remaining right Huffman tree segment starts behind left segment. + */ + to_table[value]= (uint16) *next_free_offset; + /* + Re-construct the remaining Huffman tree segment at + next_free_offset in to_table. + */ + *next_free_offset=copy_decode_table(to_table, *next_free_offset, + decode_table); + DBUG_VOID_RETURN; + } + + /* Descent on the left side. Left side bits are clear (0). */ + if (!(*decode_table & IS_CHAR)) + { + /* Not a leaf. Follow the pointer. */ + make_quick_table(to_table,decode_table+ *decode_table, + next_free_offset,value,bits,max_bits); + } + else + { + /* + A leaf. A Huffman code is complete. Fill the quick_table + array for all possible bit strings starting with this Huffman + code. + */ + fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + } + + /* Descent on the right side. Right side bits are set (1). */ + decode_table++; + value|= (1 << bits); + if (!(*decode_table & IS_CHAR)) + { + /* Not a leaf. Follow the pointer. */ + make_quick_table(to_table,decode_table+ *decode_table, + next_free_offset,value,bits,max_bits); + } + else + { + /* + A leaf. A Huffman code is complete. Fill the quick_table + array for all possible bit strings starting with this Huffman + code. + */ + fill_quick_table(to_table+value,bits,max_bits,(uint) *decode_table); + } + + DBUG_VOID_RETURN; +} + + +/* + Fill quick_table for all possible values starting with this Huffman code. + + SYNOPSIS + fill_quick_table() + table Target quick_table position. + bits Unused bits from max_bits. + max_bits Total number of bits to collect (table_bits). + value The byte encoded by the found Huffman code. + + DESCRIPTION + + Fill the segment (all slots) of the quick_table array with the + resulting value for the found Huffman code. There are as many slots + as there are combinations representable by the unused bits. + + In most cases we use 9 table bits. Assume a 3-bit Huffman code. Then + there are 6 unused bits. Hence we fill 2**6 = 64 slots with the + value. + + RETURN + void +*/ + +static void fill_quick_table(uint16 *table, uint bits, uint max_bits, + uint value) +{ + uint16 *end; + DBUG_ENTER("fill_quick_table"); + + /* + Bits 1..8 of value represent the decoded byte value. + Bits 9..14 become the length of the Huffman code for this byte value. + Bit 16 flags a valid code (IS_CHAR). + */ + value|= (max_bits - bits) << 8 | IS_CHAR; + + for (end= table + (uint) (((uint) 1 << bits)); table < end; table++) + { + *table= (uint16) value; + } + DBUG_VOID_RETURN; +} + + +/* + Reconstruct a decode subtree at the target position. + + SYNOPSIS + copy_decode_table() + to_pos Target quick_table and remaining decode table. + offset Next free offset from to_pos. + decode_table Source Huffman subtree within tmp_buff. + + NOTE + Pointers in the decode tree are relative to the pointers position. + + RETURN + next free offset from to_pos. +*/ + +static uint copy_decode_table(uint16 *to_pos, uint offset, + uint16 *decode_table) +{ + uint prev_offset= offset; + DBUG_ENTER("copy_decode_table"); + + /* Descent on the left side. */ + if (!(*decode_table & IS_CHAR)) + { + /* Set a pointer to the next target node. */ + to_pos[offset]=2; + /* Copy the left hand subtree there. */ + offset=copy_decode_table(to_pos,offset+2,decode_table+ *decode_table); + } + else + { + /* Copy the byte value. */ + to_pos[offset]= *decode_table; + /* Step behind this node. */ + offset+=2; + } + + /* Descent on the right side. */ + decode_table++; + if (!(*decode_table & IS_CHAR)) + { + /* Set a pointer to the next free target node. */ + to_pos[prev_offset+1]=(uint16) (offset-prev_offset-1); + /* Copy the right hand subtree to the entry of that node. */ + offset=copy_decode_table(to_pos,offset,decode_table+ *decode_table); + } + else + { + /* Copy the byte value. */ + to_pos[prev_offset+1]= *decode_table; + } + DBUG_RETURN(offset); +} + + +/* + Find the length of the longest Huffman code in this table in bits. + + SYNOPSIS + find_longest_bitstream() + table Code (sub-)table start. + end End of code table. + + IMPLEMENTATION + + Recursively follow the branch(es) of the code pair on every level of + the tree until two byte values (and no branch) are found. Add one to + each level when returning back from each recursion stage. + + 'end' is used for error checking only. A clean tree terminates + before reaching 'end'. Hence the exact value of 'end' is not too + important. However having it higher than necessary could lead to + misbehaviour should 'next' jump into the dirty area. + + RETURN + length Length of longest Huffman code in bits. + >= OFFSET_TABLE_SIZE Error, broken tree. It does not end before 'end'. +*/ + +static uint find_longest_bitstream(uint16 *table, uint16 *end) +{ + uint length=1; + uint length2; + if (!(*table & IS_CHAR)) + { + uint16 *next= table + *table; + if (next > end || next == table) + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + return OFFSET_TABLE_SIZE; + } + length=find_longest_bitstream(next, end)+1; + } + table++; + if (!(*table & IS_CHAR)) + { + uint16 *next= table + *table; + if (next > end || next == table) + { + DBUG_PRINT("error", ("ERROR: illegal pointer in decode tree")); + return OFFSET_TABLE_SIZE; + } + length2= find_longest_bitstream(next, end) + 1; + length=max(length,length2); + } + return length; +} + + +/* + Read record from datafile. + + SYNOPSIS + _ma_read_pack_record() + info A pointer to MARIA_HA. + filepos File offset of the record. + buf RETURN The buffer to receive the record. + + RETURN + 0 on success + HA_ERR_WRONG_IN_RECORD or -1 on error +*/ + +int _ma_read_pack_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) +{ + MARIA_BLOCK_INFO block_info; + File file; + DBUG_ENTER("maria_read_pack_record"); + + if (filepos == HA_OFFSET_ERROR) + DBUG_RETURN(-1); /* _search() didn't find record */ + + file= info->dfile.file; + if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, &info->rec_buff_size, file, + filepos)) + goto err; + if (my_read(file,(byte*) info->rec_buff + block_info.offset , + block_info.rec_len - block_info.offset, MYF(MY_NABP))) + goto panic; + info->update|= HA_STATE_AKTIV; + DBUG_RETURN(_ma_pack_rec_unpack(info,&info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); +panic: + my_errno=HA_ERR_WRONG_IN_RECORD; +err: + DBUG_RETURN(-1); +} + + + +int _ma_pack_rec_unpack(register MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, + register byte *to, byte *from, ulong reclength) +{ + byte *end_field; + reg3 MARIA_COLUMNDEF *end; + MARIA_COLUMNDEF *current_field; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_pack_rec_unpack"); + + if (info->s->base.null_bytes) + { + memcpy(to, from, info->s->base.null_bytes); + to+= info->s->base.null_bytes; + from+= info->s->base.null_bytes; + reclength-= info->s->base.null_bytes; + } + init_bit_buffer(bit_buff, (uchar*) from, reclength); + for (current_field=share->columndef, end=current_field+share->base.fields ; + current_field < end ; + current_field++,to=end_field) + { + end_field=to+current_field->length; + (*current_field->unpack)(current_field, bit_buff, to, end_field); + } + if (!bit_buff->error && + bit_buff->pos - bit_buff->bits / 8 == bit_buff->end) + DBUG_RETURN(0); + info->update&= ~HA_STATE_AKTIV; + DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD); +} /* _ma_pack_rec_unpack */ + + + /* Return function to unpack field */ + +static void (*get_unpack_function(MARIA_COLUMNDEF *rec)) + (MARIA_COLUMNDEF *, MARIA_BIT_BUFF *, byte *, byte *) +{ + switch (rec->base_type) { + case FIELD_SKIP_ZERO: + if (rec->pack_type & PACK_TYPE_ZERO_FILL) + return &uf_zerofill_skip_zero; + return &uf_skip_zero; + case FIELD_NORMAL: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + return &uf_space_normal; + if (rec->pack_type & PACK_TYPE_ZERO_FILL) + return &uf_zerofill_normal; + return &decode_bytes; + case FIELD_SKIP_ENDSPACE: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + { + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_space_endspace_selected; + return &uf_space_endspace; + } + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_endspace_selected; + return &uf_endspace; + case FIELD_SKIP_PRESPACE: + if (rec->pack_type & PACK_TYPE_SPACE_FIELDS) + { + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_space_prespace_selected; + return &uf_space_prespace; + } + if (rec->pack_type & PACK_TYPE_SELECTED) + return &uf_prespace_selected; + return &uf_prespace; + case FIELD_CONSTANT: + return &uf_constant; + case FIELD_INTERVALL: + return &uf_intervall; + case FIELD_ZERO: + case FIELD_CHECK: + return &uf_zero; + case FIELD_BLOB: + return &uf_blob; + case FIELD_VARCHAR: + if (rec->length <= 256) /* 255 + 1 byte length */ + return &uf_varchar1; + return &uf_varchar2; + case FIELD_LAST: + default: + return 0; /* This should never happend */ + } +} + + /* The different functions to unpack a field */ + +static void uf_zerofill_skip_zero(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bzero((char*) to,(uint) (end-to)); + else + { + end-=rec->space_length_bits; + decode_bytes(rec,bit_buff,to,end); + bzero((char*) end,rec->space_length_bits); + } +} + +static void uf_skip_zero(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bzero((char*) to,(uint) (end-to)); + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } + else + decode_bytes(rec,bit_buff,to,end); + } +} + +static void uf_endspace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } + else + decode_bytes(rec,bit_buff,to,end); +} + +static void uf_space_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); + } +} + +static void uf_endspace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + if (to+spaces != end) + decode_bytes(rec,bit_buff,to,end-spaces); + bfill((byte*) end-spaces,spaces,' '); +} + +static void uf_space_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } + else + decode_bytes(rec,bit_buff,to,end); + } +} + + +static void uf_prespace_selected(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } + else + decode_bytes(rec,bit_buff,to,end); +} + + +static void uf_space_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if (get_bit(bit_buff)) + bfill((byte*) to,(end-to),' '); + else + { + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); + } +} + +static void uf_prespace(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + uint spaces; + if ((spaces=get_bits(bit_buff,rec->space_length_bits))+to > end) + { + bit_buff->error=1; + return; + } + bfill((byte*) to,spaces,' '); + if (to+spaces != end) + decode_bytes(rec,bit_buff,to+spaces,end); +} + +static void uf_zerofill_normal(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + end-=rec->space_length_bits; + decode_bytes(rec,bit_buff, to, end); + bzero((char*) end,rec->space_length_bits); +} + +static void uf_constant(MARIA_COLUMNDEF *rec, + MARIA_BIT_BUFF *bit_buff __attribute__((unused)), + byte *to, byte *end) +{ + memcpy(to,rec->huff_tree->intervalls,(size_t) (end-to)); +} + +static void uf_intervall(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, + byte *end) +{ + reg1 uint field_length=(uint) (end-to); + memcpy(to,rec->huff_tree->intervalls+field_length*decode_pos(bit_buff, + rec->huff_tree), + (size_t) field_length); +} + + +/*ARGSUSED*/ +static void uf_zero(MARIA_COLUMNDEF *rec __attribute__((unused)), + MARIA_BIT_BUFF *bit_buff __attribute__((unused)), + byte *to, byte *end) +{ + bzero(to, (uint) (end-to)); +} + +static void uf_blob(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + if (get_bit(bit_buff)) + bzero(to, (uint) (end-to)); + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + uint pack_length=(uint) (end-to)-portable_sizeof_char_ptr; + if (bit_buff->blob_pos+length > bit_buff->blob_end) + { + bit_buff->error=1; + bzero((byte*) to,(end-to)); + return; + } + decode_bytes(rec,bit_buff,(byte*) bit_buff->blob_pos, + (byte*) bit_buff->blob_pos+length); + _ma_store_blob_length((byte*) to,pack_length,length); + memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos, + sizeof(char*)); + bit_buff->blob_pos+=length; + } +} + + +static void uf_varchar1(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end __attribute__((unused))) +{ + if (get_bit(bit_buff)) + to[0]= 0; /* Zero lengths */ + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + *to= (char) length; + decode_bytes(rec,bit_buff,to+1,to+1+length); + } +} + + +static void uf_varchar2(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end __attribute__((unused))) +{ + if (get_bit(bit_buff)) + to[0]=to[1]=0; /* Zero lengths */ + else + { + ulong length=get_bits(bit_buff,rec->space_length_bits); + int2store(to,length); + decode_bytes(rec,bit_buff,to+2,to+2+length); + } +} + + /* Functions to decode of buffer of bits */ + +#if BITS_SAVED == 64 + +static void decode_bytes(MARIA_COLUMNDEF *rec,MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + reg1 uint bits,low_byte; + reg3 uint16 *pos; + reg4 uint table_bits,table_and; + MARIA_DECODE_TREE *decode_tree; + + decode_tree=rec->decode_tree; + bits=bit_buff->bits; /* Save in reg for quicker access */ + table_bits=decode_tree->quick_table_bits; + table_and= (1 << table_bits)-1; + + do + { + if (bits <= 32) + { + if (bit_buff->pos > bit_buff->end+4) + { + bit_buff->error=1; + return; /* Can't be right */ + } + bit_buff->current_byte= (bit_buff->current_byte << 32) + + ((((uint) bit_buff->pos[3])) + + (((uint) bit_buff->pos[2]) << 8) + + (((uint) bit_buff->pos[1]) << 16) + + (((uint) bit_buff->pos[0]) << 24)); + bit_buff->pos+=4; + bits+=32; + } + /* + First use info in quick_table. + + The quick table is an array of 16-bit values. There exists one + value for each possible code representable by table_bits bits. + In most cases table_bits is 9. So there are 512 16-bit values. + + If the high-order bit (16) is set (IS_CHAR) then the array slot + for this value is a valid Huffman code for a resulting byte value. + + The low-order 8 bits (1..8) are the resulting byte value. + + Bits 9..14 are the length of the Huffman code for this byte value. + This means so many bits from the input stream were needed to + represent this byte value. The remaining bits belong to later + Huffman codes. This also means that for every Huffman code shorter + than table_bits there are multiple entires in the array, which + differ just in the unused bits. + + If the high-order bit (16) is clear (0) then the remaining bits are + the position of the remaining Huffman decode tree segment behind the + quick table. + */ + low_byte=(uint) (bit_buff->current_byte >> (bits - table_bits)) & table_and; + low_byte=decode_tree->table[low_byte]; + if (low_byte & IS_CHAR) + { + /* + All Huffman codes of less or equal table_bits length are in the + quick table. This is one of them. + */ + *to++ = (char) (low_byte & 255); /* Found char in quick table */ + bits-= ((low_byte >> 8) & 31); /* Remove bits used */ + } + else + { /* Map through rest of decode-table */ + /* This means that the Huffman code must be longer than table_bits. */ + pos=decode_tree->table+low_byte; + bits-=table_bits; + /* NOTE: decode_bytes_test_bit() is a macro wich contains a break !!! */ + for (;;) + { + low_byte=(uint) (bit_buff->current_byte >> (bits-8)); + decode_bytes_test_bit(0); + decode_bytes_test_bit(1); + decode_bytes_test_bit(2); + decode_bytes_test_bit(3); + decode_bytes_test_bit(4); + decode_bytes_test_bit(5); + decode_bytes_test_bit(6); + decode_bytes_test_bit(7); + bits-=8; + } + *to++ = (char) *pos; + } + } while (to != end); + + bit_buff->bits=bits; + return; +} + +#else + +static void decode_bytes(MARIA_COLUMNDEF *rec, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *end) +{ + reg1 uint bits,low_byte; + reg3 uint16 *pos; + reg4 uint table_bits,table_and; + MARIA_DECODE_TREE *decode_tree; + + decode_tree=rec->huff_tree; + bits=bit_buff->bits; /* Save in reg for quicker access */ + table_bits=decode_tree->quick_table_bits; + table_and= (1 << table_bits)-1; + + do + { + if (bits < table_bits) + { + if (bit_buff->pos > bit_buff->end+1) + { + bit_buff->error=1; + return; /* Can't be right */ + } +#if BITS_SAVED == 32 + bit_buff->current_byte= (bit_buff->current_byte << 24) + + (((uint) ((uchar) bit_buff->pos[2]))) + + (((uint) ((uchar) bit_buff->pos[1])) << 8) + + (((uint) ((uchar) bit_buff->pos[0])) << 16); + bit_buff->pos+=3; + bits+=24; +#else + if (bits) /* We must have at leasts 9 bits */ + { + bit_buff->current_byte= (bit_buff->current_byte << 8) + + (uint) ((uchar) bit_buff->pos[0]); + bit_buff->pos++; + bits+=8; + } + else + { + bit_buff->current_byte= ((uint) ((uchar) bit_buff->pos[0]) << 8) + + ((uint) ((uchar) bit_buff->pos[1])); + bit_buff->pos+=2; + bits+=16; + } +#endif + } + /* First use info in quick_table */ + low_byte=(bit_buff->current_byte >> (bits - table_bits)) & table_and; + low_byte=decode_tree->table[low_byte]; + if (low_byte & IS_CHAR) + { + *to++ = (low_byte & 255); /* Found char in quick table */ + bits-= ((low_byte >> 8) & 31); /* Remove bits used */ + } + else + { /* Map through rest of decode-table */ + pos=decode_tree->table+low_byte; + bits-=table_bits; + for (;;) + { + if (bits < 8) + { /* We don't need to check end */ +#if BITS_SAVED == 32 + bit_buff->current_byte= (bit_buff->current_byte << 24) + + (((uint) ((uchar) bit_buff->pos[2]))) + + (((uint) ((uchar) bit_buff->pos[1])) << 8) + + (((uint) ((uchar) bit_buff->pos[0])) << 16); + bit_buff->pos+=3; + bits+=24; +#else + bit_buff->current_byte= (bit_buff->current_byte << 8) + + (uint) ((uchar) bit_buff->pos[0]); + bit_buff->pos+=1; + bits+=8; +#endif + } + low_byte=(uint) (bit_buff->current_byte >> (bits-8)); + decode_bytes_test_bit(0); + decode_bytes_test_bit(1); + decode_bytes_test_bit(2); + decode_bytes_test_bit(3); + decode_bytes_test_bit(4); + decode_bytes_test_bit(5); + decode_bytes_test_bit(6); + decode_bytes_test_bit(7); + bits-=8; + } + *to++ = (char) *pos; + } + } while (to != end); + + bit_buff->bits=bits; + return; +} +#endif /* BIT_SAVED == 64 */ + + +static uint decode_pos(MARIA_BIT_BUFF *bit_buff, + MARIA_DECODE_TREE *decode_tree) +{ + uint16 *pos=decode_tree->table; + for (;;) + { + if (get_bit(bit_buff)) + pos++; + if (*pos & IS_CHAR) + return (uint) (*pos & ~IS_CHAR); + pos+= *pos; + } +} + + +int _ma_read_rnd_pack_record(MARIA_HA *info, + byte *buf, + register MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks) +{ + File file; + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_pack_record"); + + if (filepos >= info->state->data_file_length) + { + my_errno= HA_ERR_END_OF_FILE; + goto err; + } + + file= info->dfile.file; + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache, (byte*) block_info.header, + filepos, share->pack.ref_length, + skip_deleted_blocks ? READING_NEXT : 0)) + goto err; + file= -1; + } + if (_ma_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, &info->rec_buff_size, + file, filepos)) + goto err; /* Error code is already set */ +#ifndef DBUG_OFF + if (block_info.rec_len > share->max_pack_length) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } +#endif + + if (info->opt_flag & READ_CACHE_USED) + { + if (_ma_read_cache(&info->rec_cache, (byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, + skip_deleted_blocks ? READING_NEXT : 0)) + goto err; + } + else + { + if (my_read(info->dfile.file, (byte*)info->rec_buff + block_info.offset, + block_info.rec_len-block_info.offset, + MYF(MY_NABP))) + goto err; + } + info->packed_length= block_info.rec_len; + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= block_info.filepos+block_info.rec_len; + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + + DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); + err: + DBUG_RETURN(my_errno); +} + + + /* Read and process header from a huff-record-file */ + +uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, + byte **rec_buff_p, my_size_t *rec_buff_size_p, + File file, my_off_t filepos) +{ + uchar *header= info->header; + uint head_length,ref_length; + LINT_INIT(ref_length); + + if (file >= 0) + { + ref_length=maria->s->pack.ref_length; + /* + We can't use my_pread() here because _ma_read_rnd_pack_record assumes + position is ok + */ + VOID(my_seek(file,filepos,MY_SEEK_SET,MYF(0))); + if (my_read(file,(char*) header,ref_length,MYF(MY_NABP))) + return BLOCK_FATAL_ERROR; + DBUG_DUMP("header",(byte*) header,ref_length); + } + head_length= read_pack_length((uint) maria->s->pack.version, header, + &info->rec_len); + if (maria->s->base.blobs) + { + head_length+= read_pack_length((uint) maria->s->pack.version, + header + head_length, &info->blob_len); + /* + Ensure that the record buffer is big enough for the compressed + record plus all expanded blobs. [We do not have an extra buffer + for the resulting blobs. Sigh.] + */ + if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p, + info->rec_len + info->blob_len + + maria->s->base.extra_rec_buff_size)) + return BLOCK_FATAL_ERROR; /* not enough memory */ + bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len; + bit_buff->blob_end= bit_buff->blob_pos + info->blob_len; + maria->blob_length=info->blob_len; + } + info->filepos=filepos+head_length; + if (file > 0) + { + info->offset=min(info->rec_len, ref_length - head_length); + memcpy(*rec_buff_p, header + head_length, info->offset); + } + return 0; +} + + + /* rutines for bit buffer */ + /* Note buffer must be 6 byte bigger than longest row */ + +static void init_bit_buffer(MARIA_BIT_BUFF *bit_buff, uchar *buffer, + uint length) +{ + bit_buff->pos=buffer; + bit_buff->end=buffer+length; + bit_buff->bits=bit_buff->error=0; + bit_buff->current_byte=0; /* Avoid purify errors */ +} + +static uint fill_and_get_bits(MARIA_BIT_BUFF *bit_buff, uint count) +{ + uint tmp; + count-=bit_buff->bits; + tmp=(bit_buff->current_byte & mask[bit_buff->bits]) << count; + fill_buffer(bit_buff); + bit_buff->bits=BITS_SAVED - count; + return tmp+(bit_buff->current_byte >> (BITS_SAVED - count)); +} + + /* Fill in empty bit_buff->current_byte from buffer */ + /* Sets bit_buff->error if buffer is exhausted */ + +static void fill_buffer(MARIA_BIT_BUFF *bit_buff) +{ + if (bit_buff->pos >= bit_buff->end) + { + bit_buff->error= 1; + bit_buff->current_byte=0; + return; + } +#if BITS_SAVED == 64 + bit_buff->current_byte= ((((uint) ((uchar) bit_buff->pos[7]))) + + (((uint) ((uchar) bit_buff->pos[6])) << 8) + + (((uint) ((uchar) bit_buff->pos[5])) << 16) + + (((uint) ((uchar) bit_buff->pos[4])) << 24) + + ((ulonglong) + ((((uint) ((uchar) bit_buff->pos[3]))) + + (((uint) ((uchar) bit_buff->pos[2])) << 8) + + (((uint) ((uchar) bit_buff->pos[1])) << 16) + + (((uint) ((uchar) bit_buff->pos[0])) << 24)) << 32)); + bit_buff->pos+=8; +#else +#if BITS_SAVED == 32 + bit_buff->current_byte= (((uint) ((uchar) bit_buff->pos[3])) + + (((uint) ((uchar) bit_buff->pos[2])) << 8) + + (((uint) ((uchar) bit_buff->pos[1])) << 16) + + (((uint) ((uchar) bit_buff->pos[0])) << 24)); + bit_buff->pos+=4; +#else + bit_buff->current_byte= (uint) (((uint) ((uchar) bit_buff->pos[1]))+ + (((uint) ((uchar) bit_buff->pos[0])) << 8)); + bit_buff->pos+=2; +#endif +#endif +} + + /* Get number of bits neaded to represent value */ + +static uint max_bit(register uint value) +{ + reg2 uint power=1; + + while ((value>>=1)) + power++; + return (power); +} + + +/***************************************************************************** + Some redefined functions to handle files when we are using memmap +*****************************************************************************/ +#ifdef HAVE_SYS_MMAN_H +#include +#endif + +#ifdef HAVE_MMAP + +static int _ma_read_mempack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos); +static int _ma_read_rnd_mempack_record(MARIA_HA*, byte *, MARIA_RECORD_POS, + my_bool); + +my_bool _ma_memmap_file(MARIA_HA *info) +{ + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_memmap_file"); + + if (!info->s->file_map) + { + if (my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) < + share->state.state.data_file_length+MEMMAP_EXTRA_MARGIN) + { + DBUG_PRINT("warning",("File isn't extended for memmap")); + DBUG_RETURN(0); + } + if (_ma_dynmap_file(info, share->state.state.data_file_length)) + DBUG_RETURN(0); + } + info->opt_flag|= MEMMAP_USED; + info->read_record= share->read_record= _ma_read_mempack_record; + share->scan= _ma_read_rnd_mempack_record; + DBUG_RETURN(1); +} + + +void _ma_unmap_file(MARIA_HA *info) +{ + VOID(my_munmap(info->s->file_map, + (size_t) info->s->mmaped_length + MEMMAP_EXTRA_MARGIN)); +} + + +static uchar * +_ma_mempack_get_block_info(MARIA_HA *maria, + MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, + byte **rec_buff_p, + my_size_t *rec_buff_size_p, + uchar *header) +{ + header+= read_pack_length((uint) maria->s->pack.version, header, + &info->rec_len); + if (maria->s->base.blobs) + { + header+= read_pack_length((uint) maria->s->pack.version, header, + &info->blob_len); + /* _ma_alloc_rec_buff sets my_errno on error */ + if (_ma_alloc_buffer(rec_buff_p, rec_buff_size_p, + info->blob_len + maria->s->base.extra_rec_buff_size)) + return 0; /* not enough memory */ + bit_buff->blob_pos= (uchar*) *rec_buff_p; + bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len; + } + return header; +} + + +static int _ma_read_mempack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos) +{ + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + byte *pos; + DBUG_ENTER("maria_read_mempack_record"); + + if (filepos == HA_OFFSET_ERROR) + DBUG_RETURN(-1); /* _search() didn't find record */ + + if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, + &info->rec_buff_size, + (uchar*) share->file_map+ + filepos))) + DBUG_RETURN(-1); + DBUG_RETURN(_ma_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); +} + + +/*ARGSUSED*/ +static int _ma_read_rnd_mempack_record(MARIA_HA *info, + byte *buf, + register MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks + __attribute__((unused))) +{ + MARIA_BLOCK_INFO block_info; + MARIA_SHARE *share=info->s; + byte *pos,*start; + DBUG_ENTER("_ma_read_rnd_mempack_record"); + + if (filepos >= share->state.state.data_file_length) + { + my_errno=HA_ERR_END_OF_FILE; + goto err; + } + if (!(pos= (byte*) _ma_mempack_get_block_info(info, &info->bit_buff, + &block_info, + &info->rec_buff, + &info->rec_buff_size, + (uchar*) + (start= share->file_map + + filepos)))) + goto err; +#ifndef DBUG_OFF + if (block_info.rec_len > info->s->max_pack_length) + { + my_errno=HA_ERR_WRONG_IN_RECORD; + goto err; + } +#endif + info->packed_length=block_info.rec_len; + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= filepos+(uint) (pos-start)+block_info.rec_len; + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + + DBUG_RETURN (_ma_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); + err: + DBUG_RETURN(my_errno); +} + +#endif /* HAVE_MMAP */ + + /* Save length of row */ + +uint _ma_save_pack_length(uint version, byte *block_buff, ulong length) +{ + if (length < 254) + { + *(uchar*) block_buff= (uchar) length; + return 1; + } + if (length <= 65535) + { + *(uchar*) block_buff=254; + int2store(block_buff+1,(uint) length); + return 3; + } + *(uchar*) block_buff=255; + if (version == 1) /* old format */ + { + DBUG_ASSERT(length <= 0xFFFFFF); + int3store(block_buff + 1, (ulong) length); + return 4; + } + else + { + int4store(block_buff + 1, (ulong) length); + return 5; + } +} + + +static uint read_pack_length(uint version, const uchar *buf, ulong *length) +{ + if (buf[0] < 254) + { + *length= buf[0]; + return 1; + } + else if (buf[0] == 254) + { + *length= uint2korr(buf + 1); + return 3; + } + if (version == 1) /* old format */ + { + *length= uint3korr(buf + 1); + return 4; + } + else + { + *length= uint4korr(buf + 1); + return 5; + } +} + + +uint _ma_calc_pack_length(uint version, ulong length) +{ + return (length < 254) ? 1 : (length < 65536) ? 3 : (version == 1) ? 4 : 5; +} diff --git a/storage/maria/ma_page.c b/storage/maria/ma_page.c new file mode 100644 index 00000000000..d6b8d5ecd7d --- /dev/null +++ b/storage/maria/ma_page.c @@ -0,0 +1,188 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read and write key blocks */ + +#include "maria_def.h" + + /* Fetch a key-page in memory */ + +byte *_ma_fetch_keypage(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, + byte *buff, + int return_buffer __attribute__ ((unused))) +{ + byte *tmp; + uint page_size; + DBUG_ENTER("_ma_fetch_keypage"); + DBUG_PRINT("enter",("page: %ld", (long) page)); + + DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length); + /* + TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when + LSN on the pages will be implemented + */ + tmp= pagecache_read(info->s->pagecache, &info->s->kfile, + page / keyinfo->block_length, level, buff, + PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_LEFT_UNLOCKED, 0); + if (tmp == info->buff) + info->keyread_buff_used=1; + else if (!tmp) + { + DBUG_PRINT("error",("Got errno: %d from pagecache_read",my_errno)); + info->last_keypage=HA_OFFSET_ERROR; + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + info->last_keypage=page; + page_size= maria_data_on_page(tmp); + if (page_size < 4 || page_size > keyinfo->block_length) + { + DBUG_PRINT("error",("page %lu had wrong page length: %u", + (ulong) page, page_size)); + DBUG_DUMP("page", (char*) tmp, keyinfo->block_length); + info->last_keypage = HA_OFFSET_ERROR; + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + tmp= 0; + } + DBUG_RETURN(tmp); +} /* _ma_fetch_keypage */ + + + /* Write a key-page on disk */ + +int _ma_write_keypage(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + my_off_t page, int level, byte *buff) +{ + DBUG_ENTER("_ma_write_keypage"); + +#ifdef EXTRA_DEBUG /* Safety check */ + if (page < info->s->base.keystart || + page+keyinfo->block_length > info->state->key_file_length || + (page & (MARIA_MIN_KEY_BLOCK_LENGTH-1))) + { + DBUG_PRINT("error",("Trying to write inside key status region: " + "key_start: %lu length: %lu page: %lu", + (long) info->s->base.keystart, + (long) info->state->key_file_length, + (long) page)); + my_errno=EINVAL; + DBUG_RETURN((-1)); + } + DBUG_PRINT("page",("write page at: %lu",(long) page)); + DBUG_DUMP("buff",(byte*) buff,maria_data_on_page(buff)); +#endif + +#ifdef HAVE_purify + { + /* Clear unitialized part of page to avoid valgrind/purify warnings */ + uint length= maria_data_on_page(buff); + bzero((byte*) buff+length,keyinfo->block_length-length); + length=keyinfo->block_length; + } +#endif + + DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length); + /* + TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when + LSN on the pages will be implemented + */ + DBUG_RETURN(pagecache_write(info->s->pagecache, + &info->s->kfile, page / keyinfo->block_length, + level, buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0)); +} /* maria_write_keypage */ + + + /* Remove page from disk */ + +int _ma_dispose(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, + int level) +{ + my_off_t old_link; + char buff[8]; + uint offset; + pgcache_page_no_t page_no; + DBUG_ENTER("_ma_dispose"); + DBUG_PRINT("enter",("pos: %ld", (long) pos)); + + old_link= info->s->state.key_del; + info->s->state.key_del= pos; + page_no= pos / keyinfo->block_length; + offset= pos % keyinfo->block_length; + mi_sizestore(buff,old_link); + info->s->state.changed|= STATE_NOT_SORTED_PAGES; + + DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length && + info->s->pagecache->block_size == info->s->block_size); + /* + TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when + LSN on the pages will be implemented + */ + DBUG_RETURN(pagecache_write_part(info->s->pagecache, + &info->s->kfile, page_no, level, buff, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, 0, + offset, sizeof(buff))); +} /* _ma_dispose */ + + + /* Make new page on disk */ + +my_off_t _ma_new(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level) +{ + my_off_t pos; + byte *buff; + DBUG_ENTER("_ma_new"); + + if ((pos= info->s->state.key_del) == HA_OFFSET_ERROR) + { + if (info->state->key_file_length >= + info->s->base.max_key_file_length - keyinfo->block_length) + { + my_errno=HA_ERR_INDEX_FILE_FULL; + DBUG_RETURN(HA_OFFSET_ERROR); + } + pos=info->state->key_file_length; + info->state->key_file_length+= keyinfo->block_length; + } + else + { + buff= alloca(info->s->block_size); + DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length && + info->s->pagecache->block_size == info->s->block_size); + /* + TODO: replace PAGECACHE_PLAIN_PAGE with PAGECACHE_LSN_PAGE when + LSN on the pages will be implemented + */ + DBUG_ASSERT(info->s->pagecache->block_size == keyinfo->block_length); + if (!pagecache_read(info->s->pagecache, + &info->s->kfile, pos / keyinfo->block_length, level, + buff, PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, 0)) + pos= HA_OFFSET_ERROR; + else + info->s->state.key_del= mi_sizekorr(buff); + } + info->s->state.changed|= STATE_NOT_SORTED_PAGES; + DBUG_PRINT("exit",("Pos: %ld",(long) pos)); + DBUG_RETURN(pos); +} /* _ma_new */ diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c new file mode 100755 index 00000000000..ae42f702b0a --- /dev/null +++ b/storage/maria/ma_pagecache.c @@ -0,0 +1,4157 @@ +/* Copyright (C) 2000-2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + These functions handle page cacheing for Maria tables. + + One cache can handle many files. + It must contain buffers of the same blocksize. + init_pagecache() should be used to init cache handler. + + The free list (free_block_list) is a stack like structure. + When a block is freed by free_block(), it is pushed onto the stack. + When a new block is required it is first tried to pop one from the stack. + If the stack is empty, it is tried to get a never-used block from the pool. + If this is empty too, then a block is taken from the LRU ring, flushing it + to disk, if necessary. This is handled in find_block(). + With the new free list, the blocks can have three temperatures: + hot, warm and cold (which is free). This is remembered in the block header + by the enum PCBLOCK_TEMPERATURE temperature variable. Remembering the + temperature is necessary to correctly count the number of warm blocks, + which is required to decide when blocks are allowed to become hot. Whenever + a block is inserted to another (sub-)chain, we take the old and new + temperature into account to decide if we got one more or less warm block. + blocks_unused is the sum of never used blocks in the pool and of currently + free blocks. blocks_used is the number of blocks fetched from the pool and + as such gives the maximum number of in-use blocks at any time. +*/ + +#include "maria_def.h" +#include +#include "ma_pagecache.h" +#include +#include +#include + +/* + Some compilation flags have been added specifically for this module + to control the following: + - not to let a thread to yield the control when reading directly + from page cache, which might improve performance in many cases; + to enable this add: + #define SERIALIZED_READ_FROM_CACHE + - to set an upper bound for number of threads simultaneously + using the page cache; this setting helps to determine an optimal + size for hash table and improve performance when the number of + blocks in the page cache much less than the number of threads + accessing it; + to set this number equal to add + #define MAX_THREADS + - to substitute calls of pthread_cond_wait for calls of + pthread_cond_timedwait (wait with timeout set up); + this setting should be used only when you want to trap a deadlock + situation, which theoretically should not happen; + to set timeout equal to seconds add + #define PAGECACHE_TIMEOUT + - to enable the module traps and to send debug information from + page cache module to a special debug log add: + #define PAGECACHE_DEBUG + the name of this debug log file can be set through: + #define PAGECACHE_DEBUG_LOG + if the name is not defined, it's set by default; + if the PAGECACHE_DEBUG flag is not set up and we are in a debug + mode, i.e. when ! defined(DBUG_OFF), the debug information from the + module is sent to the regular debug log. + + Example of the settings: + #define SERIALIZED_READ_FROM_CACHE + #define MAX_THREADS 100 + #define PAGECACHE_TIMEOUT 1 + #define PAGECACHE_DEBUG + #define PAGECACHE_DEBUG_LOG "my_pagecache_debug.log" +*/ + +/* + In key cache we have external raw locking here we use + SERIALIZED_READ_FROM_CACHE to avoid problem of reading + not consistent data from the page. + (keycache functions (key_cache_read(), key_cache_insert() and + key_cache_write()) rely on external MyISAM lock, we don't) +*/ +#define SERIALIZED_READ_FROM_CACHE yes + +#define PCBLOCK_INFO(B) \ + DBUG_PRINT("info", \ + ("block: 0x%lx file: %lu page: %lu s: %0x hshL: 0x%lx req: %u/%u " \ + "wrlock: %c", \ + (ulong)(B), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->file.file : \ + 0), \ + (ulong)((B)->hash_link ? \ + (B)->hash_link->pageno : \ + 0), \ + (B)->status, \ + (ulong)(B)->hash_link, \ + (uint) (B)->requests, \ + (uint)((B)->hash_link ? \ + (B)->hash_link->requests : \ + 0), \ + ((block->status & PCBLOCK_WRLOCK)?'Y':'N'))) + +/* TODO: put it to my_static.c */ +my_bool my_disable_flush_pagecache_blocks= 0; +/** + when flushing pages of a file, it can happen that we take some dirty blocks + out of changed_blocks[]; Checkpoint must not run at this moment. +*/ +uint changed_blocks_is_incomplete= 0; + +#define STRUCT_PTR(TYPE, MEMBER, a) \ + (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) + +/* types of condition variables */ +#define COND_FOR_REQUESTED 0 /* queue of thread waiting for read operation */ +#define COND_FOR_SAVED 1 /* queue of thread waiting for flush */ +#define COND_FOR_WRLOCK 2 /* queue of write lock */ +#define COND_SIZE 3 /* number of COND_* queues */ + +/* offset of LSN on the page */ +#define PAGE_LSN_OFFSET 0 + +typedef pthread_cond_t KEYCACHE_CONDVAR; + +/* descriptor of the page in the page cache block buffer */ +struct st_pagecache_page +{ + PAGECACHE_FILE file; /* file to which the page belongs to */ + pgcache_page_no_t pageno; /* number of the page in the file */ +}; + +/* element in the chain of a hash table bucket */ +struct st_pagecache_hash_link +{ + struct st_pagecache_hash_link + *next, **prev; /* to connect links in the same bucket */ + struct st_pagecache_block_link + *block; /* reference to the block for the page: */ + PAGECACHE_FILE file; /* from such a file */ + pgcache_page_no_t pageno; /* this page */ + uint requests; /* number of requests for the page */ +}; + +/* simple states of a block */ +#define PCBLOCK_ERROR 1 /* an error occurred when performing disk i/o */ +#define PCBLOCK_READ 2 /* the is page in the block buffer */ +#define PCBLOCK_IN_SWITCH 4 /* block is preparing to read new page */ +#define PCBLOCK_REASSIGNED 8 /* block does not accept requests for old page */ +#define PCBLOCK_IN_FLUSH 16 /* block is in flush operation */ +#define PCBLOCK_CHANGED 32 /* block buffer contains a dirty page */ +#define PCBLOCK_WRLOCK 64 /* write locked block */ + +/* page status, returned by find_block */ +#define PAGE_READ 0 +#define PAGE_TO_BE_READ 1 +#define PAGE_WAIT_TO_BE_READ 2 + +/* block temperature determines in which (sub-)chain the block currently is */ +enum PCBLOCK_TEMPERATURE { PCBLOCK_COLD /*free*/ , PCBLOCK_WARM , PCBLOCK_HOT }; + +/* debug info */ +#ifndef DBUG_OFF +static const char *page_cache_page_type_str[]= +{ + /* used only for control page type changing during debugging */ + "EMPTY", + "PLAIN", + "LSN" +}; + +static const char *page_cache_page_write_mode_str[]= +{ + "DELAY", + "NOW", + "DONE" +}; + +static const char *page_cache_page_lock_str[]= +{ + "free -> free", + "read -> read", + "write -> write", + "free -> read", + "free -> write", + "read -> free", + "write -> free", + "write -> read" +}; + +static const char *page_cache_page_pin_str[]= +{ + "pinned -> pinned", + "unpinned -> unpinned", + "unpinned -> pinned", + "pinned -> unpinned" +}; + + +typedef struct st_pagecache_pin_info +{ + struct st_pagecache_pin_info *next, **prev; + struct st_my_thread_var *thread; +} PAGECACHE_PIN_INFO; + +/* + st_pagecache_lock_info structure should be kept in next, prev, thread part + compatible with st_pagecache_pin_info to be compatible in functions. +*/ + +typedef struct st_pagecache_lock_info +{ + struct st_pagecache_lock_info *next, **prev; + struct st_my_thread_var *thread; + my_bool write_lock; +} PAGECACHE_LOCK_INFO; + + +/* service functions maintain debugging info about pin & lock */ + + +/* + Links information about thread pinned/locked the block to the list + + SYNOPSIS + info_link() + list the list to link in + node the node which should be linked +*/ + +static void info_link(PAGECACHE_PIN_INFO **list, PAGECACHE_PIN_INFO *node) +{ + if ((node->next= *list)) + node->next->prev= &(node->next); + *list= node; + node->prev= list; +} + + +/* + Unlinks information about thread pinned/locked the block from the list + + SYNOPSIS + info_unlink() + node the node which should be unlinked +*/ + +static void info_unlink(PAGECACHE_PIN_INFO *node) +{ + if ((*node->prev= node->next)) + node->next->prev= node->prev; +} + + +/* + Finds information about given thread in the list of threads which + pinned/locked this block. + + SYNOPSIS + info_find() + list the list where to find the thread + thread thread ID (reference to the st_my_thread_var + of the thread) + + RETURN + 0 - the thread was not found + pointer to the information node of the thread in the list +*/ + +static PAGECACHE_PIN_INFO *info_find(PAGECACHE_PIN_INFO *list, + struct st_my_thread_var *thread) +{ + register PAGECACHE_PIN_INFO *i= list; + for(; i != 0; i= i->next) + if (i->thread == thread) + return i; + return 0; +} + +#endif /* !DBUG_OFF */ + +/* page cache block */ +struct st_pagecache_block_link +{ + struct st_pagecache_block_link + *next_used, **prev_used; /* to connect links in the LRU chain (ring) */ + struct st_pagecache_block_link + *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ + struct st_pagecache_hash_link + *hash_link; /* backward ptr to referring hash_link */ + WQUEUE + wqueue[COND_SIZE]; /* queues on waiting requests for new/old pages */ + uint requests; /* number of requests for the block */ + byte *buffer; /* buffer for the block page */ + uint status; /* state of the block */ + uint pins; /* pin counter */ +#ifndef DBUG_OFF + PAGECACHE_PIN_INFO *pin_list; + PAGECACHE_LOCK_INFO *lock_list; +#endif + enum PCBLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ + enum pagecache_page_type type; /* type of the block */ + uint hits_left; /* number of hits left until promotion */ + ulonglong last_hit_time; /* timestamp of the last hit */ + LSN rec_lsn; /**< LSN when first became dirty */ + KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ +}; + +#ifndef DBUG_OFF +/* debug checks */ +static my_bool info_check_pin(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_pin mode) +{ + struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, thread); + DBUG_ENTER("info_check_pin"); + DBUG_PRINT("enter", ("thread: 0x%lx pin: %s", + (ulong) thread, page_cache_page_pin_str[mode])); + if (info) + { + if (mode == PAGECACHE_PIN_LEFT_UNPINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_UNPINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_PIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block: 0x%lx ; PIN!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + else + { + if (mode == PAGECACHE_PIN_LEFT_PINNED) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block: 0x%lx ; LEFT_PINNED!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + else if (mode == PAGECACHE_UNPIN) + { + DBUG_PRINT("info", + ("info_check_pin: thread: 0x%lx block: 0x%lx ; UNPIN!!!", + (ulong)thread, (ulong)block)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Debug function which checks current lock/pin state and requested changes + + SYNOPSIS + info_check_lock() + lock requested lock changes + pin requested pin changes + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool info_check_lock(PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) +{ + struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *) info_find((PAGECACHE_PIN_INFO *) block->lock_list, + thread); + DBUG_ENTER("info_check_lock"); + switch(lock) + { + case PAGECACHE_LOCK_LEFT_UNLOCKED: + if (pin != PAGECACHE_PIN_LEFT_UNPINNED || + info) + goto error; + break; + case PAGECACHE_LOCK_LEFT_READLOCKED: + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN_LEFT_PINNED) || + info == 0 || info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_LEFT_WRITELOCKED: + if (pin != PAGECACHE_PIN_LEFT_PINNED || + info == 0 || !info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_READ: + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_PIN) || + info != 0) + goto error; + break; + case PAGECACHE_LOCK_WRITE: + if (pin != PAGECACHE_PIN || + info != 0) + goto error; + break; + case PAGECACHE_LOCK_READ_UNLOCK: + if ((pin != PAGECACHE_PIN_LEFT_UNPINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_WRITE_UNLOCK: + if (pin != PAGECACHE_UNPIN || + info == 0 || !info->write_lock) + goto error; + break; + case PAGECACHE_LOCK_WRITE_TO_READ: + if ((pin != PAGECACHE_PIN_LEFT_PINNED && + pin != PAGECACHE_UNPIN) || + info == 0 || !info->write_lock) + goto error; + break; + } + DBUG_RETURN(0); +error: + DBUG_PRINT("info", + ("info_check_lock: thread: 0x%lx block 0x%lx: info: %d wrt: %d," + "to lock: %s, to pin: %s", + (ulong)thread, (ulong)block, test(info), + (info ? info->write_lock : 0), + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_RETURN(1); +} +#endif + +#define FLUSH_CACHE 2000 /* sort this many blocks at once */ + +static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); +static void test_key_cache(PAGECACHE *pagecache, + const char *where, my_bool lock); + +#define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \ + (ulong) (f).file) & (p->hash_entries-1)) +#define FILE_HASH(f) ((uint) (f).file & (PAGECACHE_CHANGED_BLOCKS_HASH - 1)) + +#define DEFAULT_PAGECACHE_DEBUG_LOG "pagecache_debug.log" + +#if defined(PAGECACHE_DEBUG) && ! defined(PAGECACHE_DEBUG_LOG) +#define PAGECACHE_DEBUG_LOG DEFAULT_PAGECACHE_DEBUG_LOG +#endif + +#if defined(PAGECACHE_DEBUG_LOG) +static FILE *pagecache_debug_log= NULL; +static void pagecache_debug_print _VARARGS((const char *fmt, ...)); +#define PAGECACHE_DEBUG_OPEN \ + if (!pagecache_debug_log) \ + { \ + pagecache_debug_log= fopen(PAGECACHE_DEBUG_LOG, "w"); \ + (void) setvbuf(pagecache_debug_log, NULL, _IOLBF, BUFSIZ); \ + } + +#define PAGECACHE_DEBUG_CLOSE \ + if (pagecache_debug_log) \ + { \ + fclose(pagecache_debug_log); \ + pagecache_debug_log= 0; \ + } +#else +#define PAGECACHE_DEBUG_OPEN +#define PAGECACHE_DEBUG_CLOSE +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) +#define KEYCACHE_DBUG_PRINT(l, m) \ + { if (pagecache_debug_log) \ + fprintf(pagecache_debug_log, "%s: ", l); \ + pagecache_debug_print m; } + +#define KEYCACHE_DBUG_ASSERT(a) \ + { if (! (a) && pagecache_debug_log) \ + fclose(pagecache_debug_log); \ + assert(a); } +#else +#define KEYCACHE_DBUG_PRINT(l, m) DBUG_PRINT(l, m) +#define KEYCACHE_DBUG_ASSERT(a) DBUG_ASSERT(a) +#endif /* defined(PAGECACHE_DEBUG_LOG) && defined(PAGECACHE_DEBUG) */ + +#if defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) +#ifdef THREAD +static long pagecache_thread_id; +#define KEYCACHE_THREAD_TRACE(l) \ + KEYCACHE_DBUG_PRINT(l,("|thread %ld",pagecache_thread_id)) + +#define KEYCACHE_THREAD_TRACE_BEGIN(l) \ + { struct st_my_thread_var *thread_var= my_thread_var; \ + pagecache_thread_id= thread_var->id; \ + KEYCACHE_DBUG_PRINT(l,("[thread %ld",pagecache_thread_id)) } + +#define KEYCACHE_THREAD_TRACE_END(l) \ + KEYCACHE_DBUG_PRINT(l,("]thread %ld",pagecache_thread_id)) +#else /* THREAD */ +#define KEYCACHE_THREAD_TRACE(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_BEGIN(l) KEYCACHE_DBUG_PRINT(l,("")) +#define KEYCACHE_THREAD_TRACE_END(l) KEYCACHE_DBUG_PRINT(l,("")) +#endif /* THREAD */ +#else +#define KEYCACHE_THREAD_TRACE_BEGIN(l) +#define KEYCACHE_THREAD_TRACE_END(l) +#define KEYCACHE_THREAD_TRACE(l) +#endif /* defined(PAGECACHE_DEBUG) || !defined(DBUG_OFF) */ + +#define PCBLOCK_NUMBER(p, b) \ + ((uint) (((char*)(b)-(char *) p->block_root)/sizeof(PAGECACHE_BLOCK_LINK))) +#define PAGECACHE_HASH_LINK_NUMBER(p, h) \ + ((uint) (((char*)(h)-(char *) p->hash_link_root)/ \ + sizeof(PAGECACHE_HASH_LINK))) + +#if (defined(PAGECACHE_TIMEOUT) && !defined(__WIN__)) || defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex); +#else +#define pagecache_pthread_cond_wait pthread_cond_wait +#endif + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex); +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex); +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond); +#define pagecache_pthread_mutex_lock(M) \ +{ DBUG_PRINT("lock", ("mutex lock 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_lock(M);} +#define pagecache_pthread_mutex_unlock(M) \ +{ DBUG_PRINT("lock", ("mutex unlock 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_mutex_unlock(M);} +#define pagecache_pthread_cond_signal(M) \ +{ DBUG_PRINT("lock", ("signal 0x%lx %u", (ulong)(M), __LINE__)); \ + ___pagecache_pthread_cond_signal(M);} +#else +#define pagecache_pthread_mutex_lock pthread_mutex_lock +#define pagecache_pthread_mutex_unlock pthread_mutex_unlock +#define pagecache_pthread_cond_signal pthread_cond_signal +#endif /* defined(PAGECACHE_DEBUG) */ + +extern my_bool translog_flush(LSN lsn); + +/* + Write page to the disk + + SYNOPSIS + pagecache_fwrite() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer which we will write + type - page type (plain or with LSN) + flags - MYF() flags + + RETURN + 0 - OK + !=0 - Error +*/ + +static uint pagecache_fwrite(PAGECACHE *pagecache, + PAGECACHE_FILE *filedesc, + byte *buffer, + pgcache_page_no_t pageno, + enum pagecache_page_type type, + myf flags) +{ + DBUG_ENTER("pagecache_fwrite"); + if (type == PAGECACHE_LSN_PAGE) + { + LSN lsn; + DBUG_PRINT("info", ("Log handler call")); + /* TODO: integrate with page format */ + lsn= lsn_korr(buffer + PAGE_LSN_OFFSET); + /* + check CONTROL_FILE_IMPOSSIBLE_FILENO & + CONTROL_FILE_IMPOSSIBLE_LOG_OFFSET + */ + DBUG_ASSERT(lsn != 0); + translog_flush(lsn); + } + DBUG_RETURN(my_pwrite(filedesc->file, buffer, pagecache->block_size, + (pageno)<<(pagecache->shift), flags)); +} + + +/* + Read page from the disk + + SYNOPSIS + pagecache_fread() + pagecache - page cache pointer + filedesc - pagecache file descriptor structure + buffer - buffer in which we will read + pageno - page number + flags - MYF() flags +*/ +#define pagecache_fread(pagecache, filedesc, buffer, pageno, flags) \ + my_pread((filedesc)->file, buffer, pagecache->block_size, \ + (pageno)<<(pagecache->shift), flags) + + +/* + next_power(value) is 2 at the power of (1+floor(log2(value))); + e.g. next_power(2)=4, next_power(3)=4. +*/ +static inline uint next_power(uint value) +{ + return (uint) my_round_up_to_next_power((uint32) value) << 1; +} + + +/* + Initialize a page cache + + SYNOPSIS + init_pagecache() + pagecache pointer to a page cache data structure + key_cache_block_size size of blocks to keep cached data + use_mem total memory to use for the key cache + division_limit division limit (may be zero) + age_threshold age threshold (may be zero) + block_size size of block (should be power of 2) + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + if pagecache->inited != 0 we assume that the key cache + is already initialized. This is for now used by myisamchk, but shouldn't + be something that a program should rely on! + + It's assumed that no two threads call this function simultaneously + referring to the same key cache handle. + +*/ + +int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size) +{ + uint blocks, hash_links, length; + int error; + DBUG_ENTER("init_pagecache"); + DBUG_ASSERT(block_size >= 512); + + PAGECACHE_DEBUG_OPEN; + if (pagecache->inited && pagecache->disk_blocks > 0) + { + DBUG_PRINT("warning",("key cache already in use")); + DBUG_RETURN(0); + } + + pagecache->global_cache_w_requests= pagecache->global_cache_r_requests= 0; + pagecache->global_cache_read= pagecache->global_cache_write= 0; + pagecache->disk_blocks= -1; + if (! pagecache->inited) + { + pagecache->inited= 1; + pagecache->in_init= 0; + pthread_mutex_init(&pagecache->cache_lock, MY_MUTEX_INIT_FAST); + pagecache->resize_queue.last_thread= NULL; + } + + pagecache->mem_size= use_mem; + pagecache->block_size= block_size; + pagecache->shift= my_bit_log2(block_size); + DBUG_PRINT("info", ("block_size: %u", + block_size)); + DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size); + + blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + + 2 * sizeof(PAGECACHE_HASH_LINK) + + sizeof(PAGECACHE_HASH_LINK*) * + 5/4 + block_size)); + /* + We need to support page cache with just one block to be able to do + scanning of rows-in-block files + */ + if (blocks >= 1) + { + for ( ; ; ) + { + /* Set my_hash_entries to the next bigger 2 power */ + if ((pagecache->hash_entries= next_power(blocks)) < + (blocks) * 5/4) + pagecache->hash_entries<<= 1; + hash_links= 2 * blocks; +#if defined(MAX_THREADS) + if (hash_links < MAX_THREADS + blocks - 1) + hash_links= MAX_THREADS + blocks - 1; +#endif + while ((length= (ALIGN_SIZE(blocks * sizeof(PAGECACHE_BLOCK_LINK)) + + ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + + ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))) + + (((ulong) blocks) << pagecache->shift) > use_mem) + blocks--; + /* Allocate memory for cache page buffers */ + if ((pagecache->block_mem= + my_large_malloc((ulong) blocks * pagecache->block_size, + MYF(MY_WME)))) + { + /* + Allocate memory for blocks, hash_links and hash entries; + For each block 2 hash links are allocated + */ + if ((pagecache->block_root= + (PAGECACHE_BLOCK_LINK*) my_malloc((uint) length, + MYF(0)))) + break; + my_large_free(pagecache->block_mem, MYF(0)); + pagecache->block_mem= 0; + } + if (blocks < 8) + { + my_errno= ENOMEM; + goto err; + } + blocks= blocks / 4*3; + } + pagecache->blocks_unused= (ulong) blocks; + pagecache->disk_blocks= (int) blocks; + pagecache->hash_links= hash_links; + pagecache->hash_root= + (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root + + ALIGN_SIZE(blocks*sizeof(PAGECACHE_BLOCK_LINK))); + pagecache->hash_link_root= + (PAGECACHE_HASH_LINK*) ((char*) pagecache->hash_root + + ALIGN_SIZE((sizeof(PAGECACHE_HASH_LINK*) * + pagecache->hash_entries))); + bzero((byte*) pagecache->block_root, + pagecache->disk_blocks * sizeof(PAGECACHE_BLOCK_LINK)); + bzero((byte*) pagecache->hash_root, + pagecache->hash_entries * sizeof(PAGECACHE_HASH_LINK*)); + bzero((byte*) pagecache->hash_link_root, + pagecache->hash_links * sizeof(PAGECACHE_HASH_LINK)); + pagecache->hash_links_used= 0; + pagecache->free_hash_list= NULL; + pagecache->blocks_used= pagecache->blocks_changed= 0; + + pagecache->global_blocks_changed= 0; + pagecache->blocks_available=0; /* For debugging */ + + /* The LRU chain is empty after initialization */ + pagecache->used_last= NULL; + pagecache->used_ins= NULL; + pagecache->free_block_list= NULL; + pagecache->time= 0; + pagecache->warm_blocks= 0; + pagecache->min_warm_blocks= (division_limit ? + blocks * division_limit / 100 + 1 : + blocks); + pagecache->age_threshold= (age_threshold ? + blocks * age_threshold / 100 : + blocks); + + pagecache->cnt_for_resize_op= 0; + pagecache->resize_in_flush= 0; + pagecache->can_be_used= 1; + + pagecache->waiting_for_hash_link.last_thread= NULL; + pagecache->waiting_for_block.last_thread= NULL; + DBUG_PRINT("exit", + ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ + hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", + pagecache->disk_blocks, (long) pagecache->block_root, + pagecache->hash_entries, (long) pagecache->hash_root, + pagecache->hash_links, (long) pagecache->hash_link_root)); + bzero((gptr) pagecache->changed_blocks, + sizeof(pagecache->changed_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + bzero((gptr) pagecache->file_blocks, + sizeof(pagecache->file_blocks[0]) * + PAGECACHE_CHANGED_BLOCKS_HASH); + } + + pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0; + DBUG_RETURN((int) pagecache->disk_blocks); + +err: + error= my_errno; + pagecache->disk_blocks= 0; + pagecache->blocks= 0; + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + } + if (pagecache->block_root) + { + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + my_errno= error; + pagecache->can_be_used= 0; + DBUG_RETURN(0); +} + + +/* + Flush all blocks in the key cache to disk +*/ + +#ifdef NOT_USED +static int flush_all_key_blocks(PAGECACHE *pagecache) +{ +#if defined(PAGECACHE_DEBUG) + uint cnt=0; +#endif + while (pagecache->blocks_changed > 0) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->used_last->next_used ; ; block=block->next_used) + { + if (block->hash_link) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + if (flush_pagecache_blocks_int(pagecache, &block->hash_link->file, + FLUSH_RELEASE)) + return 1; + break; + } + if (block == pagecache->used_last) + break; + } + } + return 0; +} +#endif /* NOT_USED */ + +/* + Resize a key cache + + SYNOPSIS + resize_pagecache() + pagecache pointer to a page cache data structure + use_mem total memory to use for the new key cache + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + number of blocks in the key cache, if successful, + 0 - otherwise. + + NOTES. + The function first compares the memory size parameter + with the key cache value. + + If they differ the function free the the memory allocated for the + old key cache blocks by calling the end_pagecache function and + then rebuilds the key cache with new blocks by calling + init_key_cache. + + The function starts the operation only when all other threads + performing operations with the key cache let her to proceed + (when cnt_for_resize=0). + + Before being usable, this function needs: + - to receive fixes for BUG#17332 "changing key_buffer_size on a running + server can crash under load" similar to those done to the key cache + - to have us (Sanja) look at the additional constraints placed on + resizing, due to the page locking specific to this page cache. + So we disable it for now. +*/ +#if NOT_USED /* keep disabled until code is fixed see above !! */ +int resize_pagecache(PAGECACHE *pagecache, + my_size_t use_mem, uint division_limit, + uint age_threshold) +{ + int blocks; +#ifdef THREAD + struct st_my_thread_var *thread; + WQUEUE *wqueue; + +#endif + DBUG_ENTER("resize_pagecache"); + + if (!pagecache->inited) + DBUG_RETURN(pagecache->disk_blocks); + + if(use_mem == pagecache->mem_size) + { + change_pagecache_param(pagecache, division_limit, age_threshold); + DBUG_RETURN(pagecache->disk_blocks); + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + +#ifdef THREAD + wqueue= &pagecache->resize_queue; + thread= my_thread_var; + wqueue_link_into_queue(wqueue, thread); + + while (wqueue->last_thread->next != thread) + { + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + } +#endif + + pagecache->resize_in_flush= 1; + if (flush_all_key_blocks(pagecache)) + { + /* TODO: if this happens, we should write a warning in the log file ! */ + pagecache->resize_in_flush= 0; + blocks= 0; + pagecache->can_be_used= 0; + goto finish; + } + pagecache->resize_in_flush= 0; + pagecache->can_be_used= 0; +#ifdef THREAD + while (pagecache->cnt_for_resize_op) + { + KEYCACHE_DBUG_PRINT("resize_pagecache: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + } +#else + KEYCACHE_DBUG_ASSERT(pagecache->cnt_for_resize_op == 0); +#endif + + end_pagecache(pagecache, 0); /* Don't free mutex */ + /* The following will work even if use_mem is 0 */ + blocks= init_pagecache(pagecache, pagecache->block_size, use_mem, + division_limit, age_threshold); + +finish: +#ifdef THREAD + wqueue_unlink_from_queue(wqueue, thread); + /* Signal for the next resize request to proceeed if any */ + if (wqueue->last_thread) + { + KEYCACHE_DBUG_PRINT("resize_pagecache: signal", + ("thread %ld", wqueue->last_thread->next->id)); + pagecache_pthread_cond_signal(&wqueue->last_thread->next->suspend); + } +#endif + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(blocks); +} +#endif /* 0 */ + + +/* + Increment counter blocking resize key cache operation +*/ +static inline void inc_counter_for_resize_op(PAGECACHE *pagecache) +{ + pagecache->cnt_for_resize_op++; +} + + +/* + Decrement counter blocking resize key cache operation; + Signal the operation to proceed when counter becomes equal zero +*/ +static inline void dec_counter_for_resize_op(PAGECACHE *pagecache) +{ +#ifdef THREAD + struct st_my_thread_var *last_thread; + if (!--pagecache->cnt_for_resize_op && + (last_thread= pagecache->resize_queue.last_thread)) + { + KEYCACHE_DBUG_PRINT("dec_counter_for_resize_op: signal", + ("thread %ld", last_thread->next->id)); + pagecache_pthread_cond_signal(&last_thread->next->suspend); + } +#else + pagecache->cnt_for_resize_op--; +#endif +} + +/* + Change the page cache parameters + + SYNOPSIS + change_pagecache_param() + pagecache pointer to a page cache data structure + division_limit new division limit (if not zero) + age_threshold new age threshold (if not zero) + + RETURN VALUE + none + + NOTES. + Presently the function resets the key cache parameters + concerning midpoint insertion strategy - division_limit and + age_threshold. +*/ + +void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, + uint age_threshold) +{ + DBUG_ENTER("change_pagecache_param"); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (division_limit) + pagecache->min_warm_blocks= (pagecache->disk_blocks * + division_limit / 100 + 1); + if (age_threshold) + pagecache->age_threshold= (pagecache->disk_blocks * + age_threshold / 100); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_VOID_RETURN; +} + + +/* + Removes page cache from memory. Does NOT flush pages to disk. + + SYNOPSIS + end_pagecache() + pagecache page cache handle + cleanup Complete free (Free also mutex for key cache) + + RETURN VALUE + none +*/ + +void end_pagecache(PAGECACHE *pagecache, my_bool cleanup) +{ + DBUG_ENTER("end_pagecache"); + DBUG_PRINT("enter", ("key_cache: 0x%lx", (long) pagecache)); + + if (!pagecache->inited) + DBUG_VOID_RETURN; + + if (pagecache->disk_blocks > 0) + { + if (pagecache->block_mem) + { + my_large_free((gptr) pagecache->block_mem, MYF(0)); + pagecache->block_mem= NULL; + my_free((gptr) pagecache->block_root, MYF(0)); + pagecache->block_root= NULL; + } + pagecache->disk_blocks= -1; + /* Reset blocks_changed to be safe if flush_all_key_blocks is called */ + pagecache->blocks_changed= 0; + } + + DBUG_PRINT("status", ("used: %lu changed: %lu w_requests: %lu " + "writes: %lu r_requests: %lu reads: %lu", + pagecache->blocks_used, pagecache->global_blocks_changed, + (ulong) pagecache->global_cache_w_requests, + (ulong) pagecache->global_cache_write, + (ulong) pagecache->global_cache_r_requests, + (ulong) pagecache->global_cache_read)); + + if (cleanup) + { + pthread_mutex_destroy(&pagecache->cache_lock); + pagecache->inited= pagecache->can_be_used= 0; + PAGECACHE_DEBUG_CLOSE; + } + DBUG_VOID_RETURN; +} /* end_pagecache */ + + +/* + Unlink a block from the chain of dirty/clean blocks +*/ + +static inline void unlink_changed(PAGECACHE_BLOCK_LINK *block) +{ + if (block->next_changed) + block->next_changed->prev_changed= block->prev_changed; + *block->prev_changed= block->next_changed; +} + + +/* + Link a block into the chain of dirty/clean blocks +*/ + +static inline void link_changed(PAGECACHE_BLOCK_LINK *block, + PAGECACHE_BLOCK_LINK **phead) +{ + block->prev_changed= phead; + if ((block->next_changed= *phead)) + (*phead)->prev_changed= &block->next_changed; + *phead= block; +} + + +/* + Unlink a block from the chain of dirty/clean blocks, if it's asked for, + and link it to the chain of clean blocks for the specified file +*/ + +static void link_to_file_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + PAGECACHE_FILE *file, my_bool unlink) +{ + if (unlink) + unlink_changed(block); + link_changed(block, &pagecache->file_blocks[FILE_HASH(*file)]); + if (block->status & PCBLOCK_CHANGED) + { + block->status&= ~PCBLOCK_CHANGED; + block->rec_lsn= 0; + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + } +} + + +/* + Unlink a block from the chain of clean blocks for the specified + file and link it to the chain of dirty blocks for this file +*/ + +static inline void link_to_changed_list(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + unlink_changed(block); + link_changed(block, + &pagecache->changed_blocks[FILE_HASH(block->hash_link->file)]); + block->status|=PCBLOCK_CHANGED; + pagecache->blocks_changed++; + pagecache->global_blocks_changed++; +} + + +/* + Link a block to the LRU chain at the beginning or at the end of + one of two parts. + + SYNOPSIS + link_block() + pagecache pointer to a page cache data structure + block pointer to the block to link to the LRU chain + hot <-> to link the block into the hot subchain + at_end <-> to link the block at the end of the subchain + + RETURN VALUE + none + + NOTES. + The LRU chain is represented by a curcular list of block structures. + The list is double-linked of the type (**prev,*next) type. + The LRU chain is divided into two parts - hot and warm. + There are two pointers to access the last blocks of these two + parts. The beginning of the warm part follows right after the + end of the hot part. + Only blocks of the warm part can be used for replacement. + The first block from the beginning of this subchain is always + taken for eviction (pagecache->last_used->next) + + LRU chain: +------+ H O T +------+ + +----| end |----...<----| beg |----+ + | +------+last +------+ | + v<-link in latest hot (new end) | + | link in latest warm (new end)->^ + | +------+ W A R M +------+ | + +----| beg |---->...----| end |----+ + +------+ +------+ins + first for eviction +*/ + +static void link_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, + my_bool hot, my_bool at_end) +{ + PAGECACHE_BLOCK_LINK *ins; + PAGECACHE_BLOCK_LINK **ptr_ins; + + PCBLOCK_INFO(block); + KEYCACHE_DBUG_ASSERT(! (block->hash_link && block->hash_link->requests)); +#ifdef THREAD + if (!hot && pagecache->waiting_for_block.last_thread) + { + /* Signal that in the LRU warm sub-chain an available block has appeared */ + struct st_my_thread_var *last_thread= + pagecache->waiting_for_block.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + PAGECACHE_HASH_LINK *hash_link= + (PAGECACHE_HASH_LINK *) first_thread->opt_info; + struct st_my_thread_var *thread; + do + { + thread= next_thread; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if ((PAGECACHE_HASH_LINK *) thread->opt_info == hash_link) + { + KEYCACHE_DBUG_PRINT("link_block: signal", ("thread: %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + wqueue_unlink_from_queue(&pagecache->waiting_for_block, thread); + block->requests++; + } + } + while (thread != last_thread); + hash_link->block= block; + KEYCACHE_THREAD_TRACE("link_block: after signaling"); +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_PRINT("link_block", + ("linked,unlinked block: %u status: %x #requests: %u #available: %u", + PCBLOCK_NUMBER(pagecache, block), block->status, + block->requests, pagecache->blocks_available)); +#endif + return; + } +#else /* THREAD */ + KEYCACHE_DBUG_ASSERT(! (!hot && pagecache->waiting_for_block.last_thread)); + /* Condition not transformed using DeMorgan, to keep the text identical */ +#endif /* THREAD */ + ptr_ins= hot ? &pagecache->used_ins : &pagecache->used_last; + ins= *ptr_ins; + if (ins) + { + ins->next_used->prev_used= &block->next_used; + block->next_used= ins->next_used; + block->prev_used= &ins->next_used; + ins->next_used= block; + if (at_end) + *ptr_ins= block; + } + else + { + /* The LRU chain is empty */ + pagecache->used_last= pagecache->used_ins= block->next_used= block; + block->prev_used= &block->next_used; + } + KEYCACHE_THREAD_TRACE("link_block"); +#if defined(PAGECACHE_DEBUG) + pagecache->blocks_available++; + KEYCACHE_DBUG_PRINT("link_block", + ("linked block: %u:%1u status: %x #requests: %u #available: %u", + PCBLOCK_NUMBER(pagecache, block), at_end, block->status, + block->requests, pagecache->blocks_available)); + KEYCACHE_DBUG_ASSERT((ulong) pagecache->blocks_available <= + pagecache->blocks_used); +#endif +} + + +/* + Unlink a block from the LRU chain + + SYNOPSIS + unlink_block() + pagecache pointer to a page cache data structure + block pointer to the block to unlink from the LRU chain + + RETURN VALUE + none + + NOTES. + See NOTES for link_block +*/ + +static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("unlink_block"); + DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block)); + if (block->next_used == block) + /* The list contains only one member */ + pagecache->used_last= pagecache->used_ins= NULL; + else + { + block->next_used->prev_used= block->prev_used; + *block->prev_used= block->next_used; + if (pagecache->used_last == block) + pagecache->used_last= STRUCT_PTR(PAGECACHE_BLOCK_LINK, + next_used, block->prev_used); + if (pagecache->used_ins == block) + pagecache->used_ins= STRUCT_PTR(PAGECACHE_BLOCK_LINK, + next_used, block->prev_used); + } + block->next_used= NULL; + + KEYCACHE_THREAD_TRACE("unlink_block"); +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(pagecache->blocks_available != 0); + pagecache->blocks_available--; + KEYCACHE_DBUG_PRINT("unlink_block", + ("unlinked block: 0x%lx (%u) status: %x #requests: %u #available: %u", + (ulong)block, PCBLOCK_NUMBER(pagecache, block), + block->status, + block->requests, pagecache->blocks_available)); + PCBLOCK_INFO(block); +#endif + DBUG_VOID_RETURN; +} + + +/* + Register requests for a block + + SYNOPSIS + reg_requests() + pagecache this page cache reference + block the block we request reference + count how many requests we register (it is 1 everywhere) + + NOTE + Registration of request means we are going to use this block so we exclude + it from the LRU if it is first request +*/ +static void reg_requests(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block, + int count) +{ + DBUG_ENTER("reg_requests"); + DBUG_PRINT("enter", ("block: 0x%lx (%u) status: %x reqs: %u", + (ulong)block, PCBLOCK_NUMBER(pagecache, block), + block->status, block->requests)); + PCBLOCK_INFO(block); + if (! block->requests) + /* First request for the block unlinks it */ + unlink_block(pagecache, block); + block->requests+= count; + DBUG_VOID_RETURN; +} + + +/* + Unregister request for a block + linking it to the LRU chain if it's the last request + + SYNOPSIS + unreg_request() + pagecache pointer to a page cache data structure + block pointer to the block to link to the LRU chain + at_end <-> to link the block at the end of the LRU chain + + RETURN VALUE + none + + NOTES. + Every linking to the LRU chain decrements by one a special block + counter (if it's positive). If the at_end parameter is TRUE the block is + added either at the end of warm sub-chain or at the end of hot sub-chain. + It is added to the hot subchain if its counter is zero and number of + blocks in warm sub-chain is not less than some low limit (determined by + the division_limit parameter). Otherwise the block is added to the warm + sub-chain. If the at_end parameter is FALSE the block is always added + at beginning of the warm sub-chain. + Thus a warm block can be promoted to the hot sub-chain when its counter + becomes zero for the first time. + At the same time the block at the very beginning of the hot subchain + might be moved to the beginning of the warm subchain if it stays untouched + for a too long time (this time is determined by parameter age_threshold). +*/ + +static void unreg_request(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, int at_end) +{ + DBUG_ENTER("unreg_request"); + DBUG_PRINT("enter", ("block 0x%lx (%u) status: %x reqs: %u", + (ulong)block, PCBLOCK_NUMBER(pagecache, block), + block->status, block->requests)); + PCBLOCK_INFO(block); + DBUG_ASSERT(block->requests > 0); + if (! --block->requests) + { + my_bool hot; + if (block->hits_left) + block->hits_left--; + hot= !block->hits_left && at_end && + pagecache->warm_blocks > pagecache->min_warm_blocks; + if (hot) + { + if (block->temperature == PCBLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= PCBLOCK_HOT; + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", + pagecache->warm_blocks)); + } + link_block(pagecache, block, hot, (my_bool)at_end); + block->last_hit_time= pagecache->time; + pagecache->time++; + + block= pagecache->used_ins; + /* Check if we should link a hot block to the warm block */ + if (block && pagecache->time - block->last_hit_time > + pagecache->age_threshold) + { + unlink_block(pagecache, block); + link_block(pagecache, block, 0, 0); + if (block->temperature != PCBLOCK_WARM) + { + pagecache->warm_blocks++; + block->temperature= PCBLOCK_WARM; + } + KEYCACHE_DBUG_PRINT("unreg_request", ("#warm_blocks: %lu", + pagecache->warm_blocks)); + } + } + DBUG_VOID_RETURN; +} + +/* + Remove a reader of the page in block +*/ + +static inline void remove_reader(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("remove_reader"); + PCBLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests > 0); +#ifdef THREAD + if (! --block->hash_link->requests && block->condvar) + pagecache_pthread_cond_signal(block->condvar); +#else + --block->hash_link->requests; +#endif + DBUG_VOID_RETURN; +} + + +/* + Wait until the last reader of the page in block + signals on its termination +*/ + +static inline void wait_for_readers(PAGECACHE *pagecache + __attribute__((unused)), + PAGECACHE_BLOCK_LINK *block) +{ +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + while (block->hash_link->requests) + { + KEYCACHE_DBUG_PRINT("wait_for_readers: wait", + ("suspend thread: %ld block: %u", + thread->id, PCBLOCK_NUMBER(pagecache, block))); + block->condvar= &thread->suspend; + pagecache_pthread_cond_wait(&thread->suspend, &pagecache->cache_lock); + block->condvar= NULL; + } +#else + KEYCACHE_DBUG_ASSERT(block->hash_link->requests == 0); +#endif +} + + +/* + Add a hash link to a bucket in the hash_table +*/ + +static inline void link_hash(PAGECACHE_HASH_LINK **start, + PAGECACHE_HASH_LINK *hash_link) +{ + if (*start) + (*start)->prev= &hash_link->next; + hash_link->next= *start; + hash_link->prev= start; + *start= hash_link; +} + + +/* + Remove a hash link from the hash table +*/ + +static void unlink_hash(PAGECACHE *pagecache, PAGECACHE_HASH_LINK *hash_link) +{ + KEYCACHE_DBUG_PRINT("unlink_hash", ("fd: %u pos_ %lu #requests=%u", + (uint) hash_link->file.file, (ulong) hash_link->pageno, + hash_link->requests)); + KEYCACHE_DBUG_ASSERT(hash_link->requests == 0); + if ((*hash_link->prev= hash_link->next)) + hash_link->next->prev= hash_link->prev; + hash_link->block= NULL; +#ifdef THREAD + if (pagecache->waiting_for_hash_link.last_thread) + { + /* Signal that a free hash link has appeared */ + struct st_my_thread_var *last_thread= + pagecache->waiting_for_hash_link.last_thread; + struct st_my_thread_var *first_thread= last_thread->next; + struct st_my_thread_var *next_thread= first_thread; + PAGECACHE_PAGE *first_page= (PAGECACHE_PAGE *) (first_thread->opt_info); + struct st_my_thread_var *thread; + + hash_link->file= first_page->file; + hash_link->pageno= first_page->pageno; + do + { + PAGECACHE_PAGE *page; + thread= next_thread; + page= (PAGECACHE_PAGE *) thread->opt_info; + next_thread= thread->next; + /* + We notify about the event all threads that ask + for the same page as the first thread in the queue + */ + if (page->file.file == hash_link->file.file && + page->pageno == hash_link->pageno) + { + KEYCACHE_DBUG_PRINT("unlink_hash: signal", ("thread %ld", thread->id)); + pagecache_pthread_cond_signal(&thread->suspend); + wqueue_unlink_from_queue(&pagecache->waiting_for_hash_link, thread); + } + } + while (thread != last_thread); + link_hash(&pagecache->hash_root[PAGECACHE_HASH(pagecache, + hash_link->file, + hash_link->pageno)], + hash_link); + return; + } +#else /* THREAD */ + KEYCACHE_DBUG_ASSERT(! (pagecache->waiting_for_hash_link.last_thread)); +#endif /* THREAD */ + hash_link->next= pagecache->free_hash_list; + pagecache->free_hash_list= hash_link; +} + + +/* + Get the hash link for the page if it is in the cache (do not put the + page in the cache if it is absent there) + + SYNOPSIS + get_present_hash_link() + pagecache Pagecache reference + file file ID + pageno page number in the file + start where to put pointer to found hash bucket (for + direct referring it) + + RETURN + found hashlink pointer +*/ + +static PAGECACHE_HASH_LINK *get_present_hash_link(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + PAGECACHE_HASH_LINK ***start) +{ + reg1 PAGECACHE_HASH_LINK *hash_link; +#if defined(PAGECACHE_DEBUG) + int cnt; +#endif + DBUG_ENTER("get_present_hash_link"); + + KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", + (uint) file->file, (ulong) pageno)); + + /* + Find the bucket in the hash table for the pair (file, pageno); + start contains the head of the bucket list, + hash_link points to the first member of the list + */ + hash_link= *(*start= &pagecache->hash_root[PAGECACHE_HASH(pagecache, + *file, pageno)]); +#if defined(PAGECACHE_DEBUG) + cnt= 0; +#endif + /* Look for an element for the pair (file, pageno) in the bucket chain */ + while (hash_link && + (hash_link->pageno != pageno || + hash_link->file.file != file->file)) + { + hash_link= hash_link->next; +#if defined(PAGECACHE_DEBUG) + cnt++; + if (! (cnt <= pagecache->hash_links_used)) + { + int i; + for (i=0, hash_link= **start ; + i < cnt ; i++, hash_link= hash_link->next) + { + KEYCACHE_DBUG_PRINT("get_present_hash_link", ("fd: %u pos: %lu", + (uint) hash_link->file.file, (ulong) hash_link->pageno)); + } + } + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->hash_links_used); +#endif + } + if (hash_link) + { + /* Register the request for the page */ + hash_link->requests++; + } + /* + As soon as the caller will release the page cache's lock, "hash_link" + will be potentially obsolete (unusable) information. + */ + DBUG_RETURN(hash_link); +} + + +/* + Get the hash link for a page +*/ + +static PAGECACHE_HASH_LINK *get_hash_link(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno) +{ + reg1 PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_HASH_LINK **start; + + KEYCACHE_DBUG_PRINT("get_hash_link", ("fd: %u pos: %lu", + (uint) file->file, (ulong) pageno)); + +restart: + /* try to find the page in the cache */ + hash_link= get_present_hash_link(pagecache, file, pageno, + &start); + if (!hash_link) + { + /* There is no hash link in the hash table for the pair (file, pageno) */ + if (pagecache->free_hash_list) + { + hash_link= pagecache->free_hash_list; + pagecache->free_hash_list= hash_link->next; + } + else if (pagecache->hash_links_used < pagecache->hash_links) + { + hash_link= &pagecache->hash_link_root[pagecache->hash_links_used++]; + } + else + { +#ifdef THREAD + /* Wait for a free hash link */ + struct st_my_thread_var *thread= my_thread_var; + PAGECACHE_PAGE page; + KEYCACHE_DBUG_PRINT("get_hash_link", ("waiting")); + page.file= *file; + page.pageno= pageno; + thread->opt_info= (void *) &page; + wqueue_link_into_queue(&pagecache->waiting_for_hash_link, thread); + KEYCACHE_DBUG_PRINT("get_hash_link: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + thread->opt_info= NULL; +#else + KEYCACHE_DBUG_ASSERT(0); +#endif + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + hash_link->file= *file; + hash_link->pageno= pageno; + link_hash(start, hash_link); + /* Register the request for the page */ + hash_link->requests++; + } + + return hash_link; +} + + +/* + Get a block for the file page requested by a pagecache read/write operation; + If the page is not in the cache return a free block, if there is none + return the lru block after saving its buffer if the page is dirty. + + SYNOPSIS + + find_block() + pagecache pointer to a page cache data structure + file handler for the file to read page from + pageno number of the page in the file + init_hits_left how initialize the block counter for the page + wrmode <-> get for writing + reg_req Register request to thye page + page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ} + + RETURN VALUE + Pointer to the found block if successful, 0 - otherwise + + NOTES. + For the page from file positioned at pageno the function checks whether + the page is in the key cache specified by the first parameter. + If this is the case it immediately returns the block. + If not, the function first chooses a block for this page. If there is + no not used blocks in the key cache yet, the function takes the block + at the very beginning of the warm sub-chain. It saves the page in that + block if it's dirty before returning the pointer to it. + The function returns in the page_st parameter the following values: + PAGE_READ - if page already in the block, + PAGE_TO_BE_READ - if it is to be read yet by the current thread + WAIT_TO_BE_READ - if it is to be read by another thread + If an error occurs THE PCBLOCK_ERROR bit is set in the block status. + It might happen that there are no blocks in LRU chain (in warm part) - + all blocks are unlinked for some read/write operations. Then the function + waits until first of this operations links any block back. +*/ + +static PAGECACHE_BLOCK_LINK *find_block(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + int init_hits_left, + my_bool wrmode, + my_bool reg_req, + int *page_st) +{ + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_BLOCK_LINK *block; + int error= 0; + int page_status; + + DBUG_ENTER("find_block"); + KEYCACHE_THREAD_TRACE("find_block:begin"); + DBUG_PRINT("enter", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, wrmode)); + KEYCACHE_DBUG_PRINT("find_block", ("fd: %d pos: %lu wrmode: %d", + file->file, (ulong) pageno, + wrmode)); +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "start of find_block", 0);); +#endif + +restart: + /* Find the hash link for the requested page (file, pageno) */ + hash_link= get_hash_link(pagecache, file, pageno); + + page_status= -1; + if ((block= hash_link->block) && + block->hash_link == hash_link && (block->status & PCBLOCK_READ)) + page_status= PAGE_READ; + + if (wrmode && pagecache->resize_in_flush) + { + /* This is a write request during the flush phase of a resize operation */ + + if (page_status != PAGE_READ) + { + /* We don't need the page in the cache: we are going to write on disk */ + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + unlink_hash(pagecache, hash_link); + return 0; + } + if (!(block->status & PCBLOCK_IN_FLUSH)) + { + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + /* + Remove block to invalidate the page in the block buffer + as we are going to write directly on disk. + Although we have an exclusive lock for the updated key part + the control can be yielded by the current thread as we might + have unfinished readers of other key parts in the block + buffer. Still we are guaranteed not to have any readers + of the key part we are writing into until the block is + removed from the cache as we set the PCBLOCK_REASSIGNED + flag (see the code below that handles reading requests). + */ + free_block(pagecache, block); + return 0; + } + /* Wait until the page is flushed on disk */ + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("find_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* + Given the use of "resize_in_flush", it seems impossible + that this whole branch is ever entered in single-threaded case + because "(wrmode && pagecache->resize_in_flush)" cannot be true. + TODO: Check this, and then put the whole branch into the + "#ifdef THREAD" guard. + */ +#endif + } + /* Invalidate page in the block if it has not been done yet */ + if (block->status) + free_block(pagecache, block); + return 0; + } + + if (page_status == PAGE_READ && + (block->status & (PCBLOCK_IN_SWITCH | PCBLOCK_REASSIGNED))) + { + /* This is a request for a page to be removed from cache */ + + KEYCACHE_DBUG_PRINT("find_block", + ("request for old page in block: %u " + "wrmode: %d block->status: %d", + PCBLOCK_NUMBER(pagecache, block), wrmode, + block->status)); + /* + Only reading requests can proceed until the old dirty page is flushed, + all others are to be suspended, then resubmitted + */ + if (!wrmode && !(block->status & PCBLOCK_REASSIGNED)) + { + if (reg_req) + reg_requests(pagecache, block, 1); + } + else + { + DBUG_ASSERT(hash_link->requests > 0); + hash_link->requests--; + KEYCACHE_DBUG_PRINT("find_block", + ("request waiting for old page to be saved")); + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + /* Put the request into the queue of those waiting for the old page */ + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + /* Wait until the request can be resubmitted */ + do + { + KEYCACHE_DBUG_PRINT("find_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } + KEYCACHE_DBUG_PRINT("find_block", + ("request for old page resubmitted")); + DBUG_PRINT("info", ("restarting...")); + /* Resubmit the request */ + goto restart; + } + block->status&= ~PCBLOCK_IN_SWITCH; + } + else + { + /* This is a request for a new page or for a page not to be removed */ + if (! block) + { + /* No block is assigned for the page yet */ + if (pagecache->blocks_unused) + { + if (pagecache->free_block_list) + { + /* There is a block in the free list. */ + block= pagecache->free_block_list; + pagecache->free_block_list= block->next_used; + block->next_used= NULL; + } + else + { + /* There are some never used blocks, take first of them */ + block= &pagecache->block_root[pagecache->blocks_used]; + block->buffer= ADD_TO_PTR(pagecache->block_mem, + ((ulong) pagecache->blocks_used* + pagecache->block_size), + byte*); + pagecache->blocks_used++; + } + pagecache->blocks_unused--; + DBUG_ASSERT((block->status & PCBLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->requests= 1; + block->temperature= PCBLOCK_COLD; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + link_to_file_list(pagecache, block, file, 0); + block->hash_link= hash_link; + hash_link->block= block; + page_status= PAGE_TO_BE_READ; + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); + KEYCACHE_DBUG_PRINT("find_block", + ("got free or never used block %u", + PCBLOCK_NUMBER(pagecache, block))); + } + else + { + /* There are no never used blocks, use a block from the LRU chain */ + + /* + Wait until a new block is added to the LRU chain; + several threads might wait here for the same page, + all of them must get the same block + */ + +#ifdef THREAD + if (! pagecache->used_last) + { + struct st_my_thread_var *thread= my_thread_var; + thread->opt_info= (void *) hash_link; + wqueue_link_into_queue(&pagecache->waiting_for_block, thread); + do + { + KEYCACHE_DBUG_PRINT("find_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); + thread->opt_info= NULL; + } +#else + KEYCACHE_DBUG_ASSERT(pagecache->used_last); +#endif + block= hash_link->block; + if (! block) + { + /* + Take the first block from the LRU chain + unlinking it from the chain + */ + block= pagecache->used_last->next_used; + block->hits_left= init_hits_left; + block->last_hit_time= 0; + if (reg_req) + reg_requests(pagecache, block, 1); + hash_link->block= block; + } + PCBLOCK_INFO(block); + DBUG_ASSERT((block->status & PCBLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + + if (block->hash_link != hash_link && + ! (block->status & PCBLOCK_IN_SWITCH) ) + { + /* this is a primary request for a new page */ + DBUG_ASSERT((block->status & PCBLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + block->status|= (PCBLOCK_IN_SWITCH | PCBLOCK_WRLOCK); + + KEYCACHE_DBUG_PRINT("find_block", + ("got block %u for new page", + PCBLOCK_NUMBER(pagecache, block))); + + if (block->status & PCBLOCK_CHANGED) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 0); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + } + + block->status|= PCBLOCK_REASSIGNED; + if (block->hash_link) + { + /* + Wait until all pending read requests + for this page are executed + (we could have avoided this waiting, if we had read + a page in the cache in a sweep, without yielding control) + */ + wait_for_readers(pagecache, block); + + /* Remove the hash link for this page from the hash table */ + unlink_hash(pagecache, block->hash_link); + /* All pending requests for this page must be resubmitted */ +#ifdef THREAD + if (block->wqueue[COND_FOR_SAVED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif + } + link_to_file_list(pagecache, block, file, + (my_bool)(block->hash_link ? 1 : 0)); + PCBLOCK_INFO(block); + block->status= error? PCBLOCK_ERROR : 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->hash_link= hash_link; + page_status= PAGE_TO_BE_READ; + DBUG_PRINT("info", ("page to be read set for page 0x%lx", + (ulong)block)); + + KEYCACHE_DBUG_ASSERT(block->hash_link->block == block); + KEYCACHE_DBUG_ASSERT(hash_link->block->hash_link == hash_link); + } + else + { + /* This is for secondary requests for a new page only */ + KEYCACHE_DBUG_PRINT("find_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & PCBLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + pagecache->global_cache_read++; + } + else + { + if (reg_req) + reg_requests(pagecache, block, 1); + KEYCACHE_DBUG_PRINT("find_block", + ("block->hash_link: %p hash_link: %p " + "block->status: %u", block->hash_link, + hash_link, block->status )); + page_status= (((block->hash_link == hash_link) && + (block->status & PCBLOCK_READ)) ? + PAGE_READ : PAGE_WAIT_TO_BE_READ); + } + } + + KEYCACHE_DBUG_ASSERT(page_status != -1); + *page_st= page_status; + DBUG_PRINT("info", + ("block: 0x%lx fd: %u pos: %lu block->status: %u page_status: %u", + (ulong) block, (uint) file->file, + (ulong) pageno, block->status, (uint) page_status)); + KEYCACHE_DBUG_PRINT("find_block", + ("block: 0x%lx fd: %d pos: %lu block->status: %u page_status: %d", + (ulong) block, + file->file, (ulong) pageno, block->status, + page_status)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of find_block",0);); +#endif + KEYCACHE_THREAD_TRACE("find_block:end"); + DBUG_RETURN(block); +} + + +static void add_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("add_pin"); + DBUG_PRINT("enter", ("block: 0x%lx pins: %u", + (ulong) block, + block->pins)); + PCBLOCK_INFO(block); + block->pins++; +#ifndef DBUG_OFF + { + PAGECACHE_PIN_INFO *info= + (PAGECACHE_PIN_INFO *)my_malloc(sizeof(PAGECACHE_PIN_INFO), MYF(0)); + info->thread= my_thread_var; + info_link(&block->pin_list, info); + } +#endif + DBUG_VOID_RETURN; +} + +static void remove_pin(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("remove_pin"); + DBUG_PRINT("enter", ("block: 0x%lx pins: %u", + (ulong) block, + block->pins)); + PCBLOCK_INFO(block); + DBUG_ASSERT(block->pins > 0); + block->pins--; +#ifndef DBUG_OFF + { + PAGECACHE_PIN_INFO *info= info_find(block->pin_list, my_thread_var); + DBUG_ASSERT(info != 0); + info_unlink(info); + my_free((gptr) info, MYF(0)); + } +#endif + DBUG_VOID_RETURN; +} +#ifndef DBUG_OFF +static void info_add_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)my_malloc(sizeof(PAGECACHE_LOCK_INFO), MYF(0)); + info->thread= my_thread_var; + info->write_lock= wl; + info_link((PAGECACHE_PIN_INFO **)&block->lock_list, + (PAGECACHE_PIN_INFO *)info); +} +static void info_remove_lock(PAGECACHE_BLOCK_LINK *block) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, + my_thread_var); + DBUG_ASSERT(info != 0); + info_unlink((PAGECACHE_PIN_INFO *)info); + my_free((gptr)info, MYF(0)); +} +static void info_change_lock(PAGECACHE_BLOCK_LINK *block, my_bool wl) +{ + PAGECACHE_LOCK_INFO *info= + (PAGECACHE_LOCK_INFO *)info_find((PAGECACHE_PIN_INFO *)block->lock_list, + my_thread_var); + DBUG_ASSERT(info != 0); + DBUG_ASSERT(info->write_lock != wl); + info->write_lock= wl; +} +#else +#define info_add_lock(B,W) +#define info_remove_lock(B) +#define info_change_lock(B,W) +#endif + +/* + Put on the block write lock + + SYNOPSIS + get_wrlock() + pagecache pointer to a page cache data structure + block the block to work with + + RETURN + 0 - OK + 1 - Can't lock this block, need retry +*/ + +static my_bool get_wrlock(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block) +{ + PAGECACHE_FILE file= block->hash_link->file; + pgcache_page_no_t pageno= block->hash_link->pageno; + DBUG_ENTER("get_wrlock"); + DBUG_PRINT("info", ("the block 0x%lx " + "files %d(%d) pages %d(%d)", + (ulong)block, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); + PCBLOCK_INFO(block); + while (block->status & PCBLOCK_WRLOCK) + { + /* Lock failed we will wait */ +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + DBUG_PRINT("info", ("fail to lock, waiting... 0x%lx", (ulong)block)); + wqueue_add_to_queue(&block->wqueue[COND_FOR_WRLOCK], thread); + dec_counter_for_resize_op(pagecache); + do + { + KEYCACHE_DBUG_PRINT("get_wrlock: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while(thread->next); +#else + DBUG_ASSERT(0); +#endif + PCBLOCK_INFO(block); + if ((block->status & (PCBLOCK_REASSIGNED | PCBLOCK_IN_SWITCH)) || + file.file != block->hash_link->file.file || + pageno != block->hash_link->pageno) + { + DBUG_PRINT("info", ("the block 0x%lx changed => need retry" + "status %x files %d != %d or pages %d !=%d", + (ulong)block, block->status, + file.file, block->hash_link->file.file, + pageno, block->hash_link->pageno)); + DBUG_RETURN(1); + } + } + DBUG_ASSERT(block->pins == 0); + /* we are doing it by global cache mutex protection, so it is OK */ + block->status|= PCBLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock set, block 0x%lx", (ulong)block)); + DBUG_RETURN(0); +} + + +/* + Remove write lock from the block + + SYNOPSIS + release_wrlock() + pagecache pointer to a page cache data structure + block the block to work with + + RETURN + 0 - OK +*/ + +static void release_wrlock(PAGECACHE_BLOCK_LINK *block) +{ + DBUG_ENTER("release_wrlock"); + PCBLOCK_INFO(block); + DBUG_ASSERT(block->status & PCBLOCK_WRLOCK); + DBUG_ASSERT(block->pins > 0); + block->status&= ~PCBLOCK_WRLOCK; + DBUG_PRINT("info", ("WR lock reset, block 0x%lx", (ulong)block)); +#ifdef THREAD + /* release all threads waiting for write lock */ + if (block->wqueue[COND_FOR_WRLOCK].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_WRLOCK]); +#endif + PCBLOCK_INFO(block); + DBUG_VOID_RETURN; +} + + +/* + Try to lock/unlock and pin/unpin the block + + SYNOPSIS + make_lock_and_pin() + pagecache pointer to a page cache data structure + block the block to work with + lock lock change mode + pin pinchange mode + + RETURN + 0 - OK + 1 - Try to lock the block failed +*/ + +static my_bool make_lock_and_pin(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin) +{ + DBUG_ENTER("make_lock_and_pin"); + + DBUG_PRINT("enter", ("block: 0x%lx", (ulong)block)); +#ifndef DBUG_OFF + if (block) + { + DBUG_PRINT("enter", ("block: 0x%lx (%u) wrlock: %c pins: %u lock: %s pin: %s", + (ulong)block, PCBLOCK_NUMBER(pagecache, block), + ((block->status & PCBLOCK_WRLOCK)?'Y':'N'), + block->pins, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + PCBLOCK_INFO(block); + } +#endif + + switch (lock) { + case PAGECACHE_LOCK_WRITE: /* free -> write */ + /* Writelock and pin the buffer */ + if (get_wrlock(pagecache, block)) + { + /* can't lock => need retry */ + goto retry; + } + + /* The cache is locked so nothing afraid of */ + add_pin(block); + info_add_lock(block, 1); + break; + case PAGECACHE_LOCK_WRITE_TO_READ: /* write -> read */ + case PAGECACHE_LOCK_WRITE_UNLOCK: /* write -> free */ + /* + Removes write lock and puts read lock (which is nothing in our + implementation) + */ + release_wrlock(block); + case PAGECACHE_LOCK_READ_UNLOCK: /* read -> free */ + case PAGECACHE_LOCK_LEFT_READLOCKED: /* read -> read */ + if (pin == PAGECACHE_UNPIN) + { + remove_pin(block); + } + if (lock == PAGECACHE_LOCK_WRITE_TO_READ) + { + info_change_lock(block, 0); + } + else if (lock == PAGECACHE_LOCK_WRITE_UNLOCK || + lock == PAGECACHE_LOCK_READ_UNLOCK) + { + info_remove_lock(block); + } + break; + case PAGECACHE_LOCK_READ: /* free -> read */ + if (pin == PAGECACHE_PIN) + { + /* The cache is locked so nothing afraid off */ + add_pin(block); + } + info_add_lock(block, 0); + break; + case PAGECACHE_LOCK_LEFT_UNLOCKED: /* free -> free */ + case PAGECACHE_LOCK_LEFT_WRITELOCKED: /* write -> write */ + break; /* do nothing */ + default: + DBUG_ASSERT(0); /* Never should happened */ + } + +#ifndef DBUG_OFF + if (block) + PCBLOCK_INFO(block); +#endif + DBUG_RETURN(0); +retry: + DBUG_PRINT("INFO", ("Retry block 0x%lx", (ulong)block)); + PCBLOCK_INFO(block); + DBUG_ASSERT(block->hash_link->requests > 0); + block->hash_link->requests--; + DBUG_ASSERT(block->requests > 0); + unreg_request(pagecache, block, 1); + PCBLOCK_INFO(block); + DBUG_RETURN(1); + +} + + +/* + Read into a key cache block buffer from disk. + + SYNOPSIS + + read_block() + pagecache pointer to a page cache data structure + block block to which buffer the data is to be read + primary <-> the current thread will read the data + validator validator of read from the disk data + validator_data pointer to the data need by the validator + + RETURN VALUE + None + + NOTES. + The function either reads a page data from file to the block buffer, + or waits until another thread reads it. What page to read is determined + by a block parameter - reference to a hash link for this page. + If an error occurs THE PCBLOCK_ERROR bit is set in the block status. +*/ + +static void read_block(PAGECACHE *pagecache, + PAGECACHE_BLOCK_LINK *block, + my_bool primary, + pagecache_disk_read_validator validator, + gptr validator_data) +{ + uint got_length; + + /* On entry cache_lock is locked */ + + DBUG_ENTER("read_block"); + if (primary) + { + /* + This code is executed only by threads + that submitted primary requests + */ + + DBUG_PRINT("read_block", + ("page to be read by primary request")); + + /* Page is not in buffer yet, is to be read from disk */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + Here other threads may step in and register as secondary readers. + They will register in block->wqueue[COND_FOR_REQUESTED]. + */ + got_length= pagecache_fread(pagecache, &block->hash_link->file, + block->buffer, + block->hash_link->pageno, MYF(0)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (got_length < pagecache->block_size) + block->status|= PCBLOCK_ERROR; + else + block->status= (PCBLOCK_READ | (block->status & PCBLOCK_WRLOCK)); + + if (validator != NULL && + (*validator)(block->buffer, validator_data)) + block->status|= PCBLOCK_ERROR; + + DBUG_PRINT("read_block", + ("primary request: new page in cache")); + /* Signal that all pending requests for this page now can be processed */ +#ifdef THREAD + if (block->wqueue[COND_FOR_REQUESTED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif + } + else + { + /* + This code is executed only by threads + that submitted secondary requests + */ + DBUG_PRINT("read_block", + ("secondary request waiting for new page to be read")); + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + /* Put the request into a queue and wait until it can be processed */ + wqueue_add_to_queue(&block->wqueue[COND_FOR_REQUESTED], thread); + do + { + DBUG_PRINT("read_block: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } + DBUG_PRINT("read_block", + ("secondary request: new page in cache")); + } + DBUG_VOID_RETURN; +} + + +/* + Set LSN on the page to the given one if the given LSN is bigger + + SYNOPSIS + check_and_set_lsn() + lsn LSN to set + block block to check and set +*/ + +static void check_and_set_lsn(LSN lsn, PAGECACHE_BLOCK_LINK *block) +{ + LSN old; + DBUG_ENTER("check_and_set_lsn"); + DBUG_ASSERT(block->type == PAGECACHE_LSN_PAGE); + old= lsn_korr(block->buffer + PAGE_LSN_OFFSET); + DBUG_PRINT("info", ("old lsn: (%lu, 0x%lx) new lsn: (%lu, 0x%lx)", + (ulong)LSN_FILE_NO(old), (ulong)LSN_OFFSET(old), + (ulong)LSN_FILE_NO(lsn), (ulong)LSN_OFFSET(lsn))); + if (cmp_translog_addr(lsn, old) > 0) + lsn_store(block->buffer + PAGE_LSN_OFFSET, lsn); + DBUG_VOID_RETURN; +} + + +/* + Unlock/unpin page and put LSN stamp if it need + + SYNOPSIS + pagecache_unlock() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lock lock change + pin pin page + first_REDO_LSN_for_page do not set it if it is zero + lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + is bigger then LSN on the page it will be written on + the page + + NOTE + Pininig uses requests registration mechanism it works following way: + | beginnig | ending | + | of func. | of func. | + ----------------------------+-------------+---------------+ + PAGECACHE_PIN_LEFT_PINNED | - | - | + PAGECACHE_PIN_LEFT_UNPINNED | reg request | unreg request | + PAGECACHE_PIN | reg request | - | + PAGECACHE_UNPIN | - | unreg request | + + +*/ + +void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page, + LSN lsn) +{ + PAGECACHE_BLOCK_LINK *block; + int page_st; + DBUG_ENTER("pagecache_unlock"); + DBUG_PRINT("enter", ("fd: %u page: %lu %s %s", + (uint) file->file, (ulong) pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + /* we do not allow any lock/pin increasing here */ + DBUG_ASSERT(pin != PAGECACHE_PIN); + DBUG_ASSERT(lock != PAGECACHE_LOCK_READ); + DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock because want + to unlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + /* See NOTE for pagecache_unlock about registering requests */ + block= find_block(pagecache, file, pageno, 0, 0, + test(pin == PAGECACHE_PIN_LEFT_UNPINNED), &page_st); + PCBLOCK_INFO(block); + DBUG_ASSERT(block != 0 && page_st == PAGE_READ); + if (first_REDO_LSN_for_page) + { + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK); + DBUG_ASSERT(pin == PAGECACHE_UNPIN); + if (block->rec_lsn == 0) + block->rec_lsn= first_REDO_LSN_for_page; + } + if (lsn != 0) + { + check_and_set_lsn(lsn, block); + } + + if (make_lock_and_pin(pagecache, block, lock, pin)) + { + DBUG_ASSERT(0); /* should not happend */ + } + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock about registering requests. + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unpin page + + SYNOPSIS + pagecache_unpin() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + is bigger then LSN on the page it will be written on + the page +*/ + +void pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + LSN lsn) +{ + PAGECACHE_BLOCK_LINK *block; + int page_st; + DBUG_ENTER("pagecache_unpin"); + DBUG_PRINT("enter", ("fd: %u page: %lu", + (uint) file->file, (ulong) pageno)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock bacause want + aunlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + /* See NOTE for pagecache_unlock about registering requests */ + block= find_block(pagecache, file, pageno, 0, 0, 0, &page_st); + DBUG_ASSERT(block != 0); + DBUG_ASSERT(page_st == PAGE_READ); + + if (lsn != 0) + { + check_and_set_lsn(lsn, block); + } + + /* + we can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + if (make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN)) + DBUG_ASSERT(0); /* should not happend */ + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock about registering requests + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unlock/unpin page and put LSN stamp if it need + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unlock_by_link() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) + lock lock change + pin pin page + first_REDO_LSN_for_page do not set it if it is zero + lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + is bigger then LSN on the page it will be written on + the page +*/ + +void pagecache_unlock_by_link(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page, + LSN lsn) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unlock_by_link"); + DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu %s %s", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + /* + We do not allow any lock/pin increasing here and page can't be + unpinned because we use direct link. + */ + DBUG_ASSERT(pin != PAGECACHE_PIN); + DBUG_ASSERT(pin != PAGECACHE_PIN_LEFT_UNPINNED); + DBUG_ASSERT(lock != PAGECACHE_LOCK_READ); + DBUG_ASSERT(lock != PAGECACHE_LOCK_WRITE); + if (pin == PAGECACHE_PIN_LEFT_UNPINNED && + lock == PAGECACHE_LOCK_READ_UNLOCK) + { + /* block do not need here so we do not provide it */ + if (make_lock_and_pin(pagecache, 0, lock, pin)) + DBUG_ASSERT(0); /* should not happend */ + DBUG_VOID_RETURN; + } + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock because want + unlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + if (first_REDO_LSN_for_page) + { + /* + LOCK_READ_UNLOCK is ok here as the page may have first locked + with WRITE lock that was temporarly converted to READ lock before + it's unpinned + */ + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE_UNLOCK || + lock == PAGECACHE_LOCK_READ_UNLOCK); + DBUG_ASSERT(pin == PAGECACHE_UNPIN); + if (block->rec_lsn == 0) + block->rec_lsn= first_REDO_LSN_for_page; + } + if (lsn != 0) + { + check_and_set_lsn(lsn, block); + } + + if (make_lock_and_pin(pagecache, block, lock, pin)) + DBUG_ASSERT(0); /* should not happend */ + + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock about registering requests. + */ + if (pin != PAGECACHE_PIN_LEFT_PINNED) + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Unpin page + (uses direct block/page pointer) + + SYNOPSIS + pagecache_unpin_by_link() + pagecache pointer to a page cache data structure + link direct link to page (returned by read or write) + lsn if it is not CONTROL_FILE_IMPOSSIBLE_LSN (0) and it + is bigger then LSN on the page it will be written on + the page +*/ + +void pagecache_unpin_by_link(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + LSN lsn) +{ + PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; + DBUG_ENTER("pagecache_unpin_by_link"); + DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu", + (ulong) block, + (uint) block->hash_link->file.file, + (ulong) block->hash_link->pageno)); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + /* + As soon as we keep lock cache can be used, and we have lock because want + unlock. + */ + DBUG_ASSERT(pagecache->can_be_used); + + inc_counter_for_resize_op(pagecache); + + if (lsn != 0) + { + check_and_set_lsn(lsn, block); + } + + /* + We can just unpin only with keeping read lock because: + a) we can't pin without any lock + b) we can't unpin keeping write lock + */ + if (make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_LEFT_READLOCKED, + PAGECACHE_UNPIN)) + DBUG_ASSERT(0); /* should not happend */ + + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock about registering requests. + */ + unreg_request(pagecache, block, 1); + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + DBUG_VOID_RETURN; +} + + +/* + Read a block of data from a cached file into a buffer; + + SYNOPSIS + pagecache_valid_read() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + level determines the weight of the data + buff buffer to where the data must be placed + type type of the page + lock lock change + link link to the page if we pin it + validator validator of read from the disk data + validator_data pointer to the data need by the validator + + RETURN VALUE + Returns address from where the data is placed if successful, 0 - otherwise. + + Pin will be chosen according to lock parameter (see lock_to_pin) +*/ +static enum pagecache_page_pin lock_to_pin[]= +{ + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/, + PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +byte *pagecache_valid_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link, + pagecache_disk_read_validator validator, + gptr validator_data) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + PAGECACHE_PAGE_LINK fake_link; + DBUG_ENTER("pagecache_valid_read"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s %s %s", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + + if (!link) + link= &fake_link; + else + *link= 0; + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + PAGECACHE_BLOCK_LINK *block; + uint status; + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_r_requests++; + /* See NOTE for pagecache_unlock about registering requests. */ + block= find_block(pagecache, file, pageno, level, + test(lock == PAGECACHE_LOCK_WRITE), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), + &page_st); + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + if (((block->status & PCBLOCK_ERROR) == 0) && (page_st != PAGE_READ)) + { + DBUG_PRINT("info", ("read block 0x%lx", (ulong)block)); + /* The requested page is to be read into the block buffer */ + read_block(pagecache, block, + (my_bool)(page_st == PAGE_TO_BE_READ), + validator, validator_data); + DBUG_PRINT("info", ("read is done")); + } + if (make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to write lock the block, cache is unlocked, + we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + + if (! ((status= block->status) & PCBLOCK_ERROR)) + { +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); +#endif + + DBUG_ASSERT((pagecache->block_size & 511) == 0); + /* Copy data from the cache buffer */ + bmove512(buff, block->buffer, pagecache->block_size); + +#if !defined(SERIALIZED_READ_FROM_CACHE) + pagecache_pthread_mutex_lock(&pagecache->cache_lock); +#endif + } + + remove_reader(block); + /* + Link the block into the LRU chain if it's the last submitted request + for the block and block will not be pinned. + See NOTE for pagecache_unlock about registering requests. + */ + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + if (status & PCBLOCK_ERROR) + DBUG_RETURN((byte *) 0); + + DBUG_RETURN(buff); + } + +no_key_cache: /* Key cache is not used */ + + /* We can't use mutex here as the key cache may not be initialized */ + pagecache->global_cache_r_requests++; + pagecache->global_cache_read++; + if (pagecache_fread(pagecache, file, (byte*) buff, pageno, MYF(MY_NABP))) + error= 1; + DBUG_RETURN(error ? (byte*) 0 : buff); +} + + +/* + Delete page from the buffer + + SYNOPSIS + pagecache_delete() + pagecache pointer to a page cache data structure + file handler for the file for the block of data to be read + pageno number of the block of data in the file + lock lock change + flush flush page if it is dirty + + RETURN VALUE + 0 - deleted or was not present at all + 1 - error + + NOTES. + lock can be only PAGECACHE_LOCK_LEFT_WRITELOCKED (page was write locked + before) or PAGECACHE_LOCK_WRITE (delete will write lock page before delete) +*/ +my_bool pagecache_delete(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush) +{ + int error= 0; + enum pagecache_page_pin pin= lock_to_pin[lock]; + DBUG_ENTER("pagecache_delete"); + DBUG_PRINT("enter", ("fd: %u page: %lu %s %s", + (uint) file->file, (ulong) pageno, + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin])); + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || + lock == PAGECACHE_LOCK_LEFT_WRITELOCKED); + DBUG_ASSERT(pin == PAGECACHE_PIN || + pin == PAGECACHE_PIN_LEFT_PINNED); + +restart: + + if (pagecache->can_be_used) + { + /* Key cache is used */ + reg1 PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK **unused_start, *link; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + goto end; + + inc_counter_for_resize_op(pagecache); + link= get_present_hash_link(pagecache, file, pageno, &unused_start); + if (!link) + { + DBUG_PRINT("info", ("There is no such page in the cache")); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(0); + } + block= link->block; + /* See NOTE for pagecache_unlock about registering requests. */ + if (pin == PAGECACHE_PIN) + reg_requests(pagecache, block, 1); + DBUG_ASSERT(block != 0); + if (make_lock_and_pin(pagecache, block, lock, pin)) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + + if (block->status & PCBLOCK_CHANGED) + { + if (flush) + { + /* The block contains a dirty page - push it out of the cache */ + + KEYCACHE_DBUG_PRINT("find_block", ("block is dirty")); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + The call is thread safe because only the current + thread might change the block->hash_link value + */ + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, + &block->hash_link->file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache->global_cache_write++; + + if (error) + { + block->status|= PCBLOCK_ERROR; + goto err; + } + } + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + /* + free_block() will change the status and rec_lsn of the block so no + need to change them here. + */ + } + /* Cache is locked, so we can relese page before freeing it */ + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + DBUG_ASSERT(link->requests > 0); + link->requests--; + /* See NOTE for pagecache_unlock about registering requests. */ + free_block(pagecache, block); + +err: + dec_counter_for_resize_op(pagecache); +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + } + + DBUG_RETURN(error); +} + + +my_bool pagecache_delete_pages(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint page_count, + enum pagecache_page_lock lock, + my_bool flush) +{ + ulong page_end; + DBUG_ENTER("pagecache_delete_pages"); + DBUG_ASSERT(page_count > 0); + + page_end= pageno + page_count; + do + { + if (pagecache_delete(pagecache, file, pageno, + lock, flush)) + DBUG_RETURN(1); + } while (++pageno != page_end); + DBUG_RETURN(0); +} + + +/* + Write a buffer into a cached file. + + SYNOPSIS + + pagecache_write_part() + pagecache pointer to a page cache data structure + file handler for the file to write data to + pageno number of the block of data in the file + level determines the weight of the data + buff buffer with the data + type type of the page + lock lock change + pin pin page + write_mode how to write page + link link to the page if we pin it + + RETURN VALUE + 0 if a success, 1 - otherwise. +*/ + +/* description of how to change lock before and after write */ +struct write_lock_change +{ + int need_lock_change; /* need changing of lock at the end of write */ + enum pagecache_page_lock new_lock; /* lock at the beginning */ + enum pagecache_page_lock unlock_lock; /* lock at the end */ +}; + +static struct write_lock_change write_lock_change_table[]= +{ + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_UNLOCK} /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + {0, /*unsupported (we can't write having the block read locked) */ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + {0, PAGECACHE_LOCK_LEFT_WRITELOCKED, 0} /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + {1, + PAGECACHE_LOCK_WRITE, + PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_READ*/, + {0, PAGECACHE_LOCK_WRITE, 0} /*PAGECACHE_LOCK_WRITE*/, + {0, /*unsupported (we can't write having the block read locked) */ + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_LOCK_LEFT_UNLOCKED} /*PAGECACHE_LOCK_READ_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_UNLOCK } /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + {1, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_LOCK_WRITE_TO_READ} /*PAGECACHE_LOCK_WRITE_TO_READ*/ +}; + +/* description of how to change pin before and after write */ +struct write_pin_change +{ + enum pagecache_page_pin new_pin; /* pin status at the beginning */ + enum pagecache_page_pin unlock_pin; /* pin status at the end */ +}; + +static struct write_pin_change write_pin_change_table[]= +{ + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN_LEFT_PINNED*/, + {PAGECACHE_PIN, + PAGECACHE_UNPIN} /*PAGECACHE_PIN_LEFT_UNPINNED*/, + {PAGECACHE_PIN, + PAGECACHE_PIN_LEFT_PINNED} /*PAGECACHE_PIN*/, + {PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_UNPIN} /*PAGECACHE_UNPIN*/ +}; + +my_bool pagecache_write_part(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + enum pagecache_write_mode write_mode, + PAGECACHE_PAGE_LINK *link, + uint offset, uint size) +{ + PAGECACHE_BLOCK_LINK *block= NULL; + PAGECACHE_PAGE_LINK fake_link; + int error= 0; + int need_lock_change= write_lock_change_table[lock].need_lock_change; + DBUG_ENTER("pagecache_write_part"); + DBUG_PRINT("enter", ("fd: %u page: %lu level: %u type: %s lock: %s " + "pin: %s mode: %s offset: %u size %u", + (uint) file->file, (ulong) pageno, level, + page_cache_page_type_str[type], + page_cache_page_lock_str[lock], + page_cache_page_pin_str[pin], + page_cache_page_write_mode_str[write_mode], + offset, size)); + DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED); + DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK); + DBUG_ASSERT(offset + size <= pagecache->block_size); + if (!link) + link= &fake_link; + else + *link= 0; + +restart: + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "start of key_cache_write", 1);); +#endif + + if (pagecache->can_be_used) + { + /* Key cache is used */ + int page_st; + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + if (!pagecache->can_be_used) + { + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + goto no_key_cache; + } + + inc_counter_for_resize_op(pagecache); + pagecache->global_cache_w_requests++; + /* See NOTE for pagecache_unlock about registering requests. */ + block= find_block(pagecache, file, pageno, level, + test(write_mode != PAGECACHE_WRITE_DONE && + lock != PAGECACHE_LOCK_LEFT_WRITELOCKED && + lock != PAGECACHE_LOCK_WRITE_UNLOCK && + lock != PAGECACHE_LOCK_WRITE_TO_READ), + test((pin == PAGECACHE_PIN_LEFT_UNPINNED) || + (pin == PAGECACHE_PIN)), + &page_st); + if (!block) + { + DBUG_ASSERT(write_mode != PAGECACHE_WRITE_DONE); + /* It happens only for requests submitted during resize operation */ + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* Write to the disk key cache is in resize at the moment*/ + goto no_key_cache; + } + + DBUG_ASSERT(block->type == PAGECACHE_EMPTY_PAGE || + block->type == type); + block->type= type; + + if (make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].new_lock, + (need_lock_change ? + write_pin_change_table[pin].new_pin : + pin))) + { + /* + We failed to writelock the block, cache is unlocked, and last write + lock is released, we will try to get the block again. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("restarting...")); + goto restart; + } + + if (write_mode == PAGECACHE_WRITE_DONE) + { + if ((block->status & PCBLOCK_ERROR) && page_st != PAGE_READ) + { + /* Copy data from buff */ + if (!(size & 511)) + bmove512(block->buffer + offset, buff, size); + else + memcpy(block->buffer + offset, buff, size); + block->status= (PCBLOCK_READ | (block->status & PCBLOCK_WRLOCK)); + KEYCACHE_DBUG_PRINT("key_cache_insert", + ("primary request: new page in cache")); +#ifdef THREAD + /* Signal that all pending requests for this now can be processed. */ + if (block->wqueue[COND_FOR_REQUESTED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_REQUESTED]); +#endif + } + } + else + { + if (! (block->status & PCBLOCK_CHANGED)) + link_to_changed_list(pagecache, block); + + if (! (block->status & PCBLOCK_ERROR)) + { + if (!(size & 511)) + bmove512(block->buffer + offset, buff, size); + else + memcpy(block->buffer + offset, buff, size); + block->status|= PCBLOCK_READ; + } + } + + + if (need_lock_change) + { + /* + We don't set rec_lsn of the block; this is ok as for the + Maria-block-record's pages, we always keep pages pinned here. + */ + if (make_lock_and_pin(pagecache, block, + write_lock_change_table[lock].unlock_lock, + write_pin_change_table[pin].unlock_pin)) + DBUG_ASSERT(0); + } + + /* Unregister the request */ + DBUG_ASSERT(block->hash_link->requests > 0); + block->hash_link->requests--; + /* See NOTE for pagecache_unlock about registering requests. */ + if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) + unreg_request(pagecache, block, 1); + else + *link= (PAGECACHE_PAGE_LINK)block; + + if (block->status & PCBLOCK_ERROR) + error= 1; + + dec_counter_for_resize_op(pagecache); + + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + + goto end; + } + +no_key_cache: + /* Key cache is not used */ + if (write_mode == PAGECACHE_WRITE_DELAY) + { + pagecache->global_cache_w_requests++; + pagecache->global_cache_write++; + if (pagecache_fwrite(pagecache, file, (byte*) buff, pageno, type, + MYF(MY_NABP | MY_WAIT_IF_FULL))) + error=1; + } + +end: +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("exec", + test_key_cache(pagecache, "end of key_cache_write", 1);); +#endif + if (block) + PCBLOCK_INFO(block); + else + DBUG_PRINT("info", ("No block")); + DBUG_RETURN(error); +} + + +/* + Free block: remove reference to it from hash table, + remove it from the chain file of dirty/clean blocks + and add it to the free list. +*/ + +static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) +{ + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block: %u hash_link 0x%lx", + PCBLOCK_NUMBER(pagecache, block), + (long) block->hash_link)); + if (block->hash_link) + { + /* + While waiting for readers to finish, new readers might request the + block. But since we set block->status|= PCBLOCK_REASSIGNED, they + will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled + later. + */ + block->status|= PCBLOCK_REASSIGNED; + wait_for_readers(pagecache, block); + unlink_hash(pagecache, block->hash_link); + } + + unlink_changed(block); + DBUG_ASSERT((block->status & PCBLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + block->status= 0; +#ifndef DBUG_OFF + block->type= PAGECACHE_EMPTY_PAGE; +#endif + block->rec_lsn= 0; + KEYCACHE_THREAD_TRACE("free block"); + KEYCACHE_DBUG_PRINT("free_block", + ("block is freed")); + unreg_request(pagecache, block, 0); + block->hash_link= NULL; + + /* Remove the free block from the LRU ring. */ + unlink_block(pagecache, block); + if (block->temperature == PCBLOCK_WARM) + pagecache->warm_blocks--; + block->temperature= PCBLOCK_COLD; + /* Insert the free block in the free list. */ + block->next_used= pagecache->free_block_list; + pagecache->free_block_list= block; + /* Keep track of the number of currently unused blocks. */ + pagecache->blocks_unused++; + +#ifdef THREAD + /* All pending requests for this page must be resubmitted. */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif +} + + +static int cmp_sec_link(PAGECACHE_BLOCK_LINK **a, PAGECACHE_BLOCK_LINK **b) +{ + return (((*a)->hash_link->pageno < (*b)->hash_link->pageno) ? -1 : + ((*a)->hash_link->pageno > (*b)->hash_link->pageno) ? 1 : 0); +} + + +/* + Flush a portion of changed blocks to disk, + free used blocks if requested +*/ + +static int flush_cached_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + PAGECACHE_BLOCK_LINK **cache, + PAGECACHE_BLOCK_LINK **end, + enum flush_type type) +{ + int error; + int last_errno= 0; + uint count= (uint) (end-cache); + DBUG_ENTER("flush_cached_blocks"); + + /* Don't lock the cache during the flush */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + /* + As all blocks referred in 'cache' are marked by PCBLOCK_IN_FLUSH + we are guarunteed no thread will change them + */ + qsort((byte*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link); + + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + for (; cache != end; cache++) + { + PAGECACHE_BLOCK_LINK *block= *cache; + + if (block->pins) + { + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block: %u (0x%lx) pinned", + PCBLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block: %u (0x%lx) pinned", + PCBLOCK_NUMBER(pagecache, block), (ulong)block)); + PCBLOCK_INFO(block); + last_errno= -1; + unreg_request(pagecache, block, 1); + continue; + } + /* if the block is not pinned then it is not write locked */ + DBUG_ASSERT((block->status & PCBLOCK_WRLOCK) == 0); + DBUG_ASSERT(block->pins == 0); + if (make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE, PAGECACHE_PIN)) + DBUG_ASSERT(0); + + KEYCACHE_DBUG_PRINT("flush_cached_blocks", + ("block: %u (0x%lx) to be flushed", + PCBLOCK_NUMBER(pagecache, block), (ulong)block)); + DBUG_PRINT("info", ("block: %u (0x%lx) to be flushed", + PCBLOCK_NUMBER(pagecache, block), (ulong)block)); + PCBLOCK_INFO(block); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_PRINT("info", ("block: %u (0x%lx) pins: %u", + PCBLOCK_NUMBER(pagecache, block), (ulong)block, + block->pins)); + DBUG_ASSERT(block->pins == 1); + error= pagecache_fwrite(pagecache, file, + block->buffer, + block->hash_link->pageno, + block->type, + MYF(MY_NABP | MY_WAIT_IF_FULL)); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + + make_lock_and_pin(pagecache, block, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN); + + pagecache->global_cache_write++; + if (error) + { + block->status|= PCBLOCK_ERROR; + if (!last_errno) + last_errno= errno ? errno : -1; + } +#ifdef THREAD + /* + Let to proceed for possible waiting requests to write to the block page. + It might happen only during an operation to resize the key cache. + */ + if (block->wqueue[COND_FOR_SAVED].last_thread) + wqueue_release_queue(&block->wqueue[COND_FOR_SAVED]); +#endif + /* type will never be FLUSH_IGNORE_CHANGED here */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + else + { + block->status&= ~PCBLOCK_IN_FLUSH; + link_to_file_list(pagecache, block, file, 1); + unreg_request(pagecache, block, 1); + } + } + DBUG_RETURN(last_errno); +} + + +/** + @brief flush all key blocks for a file to disk but don't do any mutex locks + + @param pagecache pointer to a pagecache data structure + @param file handler for the file to flush to + @param flush_type type of the flush + + @note + This function doesn't do any mutex locks because it needs to be called + both from flush_pagecache_blocks and flush_all_key_blocks (the later one + does the mutex lock in the resize_pagecache() function). + + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +static int flush_pagecache_blocks_int(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + enum flush_type type) +{ + PAGECACHE_BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache; + int last_errno= 0; + DBUG_ENTER("flush_pagecache_blocks_int"); + DBUG_PRINT("enter",("file: %d blocks_used: %lu blocks_changed: %lu", + file->file, pagecache->blocks_used, pagecache->blocks_changed)); + +#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, + "start of flush_pagecache_blocks", 0);); +#endif + + cache= cache_buff; + if (pagecache->disk_blocks > 0 && + (!my_disable_flush_pagecache_blocks || type != FLUSH_KEEP)) + { + /* Key cache exists and flush is not disabled */ + int error= 0; + uint count= 0; + PAGECACHE_BLOCK_LINK **pos, **end; + PAGECACHE_BLOCK_LINK *first_in_switch= NULL; + PAGECACHE_BLOCK_LINK *block, *next; +#if defined(PAGECACHE_DEBUG) + uint cnt= 0; +#endif + uint8 changed_blocks_is_incomplete_incremented= 0; + + if (type != FLUSH_IGNORE_CHANGED) + { + /* + Count how many key blocks we have to cache to be able + to flush all dirty pages with minimum seek moves + */ + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= block->next_changed) + { + if (block->hash_link->file.file == file->file) + { + count++; + KEYCACHE_DBUG_ASSERT(count<= pagecache->blocks_used); + } + } + /* Allocate a new buffer only if its bigger than the one we have */ + if (count > FLUSH_CACHE && + !(cache= + (PAGECACHE_BLOCK_LINK**) + my_malloc(sizeof(PAGECACHE_BLOCK_LINK*)*count, MYF(0)))) + { + cache= cache_buff; + count= FLUSH_CACHE; + } + } + + /* Retrieve the blocks and write them to a buffer to be flushed */ +restart: + end= (pos= cache)+count; + for (block= pagecache->changed_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file) + { + /* + Mark the block with BLOCK_IN_FLUSH in order not to let + other threads to use it for new pages and interfere with + our sequence ot flushing dirty file pages + */ + block->status|= PCBLOCK_IN_FLUSH; + + if (! (block->status & PCBLOCK_IN_SWITCH)) + { + /* + We care only for the blocks for which flushing was not + initiated by other threads as a result of page swapping + */ + reg_requests(pagecache, block, 1); + if (type != FLUSH_IGNORE_CHANGED) + { + /* It's not a temporary file */ + if (pos == end) + { + /* + This happens only if there is not enough + memory for the big block + */ + if ((error= flush_cached_blocks(pagecache, file, cache, + end,type))) + last_errno=error; + DBUG_PRINT("info", ("restarting...")); + /* + Restart the scan as some other thread might have changed + the changed blocks chain: the blocks that were in switch + state before the flush started have to be excluded + */ + goto restart; + } + *pos++= block; + } + else + { + /* It's a temporary file */ + pagecache->blocks_changed--; + pagecache->global_blocks_changed--; + free_block(pagecache, block); + } + } + else + { + /* Link the block into a list of blocks 'in switch' */ + unlink_changed(block); + link_changed(block, &first_in_switch); + /* + We have just removed a page from the list of dirty pages + ("changed_blocks") though it's still dirty (the flush by another + thread has not yet happened). Checkpoint will miss the page and so + must be blocked until that flush has happened. + */ + /** + @todo RECOVERY: check all places where we remove a page from the + list of dirty pages + */ + if (unlikely(!changed_blocks_is_incomplete_incremented)) + { + changed_blocks_is_incomplete_incremented= 1; + changed_blocks_is_incomplete++; + } + } + } + } + if (pos != cache) + { + if ((error= flush_cached_blocks(pagecache, file, cache, pos, type))) + last_errno= error; + } + /* Wait until list of blocks in switch is empty */ + while (first_in_switch) + { +#if defined(PAGECACHE_DEBUG) + cnt= 0; +#endif + block= first_in_switch; + { +#ifdef THREAD + struct st_my_thread_var *thread= my_thread_var; + wqueue_add_to_queue(&block->wqueue[COND_FOR_SAVED], thread); + do + { + KEYCACHE_DBUG_PRINT("flush_pagecache_blocks_int: wait", + ("suspend thread %ld", thread->id)); + pagecache_pthread_cond_wait(&thread->suspend, + &pagecache->cache_lock); + } + while (thread->next); +#else + KEYCACHE_DBUG_ASSERT(0); + /* No parallel requests in single-threaded case */ +#endif + } +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + } + changed_blocks_is_incomplete-= + changed_blocks_is_incomplete_incremented; + /* The following happens very seldom */ + if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) + { +#if defined(PAGECACHE_DEBUG) + cnt=0; +#endif + for (block= pagecache->file_blocks[FILE_HASH(*file)] ; + block; + block= next) + { +#if defined(PAGECACHE_DEBUG) + cnt++; + KEYCACHE_DBUG_ASSERT(cnt <= pagecache->blocks_used); +#endif + next= block->next_changed; + if (block->hash_link->file.file == file->file && + (! (block->status & PCBLOCK_CHANGED) + || type == FLUSH_IGNORE_CHANGED)) + { + reg_requests(pagecache, block, 1); + free_block(pagecache, block); + } + } + } + } + +#ifndef DBUG_OFF + DBUG_EXECUTE("check_pagecache", + test_key_cache(pagecache, "end of flush_pagecache_blocks", 0);); +#endif + if (cache != cache_buff) + my_free((gptr) cache, MYF(0)); + if (last_errno) + errno=last_errno; /* Return first error */ + DBUG_RETURN(last_errno != 0); +} + + +/* + Flush all blocks for a file to disk + + SYNOPSIS + + flush_pagecache_blocks() + pagecache pointer to a page cache data structure + file handler for the file to flush to + flush_type type of the flush + + RETURN + 0 ok + 1 error +*/ + +int flush_pagecache_blocks(PAGECACHE *pagecache, + PAGECACHE_FILE *file, enum flush_type type) +{ + int res; + DBUG_ENTER("flush_pagecache_blocks"); + DBUG_PRINT("enter", ("pagecache: 0x%lx", (long) pagecache)); + + if (pagecache->disk_blocks <= 0) + DBUG_RETURN(0); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + inc_counter_for_resize_op(pagecache); + res= flush_pagecache_blocks_int(pagecache, file, type); + dec_counter_for_resize_op(pagecache); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + DBUG_RETURN(res); +} + + +/* + Reset the counters of a key cache. + + SYNOPSIS + reset_pagecache_counters() + name the name of a key cache + pagecache pointer to the pagecache to be reset + + DESCRIPTION + This procedure is used to reset the counters of all currently used key + caches, both the default one and the named ones. + + RETURN + 0 on success (always because it can't fail) +*/ + +int reset_pagecache_counters(const char *name, PAGECACHE *pagecache) +{ + DBUG_ENTER("reset_pagecache_counters"); + if (!pagecache->inited) + { + DBUG_PRINT("info", ("Key cache %s not initialized.", name)); + DBUG_RETURN(0); + } + DBUG_PRINT("info", ("Resetting counters for key cache %s.", name)); + + pagecache->global_blocks_changed= 0; /* Key_blocks_not_flushed */ + pagecache->global_cache_r_requests= 0; /* Key_read_requests */ + pagecache->global_cache_read= 0; /* Key_reads */ + pagecache->global_cache_w_requests= 0; /* Key_write_requests */ + pagecache->global_cache_write= 0; /* Key_writes */ + DBUG_RETURN(0); +} + + +/** + @brief Allocates a buffer and stores in it some info about all dirty pages + + Does the allocation because the caller cannot know the size itself. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). + Ignores all pages of another type than PAGECACHE_LSN_PAGE, because they + are not interesting for a checkpoint record. + The caller has the intention of doing checkpoints. + + @param pagecache pointer to the page cache + @param[out] str pointer to where the allocated buffer, and + its size, will be put + @param[out] min_rec_lsn pointer to where the minimum rec_lsn of all + relevant dirty pages will be put + @param[out] max_rec_lsn pointer to where the maximum rec_lsn of all + relevant dirty pages will be put + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, + LEX_STRING *str, + LSN *min_rec_lsn, + LSN *max_rec_lsn) +{ + my_bool error= 0; + ulong stored_list_size= 0; + uint file_hash; + char *ptr; + LSN minimum_rec_lsn= ULONGLONG_MAX, maximum_rec_lsn= 0; + DBUG_ENTER("pagecache_collect_changed_blocks_with_LSN"); + + DBUG_ASSERT(NULL == str->str); + /* + We lock the entire cache but will be quick, just reading/writing a few MBs + of memory at most. + */ + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + while (changed_blocks_is_incomplete > 0) + { + /* + Some pages are more recent in memory than on disk (=dirty) and are not + in "changed_blocks" so we cannot know them. Wait. + */ + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + sleep(1); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); + } + + /* Count how many dirty pages are interesting */ + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + /* + Q: is there somthing subtle with block->hash_link: can it be NULL? + does it have to be == hash_link->block... ? + */ + DBUG_ASSERT(block->hash_link != NULL); + DBUG_ASSERT(block->status & PCBLOCK_CHANGED); + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it */ + stored_list_size++; + } + } + + str->length= 8 + /* number of dirty pages */ + (4 + /* file */ + 4 + /* pageno */ + LSN_STORE_SIZE /* rec_lsn */ + ) * stored_list_size; + if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME)))) + goto err; + ptr= str->str; + int8store(ptr, stored_list_size); + ptr+= 8; + if (!stored_list_size) + goto end; + for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) + { + PAGECACHE_BLOCK_LINK *block; + for (block= pagecache->changed_blocks[file_hash] ; + block; + block= block->next_changed) + { + if (block->type != PAGECACHE_LSN_PAGE) + continue; /* no need to store it in the checkpoint record */ + compile_time_assert((4 == sizeof(block->hash_link->file.file))); + compile_time_assert((4 == sizeof(block->hash_link->pageno))); + int4store(ptr, block->hash_link->file.file); + ptr+= 4; + int4store(ptr, block->hash_link->pageno); + ptr+= 4; + lsn_store(ptr, block->rec_lsn); + ptr+= LSN_STORE_SIZE; + if (block->rec_lsn != 0) + { + if (cmp_translog_addr(block->rec_lsn, minimum_rec_lsn) < 0) + minimum_rec_lsn= block->rec_lsn; + if (cmp_translog_addr(block->rec_lsn, maximum_rec_lsn) > 0) + maximum_rec_lsn= block->rec_lsn; + } /* otherwise, some trn->rec_lsn should hold the info */ + } + } +end: + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + *min_rec_lsn= minimum_rec_lsn; + *max_rec_lsn= maximum_rec_lsn; + DBUG_RETURN(error); + +err: + error= 1; + goto end; +} + + +#ifndef DBUG_OFF +/* + Test if disk-cache is ok +*/ +static void test_key_cache(PAGECACHE *pagecache __attribute__((unused)), + const char *where __attribute__((unused)), + my_bool lock __attribute__((unused))) +{ + /* TODO */ +} +#endif + +#if defined(PAGECACHE_TIMEOUT) + +#define KEYCACHE_DUMP_FILE "pagecache_dump.txt" +#define MAX_QUEUE_LEN 100 + + +static void pagecache_dump(PAGECACHE *pagecache) +{ + FILE *pagecache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w"); + struct st_my_thread_var *last; + struct st_my_thread_var *thread; + PAGECACHE_BLOCK_LINK *block; + PAGECACHE_HASH_LINK *hash_link; + PAGECACHE_PAGE *page; + uint i; + + fprintf(pagecache_dump_file, "thread:%u\n", thread->id); + + i=0; + thread=last=waiting_for_hash_link.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for hash link\n"); + if (thread) + do + { + thread= thread->next; + page= (PAGECACHE_PAGE *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u, (file,pageno)=(%u,%lu)\n", + thread->id,(uint) page->file.file,(ulong) page->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + i=0; + thread=last=waiting_for_block.last_thread; + fprintf(pagecache_dump_file, "queue of threads waiting for block\n"); + if (thread) + do + { + thread=thread->next; + hash_link= (PAGECACHE_HASH_LINK *) thread->opt_info; + fprintf(pagecache_dump_file, + "thread:%u hash_link:%u (file,pageno)=(%u,%lu)\n", + thread->id, (uint) PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link), + (uint) hash_link->file.file,(ulong) hash_link->pageno); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + + for (i=0 ; i < pagecache->blocks_used ; i++) + { + int j; + block= &pagecache->block_root[i]; + hash_link= block->hash_link; + fprintf(pagecache_dump_file, + "block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n", + i, (int) (hash_link ? + PAGECACHE_HASH_LINK_NUMBER(pagecache, hash_link) : + -1), + block->status, block->requests, block->condvar ? 1 : 0); + for (j=0 ; j < COND_SIZE; j++) + { + PAGECACHE_WQUEUE *wqueue=&block->wqueue[j]; + thread= last= wqueue->last_thread; + fprintf(pagecache_dump_file, "queue #%d\n", j); + if (thread) + { + do + { + thread=thread->next; + fprintf(pagecache_dump_file, + "thread:%u\n", thread->id); + if (++i == MAX_QUEUE_LEN) + break; + } + while (thread != last); + } + } + } + fprintf(pagecache_dump_file, "LRU chain:"); + block= pagecache= used_last; + if (block) + { + do + { + block= block->next_used; + fprintf(pagecache_dump_file, + "block:%u, ", PCBLOCK_NUMBER(pagecache, block)); + } + while (block != pagecache->used_last); + } + fprintf(pagecache_dump_file, "\n"); + + fclose(pagecache_dump_file); +} + +#endif /* defined(PAGECACHE_TIMEOUT) */ + +#if defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) + + +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + struct timeval now; /* time when we started waiting */ + struct timespec timeout; /* timeout value for the wait function */ + struct timezone tz; +#if defined(PAGECACHE_DEBUG) + int cnt=0; +#endif + + /* Get current time */ + gettimeofday(&now, &tz); + /* Prepare timeout value */ + timeout.tv_sec= now.tv_sec + PAGECACHE_TIMEOUT; + /* + timeval uses microseconds. + timespec uses nanoseconds. + 1 nanosecond = 1000 micro seconds + */ + timeout.tv_nsec= now.tv_usec * 1000; + KEYCACHE_THREAD_TRACE_END("started waiting"); +#if defined(PAGECACHE_DEBUG) + cnt++; + if (cnt % 100 == 0) + fprintf(pagecache_debug_log, "waiting...\n"); + fflush(pagecache_debug_log); +#endif + rc= pthread_cond_timedwait(cond, mutex, &timeout); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + if (rc == ETIMEDOUT || rc == ETIME) + { +#if defined(PAGECACHE_DEBUG) + fprintf(pagecache_debug_log,"aborted by pagecache timeout\n"); + fclose(pagecache_debug_log); + abort(); +#endif + pagecache_dump(); + } + +#if defined(PAGECACHE_DEBUG) + KEYCACHE_DBUG_ASSERT(rc != ETIMEDOUT); +#else + assert(rc != ETIMEDOUT); +#endif + return rc; +} +#else +#if defined(PAGECACHE_DEBUG) +static int pagecache_pthread_cond_wait(pthread_cond_t *cond, + pthread_mutex_t *mutex) +{ + int rc; + KEYCACHE_THREAD_TRACE_END("started waiting"); + rc= pthread_cond_wait(cond, mutex); + KEYCACHE_THREAD_TRACE_BEGIN("finished waiting"); + return rc; +} +#endif +#endif /* defined(PAGECACHE_TIMEOUT) && !defined(__WIN__) */ + +#if defined(PAGECACHE_DEBUG) +static int ___pagecache_pthread_mutex_lock(pthread_mutex_t *mutex) +{ + int rc; + rc= pthread_mutex_lock(mutex); + KEYCACHE_THREAD_TRACE_BEGIN(""); + return rc; +} + + +static void ___pagecache_pthread_mutex_unlock(pthread_mutex_t *mutex) +{ + KEYCACHE_THREAD_TRACE_END(""); + pthread_mutex_unlock(mutex); +} + + +static int ___pagecache_pthread_cond_signal(pthread_cond_t *cond) +{ + int rc; + KEYCACHE_THREAD_TRACE("signal"); + rc= pthread_cond_signal(cond); + return rc; +} + + +#if defined(PAGECACHE_DEBUG_LOG) + + +static void pagecache_debug_print(const char * fmt, ...) +{ + va_list args; + va_start(args,fmt); + if (pagecache_debug_log) + { + VOID(vfprintf(pagecache_debug_log, fmt, args)); + VOID(fputc('\n',pagecache_debug_log)); + } + va_end(args); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#if defined(PAGECACHE_DEBUG_LOG) + + +void pagecache_debug_log_close(void) +{ + if (pagecache_debug_log) + fclose(pagecache_debug_log); +} +#endif /* defined(PAGECACHE_DEBUG_LOG) */ + +#endif /* defined(PAGECACHE_DEBUG) */ diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h new file mode 100644 index 00000000000..478f71161eb --- /dev/null +++ b/storage/maria/ma_pagecache.h @@ -0,0 +1,260 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Page cache variable structures */ + +#ifndef _ma_pagecache_h +#define _ma_pagecache_h +C_MODE_START + +#include "ma_loghandler_lsn.h" +#include + +/* Type of the page */ +enum pagecache_page_type +{ + /* + Used only for control page type changing during debugging. This define + should only be using when using DBUG. + */ + PAGECACHE_EMPTY_PAGE, + /* the page does not contain LSN */ + PAGECACHE_PLAIN_PAGE, + /* the page contain LSN (maria tablespace page) */ + PAGECACHE_LSN_PAGE +}; + +/* + This enum describe lock status changing. every type of page cache will + interpret WRITE/READ lock as it need. +*/ +enum pagecache_page_lock +{ + PAGECACHE_LOCK_LEFT_UNLOCKED, /* free -> free */ + PAGECACHE_LOCK_LEFT_READLOCKED, /* read -> read */ + PAGECACHE_LOCK_LEFT_WRITELOCKED, /* write -> write */ + PAGECACHE_LOCK_READ, /* free -> read */ + PAGECACHE_LOCK_WRITE, /* free -> write */ + PAGECACHE_LOCK_READ_UNLOCK, /* read -> free */ + PAGECACHE_LOCK_WRITE_UNLOCK, /* write -> free */ + PAGECACHE_LOCK_WRITE_TO_READ /* write -> read */ +}; +/* + This enum describe pin status changing +*/ +enum pagecache_page_pin +{ + PAGECACHE_PIN_LEFT_PINNED, /* pinned -> pinned */ + PAGECACHE_PIN_LEFT_UNPINNED, /* unpinned -> unpinned */ + PAGECACHE_PIN, /* unpinned -> pinned */ + PAGECACHE_UNPIN /* pinned -> unpinned */ +}; +/* How to write the page */ +enum pagecache_write_mode +{ + /* do not write immediately, i.e. it will be dirty page */ + PAGECACHE_WRITE_DELAY, + /* page already is in the file. (key cache insert analogue) */ + PAGECACHE_WRITE_DONE +}; + +typedef void *PAGECACHE_PAGE_LINK; + +/* file descriptor for Maria */ +typedef struct st_pagecache_file +{ + File file; +} PAGECACHE_FILE; + +/* page number for maria */ +typedef uint32 pgcache_page_no_t; + +/* declare structures that is used by st_pagecache */ + +struct st_pagecache_block_link; +typedef struct st_pagecache_block_link PAGECACHE_BLOCK_LINK; +struct st_pagecache_page; +typedef struct st_pagecache_page PAGECACHE_PAGE; +struct st_pagecache_hash_link; +typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK; + +#include + +typedef my_bool (*pagecache_disk_read_validator)(byte *page, gptr data); + +#define PAGECACHE_CHANGED_BLOCKS_HASH 128 /* must be power of 2 */ + +/* + The page cache structure + It also contains read-only statistics parameters. +*/ + +typedef struct st_pagecache +{ + my_bool inited; + my_bool resize_in_flush; /* true during flush of resize operation */ + my_bool can_be_used; /* usage of cache for read/write is allowed */ + uint shift; /* block size = 2 ^ shift */ + my_size_t mem_size; /* specified size of the cache memory */ + uint32 block_size; /* size of the page buffer of a cache block */ + ulong min_warm_blocks; /* min number of warm blocks; */ + ulong age_threshold; /* age threshold for hot blocks */ + ulonglong time; /* total number of block link operations */ + uint hash_entries; /* max number of entries in the hash table */ + int hash_links; /* max number of hash links */ + int hash_links_used; /* number of hash links taken from free links pool */ + int disk_blocks; /* max number of blocks in the cache */ + ulong blocks_used; /* maximum number of concurrently used blocks */ + ulong blocks_unused; /* number of currently unused blocks */ + ulong blocks_changed; /* number of currently dirty blocks */ + ulong warm_blocks; /* number of blocks in warm sub-chain */ + ulong cnt_for_resize_op; /* counter to block resize operation */ + ulong blocks_available; /* number of blocks available in the LRU chain */ + PAGECACHE_HASH_LINK **hash_root;/* arr. of entries into hash table buckets */ + PAGECACHE_HASH_LINK *hash_link_root;/* memory for hash table links */ + PAGECACHE_HASH_LINK *free_hash_list;/* list of free hash links */ + PAGECACHE_BLOCK_LINK *free_block_list;/* list of free blocks */ + PAGECACHE_BLOCK_LINK *block_root;/* memory for block links */ + byte HUGE_PTR *block_mem; /* memory for block buffers */ + PAGECACHE_BLOCK_LINK *used_last;/* ptr to the last block of the LRU chain */ + PAGECACHE_BLOCK_LINK *used_ins;/* ptr to the insertion block in LRU chain */ + pthread_mutex_t cache_lock; /* to lock access to the cache structure */ + WQUEUE resize_queue; /* threads waiting during resize operation */ + WQUEUE waiting_for_hash_link;/* waiting for a free hash link */ + WQUEUE waiting_for_block; /* requests waiting for a free block */ + /* hash for dirty file bl.*/ + PAGECACHE_BLOCK_LINK *changed_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; + /* hash for other file bl.*/ + PAGECACHE_BLOCK_LINK *file_blocks[PAGECACHE_CHANGED_BLOCKS_HASH]; + + /* + The following variables are and variables used to hold parameters for + initializing the key cache. + */ + + ulonglong param_buff_size; /* size the memory allocated for the cache */ + ulong param_block_size; /* size of the blocks in the key cache */ + ulong param_division_limit; /* min. percentage of warm blocks */ + ulong param_age_threshold; /* determines when hot block is downgraded */ + + /* Statistics variables. These are reset in reset_pagecache_counters(). */ + ulong global_blocks_changed; /* number of currently dirty blocks */ + ulonglong global_cache_w_requests;/* number of write requests (write hits) */ + ulonglong global_cache_write; /* number of writes from cache to files */ + ulonglong global_cache_r_requests;/* number of read requests (read hits) */ + ulonglong global_cache_read; /* number of reads from files to cache */ + + int blocks; /* max number of blocks in the cache */ + my_bool in_init; /* Set to 1 in MySQL during init/resize */ +} PAGECACHE; + +/* The default key cache */ +extern PAGECACHE dflt_pagecache_var, *dflt_pagecache; + +extern int init_pagecache(PAGECACHE *pagecache, my_size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size); +extern int resize_pagecache(PAGECACHE *pagecache, + my_size_t use_mem, uint division_limit, + uint age_threshold); +extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, + uint age_threshold); + +#define pagecache_read(P,F,N,L,B,T,K,I) \ + pagecache_valid_read(P,F,N,L,B,T,K,I,0,0) + +extern byte *pagecache_valid_read(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + PAGECACHE_PAGE_LINK *link, + pagecache_disk_read_validator validator, + gptr validator_data); + +#define pagecache_write(P,F,N,L,B,T,O,I,M,K) \ + pagecache_write_part(P,F,N,L,B,T,O,I,M,K,0,(P)->block_size) + +extern my_bool pagecache_write_part(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint level, + byte *buff, + enum pagecache_page_type type, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + enum pagecache_write_mode write_mode, + PAGECACHE_PAGE_LINK *link, + uint offset, + uint size); +extern void pagecache_unlock(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page, + LSN lsn); +extern void pagecache_unlock_by_link(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + enum pagecache_page_lock lock, + enum pagecache_page_pin pin, + LSN first_REDO_LSN_for_page, + LSN lsn); +extern void pagecache_unpin(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + LSN lsn); +extern void pagecache_unpin_by_link(PAGECACHE *pagecache, + PAGECACHE_PAGE_LINK *link, + LSN lsn); +extern int flush_pagecache_blocks(PAGECACHE *keycache, + PAGECACHE_FILE *file, + enum flush_type type); +extern my_bool pagecache_delete(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + enum pagecache_page_lock lock, + my_bool flush); +extern my_bool pagecache_delete_pages(PAGECACHE *pagecache, + PAGECACHE_FILE *file, + pgcache_page_no_t pageno, + uint page_count, + enum pagecache_page_lock lock, + my_bool flush); +extern void end_pagecache(PAGECACHE *keycache, my_bool cleanup); +extern my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, + LEX_STRING *str, + LSN *min_lsn, + LSN *max_lsn); +extern int reset_pagecache_counters(const char *name, PAGECACHE *pagecache); + + +/* Functions to handle multiple key caches */ +extern my_bool multi_pagecache_init(void); +extern void multi_pagecache_free(void); +extern PAGECACHE *multi_pagecache_search(byte *key, uint length, + PAGECACHE *def); +extern my_bool multi_pagecache_set(const byte *key, uint length, + PAGECACHE *pagecache); +extern void multi_pagecache_change(PAGECACHE *old_data, + PAGECACHE *new_data); +extern int reset_pagecache_counters(const char *name, + PAGECACHE *pagecache); + +C_MODE_END +#endif /* _keycache_h */ diff --git a/storage/maria/ma_pagecaches.c b/storage/maria/ma_pagecaches.c new file mode 100644 index 00000000000..d2ed4edca31 --- /dev/null +++ b/storage/maria/ma_pagecaches.c @@ -0,0 +1,105 @@ +/* Copyright (C) 2003-2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Handling of multiple key caches + + The idea is to have a thread safe hash on the table name, + with a default key cache value that is returned if the table name is not in + the cache. +*/ + +#include "maria_def.h" +#include "ma_pagecache.h" +#include +#include +#include "../../mysys/my_safehash.h" + +/***************************************************************************** + Functions to handle the pagecache objects +*****************************************************************************/ + +/* Variable to store all key cache objects */ +static SAFE_HASH pagecache_hash; + + +my_bool multi_pagecache_init(void) +{ + return safe_hash_init(&pagecache_hash, 16, (byte*) maria_pagecache); +} + + +void multi_pagecache_free(void) +{ + safe_hash_free(&pagecache_hash); +} + +/* + Get a key cache to be used for a specific table. + + SYNOPSIS + multi_pagecache_search() + key key to find (usually table path) + uint length Length of key. + def Default value if no key cache + + NOTES + This function is coded in such a way that we will return the + default key cache even if one never called multi_pagecache_init. + This will ensure that it works with old MyISAM clients. + + RETURN + key cache to use +*/ + +PAGECACHE *multi_pagecache_search(byte *key, uint length, + PAGECACHE *def) +{ + if (!pagecache_hash.hash.records) + return def; + return (PAGECACHE*) safe_hash_search(&pagecache_hash, key, length, + (void*) def); +} + + +/* + Assosiate a key cache with a key + + + SYONOPSIS + multi_pagecache_set() + key key (path to table etc..) + length Length of key + pagecache cache to assococite with the table + + NOTES + This can be used both to insert a new entry and change an existing + entry +*/ + + +my_bool multi_pagecache_set(const byte *key, uint length, + PAGECACHE *pagecache) +{ + return safe_hash_set(&pagecache_hash, key, length, (byte*) pagecache); +} + + +void multi_pagecache_change(PAGECACHE *old_data, + PAGECACHE *new_data) +{ + safe_hash_change(&pagecache_hash, (byte*) old_data, (byte*) new_data); +} diff --git a/storage/maria/ma_panic.c b/storage/maria/ma_panic.c new file mode 100644 index 00000000000..0394f630343 --- /dev/null +++ b/storage/maria/ma_panic.c @@ -0,0 +1,134 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "ma_fulltext.h" + +/* + Stop usage of Maria + + SYNOPSIS + maria_panic() + flag HA_PANIC_CLOSE: All maria files (tables and log) are closed. + maria_end() is called. + HA_PANIC_WRITE: All misam files are unlocked and + all changed data in single user maria is + written to file + HA_PANIC_READ All maria files that was locked when + maria_panic(HA_PANIC_WRITE) was done is + locked. A maria_readinfo() is done for + all single user files to get changes + in database + + RETURN + 0 ok + # error number in case of error +*/ + +int maria_panic(enum ha_panic_function flag) +{ + int error=0; + LIST *list_element,*next_open; + MARIA_HA *info; + DBUG_ENTER("maria_panic"); + + if (!maria_inited) + DBUG_RETURN(0); + pthread_mutex_lock(&THR_LOCK_maria); + for (list_element=maria_open_list ; list_element ; list_element=next_open) + { + next_open=list_element->next; /* Save if close */ + info=(MARIA_HA*) list_element->data; + switch (flag) { + case HA_PANIC_CLOSE: + /* + If bad luck (if some tables would be used now, which normally does not + happen in MySQL), as we release the mutex, the list may change and so + we may crash. + */ + pthread_mutex_unlock(&THR_LOCK_maria); + if (maria_close(info)) + error=my_errno; + pthread_mutex_lock(&THR_LOCK_maria); + break; + case HA_PANIC_WRITE: /* Do this to free databases */ +#ifdef CANT_OPEN_FILES_TWICE + if (info->s->options & HA_OPTION_READ_ONLY_DATA) + break; +#endif + if (flush_pagecache_blocks(info->s->pagecache, &info->s->kfile, + FLUSH_RELEASE)) + error=my_errno; + if (info->opt_flag & WRITE_CACHE_USED) + if (flush_io_cache(&info->rec_cache)) + error=my_errno; + if (info->opt_flag & READ_CACHE_USED) + { + if (flush_io_cache(&info->rec_cache)) + error=my_errno; + reinit_io_cache(&info->rec_cache,READ_CACHE,0, + (pbool) (info->lock_type != F_UNLCK),1); + } + if (info->lock_type != F_UNLCK && ! info->was_locked) + { + info->was_locked=info->lock_type; + if (maria_lock_database(info,F_UNLCK)) + error=my_errno; + } +#ifdef CANT_OPEN_FILES_TWICE + if (info->s->kfile.file >= 0 && my_close(info->s->kfile.file, MYF(0))) + error = my_errno; + if (info->dfile.file >= 0 && my_close(info->dfile.file, MYF(0))) + error = my_errno; + info->s->kfile.file= info->dfile.file= -1;/* Files aren't open anymore */ + break; +#endif + case HA_PANIC_READ: /* Restore to before WRITE */ +#ifdef CANT_OPEN_FILES_TWICE + { /* Open closed files */ + char name_buff[FN_REFLEN]; + if (info->s->kfile.file < 0) + if ((info->s->kfile.file= my_open(fn_format(name_buff, + info->filename, "", + N_NAME_IEXT,4), + info->mode, + MYF(MY_WME))) < 0) + error = my_errno; + if (info->dfile.file < 0) + { + if ((info->dfile.file= my_open(fn_format(name_buff, info->filename, + "", N_NAME_DEXT, 4), + info->mode, + MYF(MY_WME))) < 0) + error = my_errno; + info->rec_cache.file= info->dfile.file; + } + } +#endif + if (info->was_locked) + { + if (maria_lock_database(info, info->was_locked)) + error=my_errno; + info->was_locked=0; + } + break; + } + } + pthread_mutex_unlock(&THR_LOCK_maria); + if (flag == HA_PANIC_CLOSE) + maria_end(); + if (!error) + DBUG_RETURN(0); + DBUG_RETURN(my_errno=error); +} /* maria_panic */ diff --git a/storage/maria/ma_preload.c b/storage/maria/ma_preload.c new file mode 100644 index 00000000000..44fc12f8571 --- /dev/null +++ b/storage/maria/ma_preload.c @@ -0,0 +1,128 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Preload indexes into key cache +*/ + +#include "maria_def.h" + + +/* + Preload pages of the index file for a table into the key cache + + SYNOPSIS + maria_preload() + info open table + map map of indexes to preload into key cache + ignore_leaves only non-leaves pages are to be preloaded + + RETURN VALUE + 0 if a success. error code - otherwise. + + NOTES. + At present pages for all indexes are preloaded. + In future only pages for indexes specified in the key_map parameter + of the table will be preloaded. +*/ + +int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves) +{ + uint i; + ulong length, block_length= 0; + uchar *buff= NULL; + MARIA_SHARE* share= info->s; + uint keys= share->state.header.keys; + MARIA_KEYDEF *keyinfo= share->keyinfo; + my_off_t key_file_length= share->state.state.key_file_length; + my_off_t pos= share->base.keystart; + DBUG_ENTER("maria_preload"); + + if (!keys || !maria_is_any_key_active(key_map) || key_file_length == pos) + DBUG_RETURN(0); + + block_length= keyinfo[0].block_length; + + /* Check whether all indexes use the same block size */ + for (i= 1 ; i < keys ; i++) + { + if (keyinfo[i].block_length != block_length) + DBUG_RETURN(my_errno= HA_ERR_NON_UNIQUE_BLOCK_SIZE); + } + + length= info->preload_buff_size/block_length * block_length; + set_if_bigger(length, block_length); + + if (!(buff= (uchar *) my_malloc(length, MYF(MY_WME)))) + DBUG_RETURN(my_errno= HA_ERR_OUT_OF_MEM); + + if (flush_pagecache_blocks(share->pagecache, &share->kfile, FLUSH_RELEASE)) + goto err; + + do + { + /* Read the next block of index file into the preload buffer */ + if ((my_off_t) length > (key_file_length-pos)) + length= (ulong) (key_file_length-pos); + if (my_pread(share->kfile.file, (byte*) buff, length, pos, + MYF(MY_FAE|MY_FNABP))) + goto err; + + if (ignore_leaves) + { + uchar *end= buff+length; + do + { + if (_ma_test_if_nod(buff)) + { + DBUG_ASSERT(share->pagecache->block_size == block_length); + if (pagecache_write(share->pagecache, + &share->kfile, pos / block_length, + DFLT_INIT_HITS, + (byte*) buff, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DONE, 0)) + goto err; + } + pos+= block_length; + } + while ((buff+= block_length) != end); + buff= end-length; + } + else + { + if (pagecache_write(share->pagecache, + &share->kfile, pos / block_length, + DFLT_INIT_HITS, + (byte*) buff, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DONE, 0)) + goto err; + pos+= length; + } + } + while (pos != key_file_length); + + my_free((char*) buff, MYF(0)); + DBUG_RETURN(0); + +err: + my_free((char*) buff, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(my_errno= errno); +} diff --git a/storage/maria/ma_range.c b/storage/maria/ma_range.c new file mode 100644 index 00000000000..b359868e8e4 --- /dev/null +++ b/storage/maria/ma_range.c @@ -0,0 +1,262 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Gives a approximated number of how many records there is between two keys. + Used when optimizing querries. + */ + +#include "maria_def.h" +#include "ma_rt_index.h" + +static ha_rows _ma_record_pos(MARIA_HA *info,const byte *key,uint key_len, + enum ha_rkey_function search_flag); +static double _ma_search_pos(MARIA_HA *info,MARIA_KEYDEF *keyinfo, byte *key, + uint key_len,uint nextflag, my_off_t pos); +static uint _ma_keynr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *keypos, uint *ret_max_key); + + +/** + @brief Estimate how many records there is in a given range + + @param info MARIA handler + @param inx Index to use + @param min_key Min key. Is = 0 if no min range + @param max_key Max key. Is = 0 if no max range + + @note + We should ONLY return 0 if there is no rows in range + + @return Estimated number of rows or error + @retval HA_POS_ERROR error (or we can't estimate number of rows) + @retval number Estimated number of rows +*/ + +ha_rows maria_records_in_range(MARIA_HA *info, int inx, key_range *min_key, + key_range *max_key) +{ + ha_rows start_pos,end_pos,res; + DBUG_ENTER("maria_records_in_range"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(HA_POS_ERROR); + + if (fast_ma_readinfo(info)) + DBUG_RETURN(HA_POS_ERROR); + info->update&= (HA_STATE_CHANGED+HA_STATE_ROW_CHANGED); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + + switch(info->s->keyinfo[inx].key_alg){ +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + { + byte *key_buff; + uint start_key_len; + + /* + The problem is that the optimizer doesn't support + RTree keys properly at the moment. + Hope this will be fixed some day. + But now NULL in the min_key means that we + didn't make the task for the RTree key + and expect BTree functionality from it. + As it's not able to handle such request + we return the error. + */ + if (!min_key) + { + res= HA_POS_ERROR; + break; + } + key_buff= info->lastkey+info->s->base.max_key_length; + start_key_len= _ma_pack_key(info,inx, key_buff, + min_key->key, min_key->length, + (HA_KEYSEG**) 0); + res= maria_rtree_estimate(info, inx, key_buff, start_key_len, + maria_read_vec[min_key->flag]); + res= res ? res : 1; /* Don't return 0 */ + break; + } +#endif + case HA_KEY_ALG_BTREE: + default: + start_pos= (min_key ? + _ma_record_pos(info, min_key->key, min_key->length, + min_key->flag) : + (ha_rows) 0); + end_pos= (max_key ? + _ma_record_pos(info, max_key->key, max_key->length, + max_key->flag) : + info->state->records+ (ha_rows) 1); + res= (end_pos < start_pos ? (ha_rows) 0 : + (end_pos == start_pos ? (ha_rows) 1 : end_pos-start_pos)); + if (start_pos == HA_POS_ERROR || end_pos == HA_POS_ERROR) + res=HA_POS_ERROR; + } + + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + fast_ma_writeinfo(info); + + /** + @todo LOCK + If res==0 (no rows), if we need to guarantee repeatability of the search, + we will need to set a next-key lock in this statement. + Also SELECT COUNT(*)... + */ + + DBUG_PRINT("info",("records: %ld",(ulong) (res))); + DBUG_RETURN(res); +} + + + /* Find relative position (in records) for key in index-tree */ + +static ha_rows _ma_record_pos(MARIA_HA *info, const byte *key, uint key_len, + enum ha_rkey_function search_flag) +{ + uint inx=(uint) info->lastinx, nextflag; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+inx; + byte *key_buff; + double pos; + DBUG_ENTER("_ma_record_pos"); + DBUG_PRINT("enter",("search_flag: %d",search_flag)); + + if (key_len == 0) + key_len= USE_WHOLE_KEY; + key_buff=info->lastkey+info->s->base.max_key_length; + key_len= _ma_pack_key(info, inx, key_buff, key, key_len, + (HA_KEYSEG**) 0); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg, + key_buff, key_len);); + nextflag=maria_read_vec[search_flag]; + if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) + key_len=USE_WHOLE_KEY; + + pos= _ma_search_pos(info,keyinfo, key_buff, key_len, + nextflag | SEARCH_SAVE_BUFF, + info->s->state.key_root[inx]); + if (pos >= 0.0) + { + DBUG_PRINT("exit",("pos: %ld",(ulong) (pos*info->state->records))); + DBUG_RETURN((ulong) (pos*info->state->records+0.5)); + } + DBUG_RETURN(HA_POS_ERROR); +} + + + /* This is a modified version of _ma_search */ + /* Returns offset for key in indextable (decimal 0.0 <= x <= 1.0) */ + +static double _ma_search_pos(register MARIA_HA *info, + register MARIA_KEYDEF *keyinfo, + byte *key, uint key_len, uint nextflag, + register my_off_t pos) +{ + int flag; + uint nod_flag,keynr,max_keynr; + my_bool after_key; + byte *keypos, *buff; + double offset; + DBUG_ENTER("_ma_search_pos"); + LINT_INIT(max_keynr); + + if (pos == HA_OFFSET_ERROR) + DBUG_RETURN(0.5); + + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->buff,1))) + goto err; + flag=(*keyinfo->bin_search)(info, keyinfo, buff, key, key_len, nextflag, + &keypos,info->lastkey, &after_key); + nod_flag=_ma_test_if_nod(buff); + keynr= _ma_keynr(info,keyinfo,buff,keypos,&max_keynr); + + if (flag) + { + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); /* error */ + /* + Didn't found match. keypos points at next (bigger) key + Try to find a smaller, better matching key. + Matches keynr + [0-1] + */ + if (flag > 0 && ! nod_flag) + offset= 1.0; + else if ((offset= _ma_search_pos(info,keyinfo,key,key_len,nextflag, + _ma_kpos(nod_flag,keypos))) < 0) + DBUG_RETURN(offset); + } + else + { + /* + Found match. Keypos points at the start of the found key + Matches keynr+1 + */ + offset=1.0; /* Matches keynr+1 */ + if ((nextflag & SEARCH_FIND) && nod_flag && + ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME || + key_len != USE_WHOLE_KEY)) + { + /* + There may be identical keys in the tree. Try to match on of those. + Matches keynr + [0-1] + */ + if ((offset= _ma_search_pos(info,keyinfo,key,key_len,SEARCH_FIND, + _ma_kpos(nod_flag,keypos))) < 0) + DBUG_RETURN(offset); /* Read error */ + } + } + DBUG_PRINT("info",("keynr: %d offset: %g max_keynr: %d nod: %d flag: %d", + keynr,offset,max_keynr,nod_flag,flag)); + DBUG_RETURN((keynr+offset)/(max_keynr+1)); +err: + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1.0); +} + + + /* Get keynummer of current key and max number of keys in nod */ + +static uint _ma_keynr(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *page, byte *keypos, uint *ret_max_key) +{ + uint nod_flag,keynr,max_key; + byte t_buff[HA_MAX_KEY_BUFF],*end; + + end= page+maria_data_on_page(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + + if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + *ret_max_key= (uint) (end-page)/(keyinfo->keylength+nod_flag); + return (uint) (keypos-page)/(keyinfo->keylength+nod_flag); + } + + max_key=keynr=0; + t_buff[0]=0; /* Safety */ + while (page < end) + { + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff)) + return 0; /* Error */ + max_key++; + if (page == keypos) + keynr=max_key; + } + *ret_max_key=max_key; + return(keynr); +} diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c new file mode 100644 index 00000000000..a42fbdf0458 --- /dev/null +++ b/storage/maria/ma_recovery.c @@ -0,0 +1,267 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3072 Maria recovery + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* Here is the implementation of this module */ + +#include "page_cache.h" +#include "least_recently_dirtied.h" +#include "transaction.h" +#include "share.h" +#include "log.h" + +typedef struct st_record_type_properties { + /* used for debug error messages or "maria_read_log" command-line tool: */ + char *name, + my_bool record_ends_group; + /* a function to execute when we see the record during the REDO phase */ + int (*record_execute_in_redo_phase)(RECORD *); /* param will be record header instead later */ + /* a function to execute when we see the record during the UNDO phase */ + int (*record_execute_in_undo_phase)(RECORD *); /* param will be record header instead later */ +} RECORD_TYPE_PROPERTIES; + +int no_op(RECORD *) {return 0}; + +RECORD_TYPE_PROPERTIES all_record_type_properties[]= +{ + /* listed here in the order of the "log records type" enumeration */ + {"REDO_INSERT_HEAD", FALSE, redo_insert_head_execute_in_redo_phase, no_op}, + ..., + {"UNDO_INSERT" , TRUE , undo_insert_execute_in_redo_phase, undo_insert_execute_in_undo_phase}, + {"COMMIT", , TRUE , commit_execute_in_redo_phase, no_op}, + ... +}; + +int redo_insert_head_execute_in_redo_phase(RECORD *record) +{ + /* write the data to the proper page */ +} + +int undo_insert_execute_in_redo_phase(RECORD *record) +{ + trans_table[short_trans_id].undo_lsn= record.lsn; + /* don't restore the old version of the row */ +} + +int undo_insert_execute_in_undo_phase(RECORD *record) +{ + /* restore the old version of the row */ + trans_table[short_trans_id].undo_lsn= record.prev_undo_lsn; +} + +int commit_execute_in_redo_phase(RECORD *record) +{ + trans_table[short_trans_id].state= COMMITTED; + /* + and that's all: the delete/update handler should not be woken up! as there + may be REDO for purge further in the log. + */ +} + +#define record_ends_group(R) \ + all_record_type_properties[(R)->type].record_ends_group) + +#define execute_log_record_in_redo_phase(R) \ + all_record_type_properties[(R).type].record_execute_in_redo_phase(R) + + +int recovery() +{ + control_file_create_or_open(); + /* + init log handler: tell it that we are going to do large reads of the + log, sequential and backward. Log handler could decide to alloc a big + read-only IO_CACHE for this, or use its usual page cache. + */ + + /* read checkpoint log record from log handler */ + RECORD *checkpoint_record= log_read_record(last_checkpoint_lsn_at_start); + + /* parse this record, build structs (dirty_pages, transactions table, file_map) */ + /* + read log records (note: sometimes only the header is needed, for ex during + REDO phase only the header of UNDO is needed, not the 4G blob in the + variable-length part, so I could use that; however for PREPARE (which is a + variable-length record) I'll need to read the full record in the REDO + phase): + */ + + /**** REDO PHASE *****/ + + record= log_read_record(min(rec_lsn, ...)); /* later, read only header */ + + /* + if log handler knows the end LSN of the log, we could print here how many + MB of log we have to read (to give an idea of the time), and print + progress notes. + */ + + while (record != NULL) + { + /* + A complete group is a set of log records with an "end mark" record + (e.g. a set of REDOs for an operation, terminated by an UNDO for this + operation); if there is no "end mark" record the group is incomplete + and won't be executed. + */ + if (record_ends_group(record) + { + if (trans_table[record.short_trans_id].group_start_lsn != 0) + { + /* + There is a complete group for this transaction, containing more than + this event. + We're going to read recently read log records: + for this log_read_record() to be efficient (not touch the disk), + log handler could cache recently read pages + (can just use an IO_CACHE of 10 MB to read the log, or the normal + log handler page cache). + Without it only OS file cache will help. + */ + record2= + log_read_record(trans_table[record.short_trans_id].group_start_lsn); + + do + { + if (record2.short_trans_id == record.short_trans_id) + execute_log_record_in_redo_phase(record2); /* it's in our group */ + record2= log_read_next_record(); + } + while (record2.lsn < record.lsn); + trans_table[record.short_trans_id].group_start_lsn= 0; /* group finished */ + } + execute_log_record_in_redo_phase(record); + } + else /* record does not end group */ + { + /* just record the fact, can't know if can execute yet */ + if (trans_table[short_trans_id].group_start_lsn == 0) /* group not yet started */ + trans_table[short_trans_id].group_start_lsn= record.lsn; + } + + /* + Later we can optimize: instead of "execute_log_record(record2)", do + copy_record_into_exec_buffer(record2): + this will just copy record into a multi-record (10 MB?) memory buffer, + and when buffer is full, will do sorting of REDOs per + page id and execute them. + This sorting will enable us to do more sequential reads of the + data/index pages. + Note that updating bitmap pages (when we have executed a REDO for a page + we update its bitmap page) may break the sequential read of pages, + so maybe we should read and cache bitmap pages in the beginning. + Or ok the sequence will be broken, but quickly all bitmap pages will be + in memory and so the sequence will not be broken anymore. + Sorting could even determine, based on physical device of files + ("st_dev" in stat()), that some files should be should be taken by + different threads, if we want to do parallism. + */ + /* + Here's how to read a complete variable-length record if needed: + read the header, allocate buffer of record length, read whole + record. + */ + record= log_read_next_record(); + } + + /* + Earlier or here, create true transactions in TM. + If done earlier, note that TM should not wake up the delete/update handler + when it receives a commit info, as existing REDO for purge may exist in + the log, and so the delete/update handler may do changes which conflict + with these REDOs. + Even if done here, better to not wake it up now as we're going to free the + page cache. + + MikaelR suggests: support checkpoints during REDO phase too: do checkpoint + after a certain amount of log records have been executed. This helps + against repeated crashes. Those checkpoints could not be user-requested + (as engine is not communicating during the REDO phase), so they would be + automatic: this changes the original assumption that we don't write to the + log while in the REDO phase, but why not. How often should we checkpoint? + */ + + /* + We want to have two steps: + engine->recover_with_max_memory(); + next_engine->recover_with_max_memory(); + engine->init_with_normal_memory(); + next_engine->init_with_normal_memory(); + So: in recover_with_max_memory() allocate a giant page cache, do REDO + phase, then all page cache is flushed and emptied and freed (only retain + small structures like TM): take full checkpoint, which is useful if + next engine crashes in its recovery the next second. + Destroy all shares (maria_close()), then at init_with_normal_memory() we + do this: + */ + + /**** UNDO PHASE *****/ + + print_information_to_error_log(nb of trans to roll back, nb of prepared trans); + + /* + Launch one or more threads to do the background rollback. Don't wait for + them to complete their rollback (background rollback; for debugging, we + can have an option which waits). Set a counter (total_of_rollback_threads) + to the number of threads to lauch. + + Note that InnoDB's rollback-in-background works as long as InnoDB is the + last engine to recover, otherwise MySQL will refuse new connections until + the last engine has recovered so it's not "background" from the user's + point of view. InnoDB is near top of sys_table_types so all others + (e.g. BDB) recover after it... So it's really "online rollback" only if + InnoDB is the only engine. + */ + + /* wake up delete/update handler */ + /* tell the TM that it can now accept new transactions */ + + /* + mark that checkpoint requests are now allowed. + */ +} + +pthread_handler_decl rollback_background_thread() +{ + /* + execute the normal runtime-rollback code for a bunch of transactions. + */ + while (trans in list_of_trans_to_rollback_by_this_thread) + { + while (trans->undo_lsn != 0) + { + /* this is the normal runtime-rollback code: */ + record= log_read_record(trans->undo_lsn); + execute_log_record_in_undo_phase(record); + trans->undo_lsn= record.prev_undo_lsn; + } + /* remove trans from list */ + } + lock_mutex(rollback_threads); /* or atomic counter */ + if (--total_of_rollback_threads == 0) + { + /* + All rollback threads are done. Print "rollback finished" to the error + log and take a full checkpoint. + */ + } + unlock_mutex(rollback_threads); + pthread_exit(); +} diff --git a/storage/maria/ma_recovery.h b/storage/maria/ma_recovery.h new file mode 100644 index 00000000000..d2901f5724c --- /dev/null +++ b/storage/maria/ma_recovery.h @@ -0,0 +1,25 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + WL#3072 Maria recovery + First version written by Guilhem Bichot on 2006-04-27. + Does not compile yet. +*/ + +/* This is the interface of this module. */ + +/* Performs recovery of the engine at start */ +int recovery(); diff --git a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c new file mode 100644 index 00000000000..5224698c614 --- /dev/null +++ b/storage/maria/ma_rename.c @@ -0,0 +1,140 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Rename a table +*/ + +#include "ma_fulltext.h" +#include "trnman_public.h" + +/** + @brief renames a table + + @param old_name current name of table + @param new_name table should be renamed to this name + + @return Operation status + @retval 0 OK + @retval !=0 Error +*/ + +int maria_rename(const char *old_name, const char *new_name) +{ + char from[FN_REFLEN],to[FN_REFLEN]; + int data_file_rename_error; +#ifdef USE_RAID + uint raid_type=0,raid_chunks=0; +#endif + MARIA_HA *info; + MARIA_SHARE *share; + myf sync_dir; + DBUG_ENTER("maria_rename"); + +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(old_name,"rename old_table"); + _ma_check_table_is_closed(new_name,"rename new table2"); +#endif + /** @todo LOCK take X-lock on table */ + if (!(info= maria_open(old_name, O_RDWR, HA_OPEN_FOR_REPAIR))) + DBUG_RETURN(my_errno); + share= info->s; +#ifdef USE_RAID + raid_type = share->base.raid_type; + raid_chunks = share->base.raid_chunks; +#endif + + sync_dir= (share->base.transactional && !share->temporary) ? + MY_SYNC_DIR : 0; + if (sync_dir) + { + uchar log_data[LSN_STORE_SIZE]; + LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 3]; + uint old_name_len= strlen(old_name), new_name_len= strlen(new_name); + int2store(log_data, old_name_len); + int2store(log_data + 2, new_name_len); + log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 2 + 2; + log_array[TRANSLOG_INTERNAL_PARTS + 1].str= (char *)old_name; + log_array[TRANSLOG_INTERNAL_PARTS + 1].length= old_name_len; + log_array[TRANSLOG_INTERNAL_PARTS + 2].str= (char *)new_name; + log_array[TRANSLOG_INTERNAL_PARTS + 2].length= new_name_len; + /* + For this record to be of any use for Recovery, we need the upper + MySQL layer to be crash-safe, which it is not now (that would require + work using the ddl_log of sql/sql_table.cc); when it is, we should + reconsider the moment of writing this log record (before or after op, + under THR_LOCK_maria or not...), how to use it in Recovery, and force + the log. For now this record is just informative. + */ + if (unlikely(translog_write_record(&share->state.create_rename_lsn, + LOGREC_REDO_RENAME_TABLE, + &dummy_transaction_object, NULL, + 2 + 2 + old_name_len + new_name_len, + sizeof(log_array)/sizeof(log_array[0]), + log_array, NULL))) + { + maria_close(info); + DBUG_RETURN(1); + } + /* + store LSN into file, needed for Recovery to not be confused if a + RENAME happened (applying REDOs to the wrong table). + */ + lsn_store(log_data, share->state.create_rename_lsn); + if (my_pwrite(share->kfile.file, log_data, sizeof(log_data), + sizeof(share->state.header) + 2, MYF(MY_NABP)) || + my_sync(share->kfile.file, MYF(MY_WME))) + { + maria_close(info); + DBUG_RETURN(1); + } + } + + maria_close(info); +#ifdef USE_RAID +#ifdef EXTRA_DEBUG + _ma_check_table_is_closed(old_name,"rename raidcheck"); +#endif +#endif /* USE_RAID */ + + fn_format(from,old_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + fn_format(to,new_name,"",MARIA_NAME_IEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + if (my_rename_with_symlink(from, to, MYF(MY_WME | sync_dir))) + DBUG_RETURN(my_errno); + fn_format(from,old_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); + fn_format(to,new_name,"",MARIA_NAME_DEXT,MY_UNPACK_FILENAME|MY_APPEND_EXT); +#ifdef USE_RAID + if (raid_type) + data_file_rename_error= my_raid_rename(from, to, raid_chunks, + MYF(MY_WME | sync_dir)); + else +#endif + data_file_rename_error= + my_rename_with_symlink(from, to, MYF(MY_WME | sync_dir)); + if (data_file_rename_error) + { + /* + now we have a renamed index file and a non-renamed data file, try to + undo the rename of the index file. + */ + data_file_rename_error= my_errno; + fn_format(from, old_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT)); + fn_format(to, new_name, "", MARIA_NAME_IEXT, MYF(MY_UNPACK_FILENAME|MY_APPEND_EXT)); + my_rename_with_symlink(to, from, MYF(MY_WME | sync_dir)); + } + DBUG_RETURN(data_file_rename_error); + +} diff --git a/storage/maria/ma_rfirst.c b/storage/maria/ma_rfirst.c new file mode 100644 index 00000000000..04c496d9c56 --- /dev/null +++ b/storage/maria/ma_rfirst.c @@ -0,0 +1,26 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* Read first row through a specfic key */ + +int maria_rfirst(MARIA_HA *info, byte *buf, int inx) +{ + DBUG_ENTER("maria_rfirst"); + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_PREV_FOUND; + DBUG_RETURN(maria_rnext(info,buf,inx)); +} /* maria_rfirst */ diff --git a/storage/maria/ma_rkey.c b/storage/maria/ma_rkey.c new file mode 100644 index 00000000000..6158935472b --- /dev/null +++ b/storage/maria/ma_rkey.c @@ -0,0 +1,177 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read record based on a key */ + +#include "maria_def.h" +#include "ma_rt_index.h" + + /* Read a record using key */ + /* Ordinary search_flag is 0 ; Give error if no record with key */ + +int maria_rkey(MARIA_HA *info, byte *buf, int inx, const byte *key, + uint key_len, enum ha_rkey_function search_flag) +{ + byte *key_buff; + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *keyinfo; + HA_KEYSEG *last_used_keyseg; + uint pack_key_length, use_key_length, nextflag; + DBUG_ENTER("maria_rkey"); + DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d", + (long) info, (long) buf, inx, search_flag)); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->last_key_func= search_flag; + keyinfo= share->keyinfo + inx; + + if (info->once_flags & USE_PACKED_KEYS) + { + info->once_flags&= ~USE_PACKED_KEYS; /* Reset flag */ + /* + key is already packed!; This happens when we are using a MERGE TABLE + */ + key_buff= info->lastkey+info->s->base.max_key_length; + pack_key_length= key_len; + bmove(key_buff,key,key_len); + last_used_keyseg= 0; + } + else + { + if (key_len == 0) + key_len=USE_WHOLE_KEY; + /* Save the packed key for later use in the second buffer of lastkey. */ + key_buff=info->lastkey+info->s->base.max_key_length; + pack_key_length= _ma_pack_key(info,(uint) inx, key_buff, key, + key_len, &last_used_keyseg); + /* Save packed_key_length for use by the MERGE engine. */ + info->pack_key_length= pack_key_length; + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE, keyinfo->seg, + key_buff, pack_key_length);); + } + + if (fast_ma_readinfo(info)) + goto err; + if (share->concurrent_insert) + rw_rdlock(&share->key_root_lock[inx]); + + nextflag=maria_read_vec[search_flag]; + use_key_length=pack_key_length; + if (!(nextflag & (SEARCH_FIND | SEARCH_NO_FIND | SEARCH_LAST))) + use_key_length=USE_WHOLE_KEY; + + switch (info->s->keyinfo[inx].key_alg) { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + if (maria_rtree_find_first(info,inx,key_buff,use_key_length,nextflag) < 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno= HA_ERR_CRASHED; + info->cur_row.lastpos= HA_OFFSET_ERROR; + } + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!_ma_search(info, keyinfo, key_buff, use_key_length, + maria_read_vec[search_flag], + info->s->state.key_root[inx]) && + share->concurrent_insert) + { + /* + If we searching for a partial key (or using >, >=, < or <=) and + the data is outside of the data file, we need to continue searching + for the first key inside the data file + */ + if (info->cur_row.lastpos >= info->state->data_file_length && + (search_flag != HA_READ_KEY_EXACT || + last_used_keyseg != keyinfo->seg + keyinfo->keysegs)) + { + do + { + uint not_used[2]; + /* + Skip rows that are inserted by other threads since we got a lock + Note that this can only happen if we are not searching after an + full length exact key, because the keys are sorted + according to position + */ + if (_ma_search_next(info, keyinfo, info->lastkey, + info->lastkey_length, + maria_readnext_vec[search_flag], + info->s->state.key_root[inx])) + break; + /* + Check that the found key does still match the search. + _ma_search_next() delivers the next key regardless of its + value. + */ + if (search_flag == HA_READ_KEY_EXACT && + ha_key_cmp(keyinfo->seg, (uchar*) key_buff, + (uchar*) info->lastkey, use_key_length, + SEARCH_FIND, not_used)) + { + my_errno= HA_ERR_KEY_NOT_FOUND; + info->cur_row.lastpos= HA_OFFSET_ERROR; + break; + } + } while (info->cur_row.lastpos >= info->state->data_file_length); + } + } + } + if (share->concurrent_insert) + rw_unlock(&share->key_root_lock[inx]); + + if (info->cur_row.lastpos == HA_OFFSET_ERROR) + { + fast_ma_writeinfo(info); + goto err; + } + + /* Calculate length of the found key; Used by maria_rnext_same */ + if ((keyinfo->flag & HA_VAR_LENGTH_KEY) && last_used_keyseg) + info->last_rkey_length= _ma_keylength_part(keyinfo, info->lastkey, + last_used_keyseg); + else + info->last_rkey_length= pack_key_length; + + /* Check if we don't want to have record back, only error message */ + if (!buf) + { + fast_ma_writeinfo(info); + DBUG_RETURN(0); + } + if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + + info->cur_row.lastpos= HA_OFFSET_ERROR; /* Didn't find row */ + +err: + /* Store last used key as a base for read next */ + memcpy(info->lastkey,key_buff,pack_key_length); + info->last_rkey_length= pack_key_length; + bzero((char*) info->lastkey+pack_key_length,info->s->base.rec_reflength); + info->lastkey_length=pack_key_length+info->s->base.rec_reflength; + + if (search_flag == HA_READ_AFTER_KEY) + info->update|=HA_STATE_NEXT_FOUND; /* Previous gives last row */ + DBUG_RETURN(my_errno); +} /* _ma_rkey */ diff --git a/storage/maria/ma_rlast.c b/storage/maria/ma_rlast.c new file mode 100644 index 00000000000..ebd039843c8 --- /dev/null +++ b/storage/maria/ma_rlast.c @@ -0,0 +1,26 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* Read last row with the same key as the previous read. */ + +int maria_rlast(MARIA_HA *info, byte *buf, int inx) +{ + DBUG_ENTER("maria_rlast"); + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_NEXT_FOUND; + DBUG_RETURN(maria_rprev(info,buf,inx)); +} /* maria_rlast */ diff --git a/storage/maria/ma_rnext.c b/storage/maria/ma_rnext.c new file mode 100644 index 00000000000..ccca05ff3ad --- /dev/null +++ b/storage/maria/ma_rnext.c @@ -0,0 +1,122 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#include "ma_rt_index.h" + + /* + Read next row with the same key as previous read + One may have done a write, update or delete of the previous row. + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! + */ + +int maria_rnext(MARIA_HA *info, byte *buf, int inx) +{ + int error,changed; + uint flag; + DBUG_ENTER("maria_rnext"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + flag=SEARCH_BIGGER; /* Read next */ + if (info->cur_row.lastpos == HA_OFFSET_ERROR && + info->update & HA_STATE_PREV_FOUND) + flag=0; /* Read first */ + + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + changed= _ma_test_if_changed(info); + if (!flag) + { + switch(info->s->keyinfo[inx].key_alg){ +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + error=maria_rtree_get_first(info,inx,info->lastkey_length); + break; +#endif + case HA_KEY_ALG_BTREE: + default: + error= _ma_search_first(info,info->s->keyinfo+inx, + info->s->state.key_root[inx]); + break; + } + } + else + { + switch (info->s->keyinfo[inx].key_alg) { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + /* + Note that rtree doesn't support that the table + may be changed since last call, so we do need + to skip rows inserted by other threads like in btree + */ + error= maria_rtree_get_next(info,inx,info->lastkey_length); + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!changed) + error= _ma_search_next(info,info->s->keyinfo+inx,info->lastkey, + info->lastkey_length,flag, + info->s->state.key_root[inx]); + else + error= _ma_search(info,info->s->keyinfo+inx,info->lastkey, + USE_WHOLE_KEY,flag, info->s->state.key_root[inx]); + } + } + + if (info->s->concurrent_insert) + { + if (!error) + { + while (info->cur_row.lastpos >= info->state->data_file_length) + { + /* Skip rows inserted by other threads since we got a lock */ + if ((error= _ma_search_next(info,info->s->keyinfo+inx, + info->lastkey, + info->lastkey_length, + SEARCH_BIGGER, + info->s->state.key_root[inx]))) + break; + } + } + rw_unlock(&info->s->key_root_lock[inx]); + } + /* Don't clear if database-changed */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_NEXT_FOUND; + + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); + } + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_PRINT("error",("Got error: %d, errno: %d",error, my_errno)); + DBUG_RETURN(my_errno); +} /* maria_rnext */ diff --git a/storage/maria/ma_rnext_same.c b/storage/maria/ma_rnext_same.c new file mode 100644 index 00000000000..207a438e10b --- /dev/null +++ b/storage/maria/ma_rnext_same.c @@ -0,0 +1,107 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" +#include "ma_rt_index.h" + +/* + Read next row with the same key as previous read, but abort if + the key changes. + One may have done a write, update or delete of the previous row. + + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! +*/ + +int maria_rnext_same(MARIA_HA *info, byte *buf) +{ + int error; + uint inx,not_used[2]; + MARIA_KEYDEF *keyinfo; + DBUG_ENTER("maria_rnext_same"); + + if ((int) (inx= info->lastinx) < 0 || + info->cur_row.lastpos == HA_OFFSET_ERROR) + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + keyinfo= info->s->keyinfo+inx; + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + + switch (keyinfo->key_alg) + { +#ifdef HAVE_RTREE_KEYS + case HA_KEY_ALG_RTREE: + if ((error=maria_rtree_find_next(info,inx, + maria_read_vec[info->last_key_func]))) + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->cur_row.lastpos= HA_OFFSET_ERROR; + break; + } + break; +#endif + case HA_KEY_ALG_BTREE: + default: + if (!(info->update & HA_STATE_RNEXT_SAME)) + { + /* First rnext_same; Store old key */ + memcpy(info->lastkey2,info->lastkey,info->last_rkey_length); + } + for (;;) + { + if ((error= _ma_search_next(info,keyinfo,info->lastkey, + info->lastkey_length,SEARCH_BIGGER, + info->s->state.key_root[inx]))) + break; + if (ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, + (uchar*) info->lastkey2, + info->last_rkey_length, SEARCH_FIND, not_used)) + { + error=1; + my_errno=HA_ERR_END_OF_FILE; + info->cur_row.lastpos= HA_OFFSET_ERROR; + break; + } + /* Skip rows that are inserted by other threads since we got a lock */ + if (info->cur_row.lastpos < info->state->data_file_length) + break; + } + } + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + /* Don't clear if database-changed */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_NEXT_FOUND | HA_STATE_RNEXT_SAME; + + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); + } + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_RETURN(my_errno); +} /* maria_rnext_same */ diff --git a/storage/maria/ma_rprev.c b/storage/maria/ma_rprev.c new file mode 100644 index 00000000000..5e7cfc9f41a --- /dev/null +++ b/storage/maria/ma_rprev.c @@ -0,0 +1,88 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + + /* + Read previous row with the same key as previous read + One may have done a write, update or delete of the previous row. + NOTE! Even if one changes the previous row, the next read is done + based on the position of the last used key! + */ + +int maria_rprev(MARIA_HA *info, byte *buf, int inx) +{ + int error,changed; + register uint flag; + MARIA_SHARE *share=info->s; + DBUG_ENTER("maria_rprev"); + + if ((inx = _ma_check_index(info,inx)) < 0) + DBUG_RETURN(my_errno); + flag=SEARCH_SMALLER; /* Read previous */ + if (info->cur_row.lastpos == HA_OFFSET_ERROR && + info->update & HA_STATE_NEXT_FOUND) + flag=0; /* Read last */ + + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + changed= _ma_test_if_changed(info); + if (share->concurrent_insert) + rw_rdlock(&share->key_root_lock[inx]); + if (!flag) + error= _ma_search_last(info, share->keyinfo+inx, + share->state.key_root[inx]); + else if (!changed) + error= _ma_search_next(info,share->keyinfo+inx,info->lastkey, + info->lastkey_length,flag, + share->state.key_root[inx]); + else + error= _ma_search(info,share->keyinfo+inx,info->lastkey, + USE_WHOLE_KEY, flag, share->state.key_root[inx]); + + if (share->concurrent_insert) + { + if (!error) + { + while (info->cur_row.lastpos >= info->state->data_file_length) + { + /* Skip rows that are inserted by other threads since we got a lock */ + if ((error= _ma_search_next(info,share->keyinfo+inx,info->lastkey, + info->lastkey_length, + SEARCH_SMALLER, + share->state.key_root[inx]))) + break; + } + } + rw_unlock(&share->key_root_lock[inx]); + } + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + info->update|= HA_STATE_PREV_FOUND; + if (error) + { + if (my_errno == HA_ERR_KEY_NOT_FOUND) + my_errno=HA_ERR_END_OF_FILE; + } + else if (!buf) + { + DBUG_RETURN(info->cur_row.lastpos == HA_OFFSET_ERROR ? my_errno : 0); + } + else if (!(*info->read_record)(info, buf, info->cur_row.lastpos)) + { + info->update|= HA_STATE_AKTIV; /* Record is read */ + DBUG_RETURN(0); + } + DBUG_RETURN(my_errno); +} /* maria_rprev */ diff --git a/storage/maria/ma_rrnd.c b/storage/maria/ma_rrnd.c new file mode 100644 index 00000000000..8e2b12dc60d --- /dev/null +++ b/storage/maria/ma_rrnd.c @@ -0,0 +1,52 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Read a record with random-access. The position to the record must + get by MARIA_HA. The next record can be read with pos= MARIA_POS_ERROR */ + + +#include "maria_def.h" + +/* + Read a row based on position. + + RETURN + 0 Ok. + HA_ERR_RECORD_DELETED Record is deleted. + HA_ERR_END_OF_FILE EOF. +*/ + +int maria_rrnd(MARIA_HA *info, byte *buf, MARIA_RECORD_POS filepos) +{ + DBUG_ENTER("maria_rrnd"); + + DBUG_ASSERT(filepos != HA_OFFSET_ERROR); +#ifdef NOT_USED + if (filepos == HA_OFFSET_ERROR) + { + if (info->cur_row.lastpos == HA_OFFSET_ERROR) /* First read ? */ + filepos= info->s->pack.header_length; /* Read first record */ + else + filepos= info->cur_row.nextpos; + } +#endif + + /* Init all but update-flag */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + + DBUG_RETURN((*info->s->read_record)(info, buf, filepos)); +} diff --git a/storage/maria/ma_rsame.c b/storage/maria/ma_rsame.c new file mode 100644 index 00000000000..052fe79af58 --- /dev/null +++ b/storage/maria/ma_rsame.c @@ -0,0 +1,69 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +/* + Find current row with read on position or read on key + + NOTES + If inx >= 0 find record using key + + RETURN + 0 Ok + HA_ERR_KEY_NOT_FOUND Row is deleted + HA_ERR_END_OF_FILE End of file +*/ + + +int maria_rsame(MARIA_HA *info, byte *record, int inx) +{ + DBUG_ENTER("maria_rsame"); + + if (inx != -1 && ! maria_is_key_active(info->s->state.key_map, inx)) + { + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + } + if (info->cur_row.lastpos == HA_OFFSET_ERROR || + info->update & HA_STATE_DELETED) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); /* No current record */ + } + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + /* Read row from data file */ + if (fast_ma_readinfo(info)) + DBUG_RETURN(my_errno); + + if (inx >= 0) + { + info->lastinx=inx; + info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, + info->cur_row.lastpos); + if (info->s->concurrent_insert) + rw_rdlock(&info->s->key_root_lock[inx]); + VOID(_ma_search(info,info->s->keyinfo+inx,info->lastkey, USE_WHOLE_KEY, + SEARCH_SAME, + info->s->state.key_root[inx])); + if (info->s->concurrent_insert) + rw_unlock(&info->s->key_root_lock[inx]); + } + + if (!(*info->read_record)(info, record, info->cur_row.lastpos)) + DBUG_RETURN(0); + if (my_errno == HA_ERR_RECORD_DELETED) + my_errno=HA_ERR_KEY_NOT_FOUND; + DBUG_RETURN(my_errno); +} /* maria_rsame */ diff --git a/storage/maria/ma_rsamepos.c b/storage/maria/ma_rsamepos.c new file mode 100644 index 00000000000..1e09bdb8db4 --- /dev/null +++ b/storage/maria/ma_rsamepos.c @@ -0,0 +1,58 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* read record through position and fix key-position */ +/* As maria_rsame but supply a position */ + +#include "maria_def.h" + + + /* + ** If inx >= 0 update index pointer + ** Returns one of the following values: + ** 0 = Ok. + ** HA_ERR_KEY_NOT_FOUND = Row is deleted + ** HA_ERR_END_OF_FILE = End of file + */ + +int maria_rsame_with_pos(MARIA_HA *info, byte *record, int inx, + MARIA_RECORD_POS filepos) +{ + DBUG_ENTER("maria_rsame_with_pos"); + DBUG_PRINT("enter",("index: %d filepos: %ld", inx, (long) filepos)); + + if (inx < -1 || + (inx >= 0 && ! maria_is_key_active(info->s->state.key_map, inx))) + { + DBUG_RETURN(my_errno=HA_ERR_WRONG_INDEX); + } + + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + if ((*info->s->read_record)(info, record, filepos)) + { + if (my_errno == HA_ERR_RECORD_DELETED) + my_errno=HA_ERR_KEY_NOT_FOUND; + DBUG_RETURN(my_errno); + } + info->cur_row.lastpos= filepos; + info->lastinx= inx; + if (inx >= 0) + { + info->lastkey_length= _ma_make_key(info,(uint) inx,info->lastkey,record, + info->cur_row.lastpos); + info->update|=HA_STATE_KEY_CHANGED; /* Don't use indexposition */ + } + DBUG_RETURN(0); +} /* maria_rsame_pos */ diff --git a/storage/maria/ma_rt_index.c b/storage/maria/ma_rt_index.c new file mode 100644 index 00000000000..27a83e433a4 --- /dev/null +++ b/storage/maria/ma_rt_index.c @@ -0,0 +1,1089 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +#define REINSERT_BUFFER_INC 10 +#define PICK_BY_AREA +/*#define PICK_BY_PERIMETER*/ + +typedef struct st_page_level +{ + uint level; + my_off_t offs; +} stPageLevel; + +typedef struct st_page_list +{ + ulong n_pages; + ulong m_pages; + stPageLevel *pages; +} stPageList; + + +/* + Find next key in r-tree according to search_flag recursively + + NOTES + Used in maria_rtree_find_first() and maria_rtree_find_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int maria_rtree_find_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uint search_flag, + uint nod_cmp_flag, my_off_t page, int level) +{ + uint nod_flag; + int res; + byte *page_buf, *k, *last; + int k_len; + uint *saved_key = (uint*) (info->maria_rtree_recursion_state) + level; + + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->maria_rtree_recursion_depth >= level) + { + k= page_buf + *saved_key; + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last= rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + if (!(res = maria_rtree_key_cmp(keyinfo->seg, + info->first_mbr_key, k, + info->last_rkey_length, nod_cmp_flag))) + { + switch ((res = maria_rtree_find_req(info, keyinfo, search_flag, + nod_cmp_flag, + _ma_kpos(nod_flag, k), + level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->maria_rtree_recursion_depth = level; + break; + default: /* error */ + case -1: + goto err1; + } + } + } + else + { + /* this is a leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, info->first_mbr_key, + k, info->last_rkey_length, search_flag)) + { + byte *after_key = (byte*) rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + info->maria_rtree_recursion_depth = level; + *saved_key = last - page_buf; + + if (after_key < last) + { + info->int_keypos = info->buff; + info->int_maxpos = info->buff + (last - after_key); + memcpy(info->buff, after_key, last - after_key); + info->keyread_buff_used = 0; + } + else + { + info->keyread_buff_used = 1; + } + + res = 0; + goto ok; + } + } + } + info->cur_row.lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->cur_row.lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Find first key in r-tree according to search_flag condition + + SYNOPSIS + maria_rtree_find_first() + info Handler to MARIA file + uint keynr Key number to use + key Key to search for + key_length Length of 'key' + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + /* + Save searched key, include data pointer. + The data pointer is required if the search_flag contains MBR_DATA. + */ + memcpy(info->first_mbr_key, key, keyinfo->keylength); + info->last_rkey_length = key_length; + + info->maria_rtree_recursion_depth = -1; + info->keyread_buff_used = 1; + + nod_cmp_flag= ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, + 0); +} + + +/* + Find next key in r-tree according to search_flag condition + + SYNOPSIS + maria_rtree_find_next() + info Handler to MARIA file + uint keynr Key number to use + search_flag Bitmap of flags how to do the search + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag) +{ + my_off_t root; + uint nod_cmp_flag; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (info->update & HA_STATE_DELETED) + return maria_rtree_find_first(info, keynr, info->lastkey, + info->lastkey_length, + search_flag); + + if (!info->keyread_buff_used) + { + byte *key= info->int_keypos; + + while (key < info->int_maxpos) + { + if (!maria_rtree_key_cmp(keyinfo->seg, + info->first_mbr_key, key, + info->last_rkey_length, search_flag)) + { + byte *after_key= key + keyinfo->keylength; + + info->cur_row.lastpos= _ma_dpos(info, 0, after_key); + memcpy(info->lastkey, key, info->lastkey_length); + + if (after_key < info->int_maxpos) + info->int_keypos= after_key; + else + info->keyread_buff_used= 1; + return 0; + } + key+= keyinfo->keylength; + } + } + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + nod_cmp_flag = ((search_flag & (MBR_EQUAL | MBR_WITHIN)) ? + MBR_WITHIN : MBR_INTERSECT); + return maria_rtree_find_req(info, keyinfo, search_flag, nod_cmp_flag, root, 0); +} + + +/* + Get next key in r-tree recursively + + NOTES + Used in maria_rtree_get_first() and maria_rtree_get_next() + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +static int maria_rtree_get_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, uint key_length, + my_off_t page, int level) +{ + byte *page_buf, *last, *k; + uint nod_flag, k_len; + int res; + uint *saved_key= (uint*) (info->maria_rtree_recursion_state) + level; + + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length))) + return -1; + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + if(info->maria_rtree_recursion_depth >= level) + { + k = page_buf + *saved_key; + if (!nod_flag) + { + /* Only leaf pages contain data references. */ + /* Need to check next key with data reference. */ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + } + } + else + { + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + } + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) + { + if (nod_flag) + { + /* this is an internal node in the tree */ + switch ((res = maria_rtree_get_req(info, keyinfo, key_length, + _ma_kpos(nod_flag, k), level + 1))) + { + case 0: /* found - exit from recursion */ + *saved_key = k - page_buf; + goto ok; + case 1: /* not found - continue searching */ + info->maria_rtree_recursion_depth = level; + break; + default: + case -1: /* error */ + goto err1; + } + } + else + { + /* this is a leaf */ + byte *after_key = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, k, info->lastkey_length); + + info->maria_rtree_recursion_depth = level; + *saved_key = k - page_buf; + + if (after_key < last) + { + info->int_keypos = (byte*) saved_key; + memcpy(info->buff, page_buf, keyinfo->block_length); + info->int_maxpos = rt_PAGE_END(info->buff); + info->keyread_buff_used = 0; + } + else + { + info->keyread_buff_used = 1; + } + + res = 0; + goto ok; + } + } + info->cur_row.lastpos = HA_OFFSET_ERROR; + my_errno = HA_ERR_KEY_NOT_FOUND; + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + info->cur_row.lastpos = HA_OFFSET_ERROR; + return -1; +} + + +/* + Get first key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length) +{ + my_off_t root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + info->maria_rtree_recursion_depth = -1; + info->keyread_buff_used = 1; + + return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); +} + + +/* + Get next key in r-tree + + RETURN + -1 Error + 0 Found + 1 Not found +*/ + +int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length) +{ + my_off_t root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if (!info->keyread_buff_used) + { + uint k_len = keyinfo->keylength - info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(info->int_keypos) */ + byte *key = info->buff + *(int*)info->int_keypos + k_len + + info->s->base.rec_reflength; + /* rt_PAGE_NEXT_KEY(key) */ + byte *after_key = key + k_len + info->s->base.rec_reflength; + + info->cur_row.lastpos = _ma_dpos(info, 0, after_key); + info->lastkey_length = k_len + info->s->base.rec_reflength; + memcpy(info->lastkey, key, k_len + info->s->base.rec_reflength); + + *(int*)info->int_keypos = key - info->buff; + if (after_key >= info->int_maxpos) + { + info->keyread_buff_used = 1; + } + + return 0; + } + else + { + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + return maria_rtree_get_req(info, &keyinfo[keynr], key_length, root, 0); + } +} + + +/* + Choose non-leaf better key for insertion +*/ + +#ifdef PICK_BY_PERIMETER +static uchar *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + uchar *key, + uint key_length, byte *page_buf, + uint nod_flag) +{ + double increase; + double best_incr = DBL_MAX; + double perimeter; + double best_perimeter; + uchar *best_key; + uchar *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + uchar *last = rt_PAGE_END(page_buf); + + LINT_INIT(best_perimeter); + LINT_INIT(best_key); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if ((increase = maria_rtree_perimeter_increase(keyinfo->seg, k, key, key_length, + &perimeter)) == -1) + return NULL; + if ((increase < best_incr)|| + (increase == best_incr && perimeter < best_perimeter)) + { + best_key = k; + best_perimeter= perimeter; + best_incr = increase; + } + } + return best_key; +} + +#endif /*PICK_BY_PERIMETER*/ + +#ifdef PICK_BY_AREA +static byte *maria_rtree_pick_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, byte *page_buf, + uint nod_flag) +{ + double increase; + double best_incr = DBL_MAX; + double area; + double best_area; + byte *best_key; + byte *k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + byte *last = rt_PAGE_END(page_buf); + + LINT_INIT(best_area); + LINT_INIT(best_key); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + /* The following is safe as -1.0 is an exact number */ + if ((increase = maria_rtree_area_increase(keyinfo->seg, k, key, key_length, + &area)) == -1.0) + return NULL; + /* The following should be safe, even if we compare doubles */ + if (increase < best_incr) + { + best_key = k; + best_area = area; + best_incr = increase; + } + else + { + /* The following should be safe, even if we compare doubles */ + if ((increase == best_incr) && (area < best_area)) + { + best_key = k; + best_area = area; + best_incr = increase; + } + } + } + return best_key; +} + +#endif /*PICK_BY_AREA*/ + +/* + Go down and insert key into tree + + RETURN + -1 Error + 0 Child was not split + 1 Child was split +*/ + +static int maria_rtree_insert_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, my_off_t page, + my_off_t *new_page, + int ins_level, int level) +{ + uint nod_flag; + int res; + byte *page_buf, *k; + + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length + + HA_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + if ((ins_level == -1 && nod_flag) || /* key: go down to leaf */ + (ins_level > -1 && ins_level > level)) /* branch: go down to ins_level */ + { + if ((k = maria_rtree_pick_key(info, keyinfo, key, key_length, page_buf, + nod_flag)) == NULL) + goto err1; + switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length, + _ma_kpos(nod_flag, k), new_page, + ins_level, level + 1))) + { + case 0: /* child was not split */ + { + maria_rtree_combine_rect(keyinfo->seg, k, key, k, key_length); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + case 1: /* child was split */ + { + byte *new_key = page_buf + keyinfo->block_length + nod_flag; + /* set proper MBR for key */ + if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, + _ma_kpos(nod_flag, k))) + goto err1; + /* add new key for new page */ + _ma_kpointer(info, new_key - nod_flag, *new_page); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + *new_page)) + goto err1; + res = maria_rtree_add_key(info, keyinfo, new_key, key_length, + page_buf, new_page); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + goto ok; + } + default: + case -1: /* error */ + { + goto err1; + } + } + } + else + { + res = maria_rtree_add_key(info, keyinfo, key, key_length, page_buf, + new_page); + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + } + +ok: + my_afree(page_buf); + return res; + +err1: + my_afree(page_buf); + return -1; +} + + +/* + Insert key into the tree + + RETURN + -1 Error + 0 Root was not split + 1 Root was split +*/ + +static int maria_rtree_insert_level(MARIA_HA *info, uint keynr, byte *key, + uint key_length, int ins_level) +{ + my_off_t old_root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + int res; + my_off_t new_page; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + if ((old_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + return -1; + info->keyread_buff_used = 1; + maria_putint(info->buff, 2, 0); + res = maria_rtree_add_key(info, keyinfo, key, key_length, info->buff, NULL); + if (_ma_write_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, info->buff)) + return 1; + info->s->state.key_root[keynr] = old_root; + return res; + } + + switch ((res = maria_rtree_insert_req(info, keyinfo, key, key_length, + old_root, &new_page, ins_level, 0))) + { + case 0: /* root was not split */ + { + break; + } + case 1: /* root was split, grow a new root */ + { + byte *new_root_buf, *new_key; + my_off_t new_root; + uint nod_flag = info->s->base.key_reflength; + + if (!(new_root_buf= (byte*) my_alloca((uint)keyinfo->block_length + + HA_MAX_KEY_BUFF))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + + maria_putint(new_root_buf, 2, nod_flag); + if ((new_root = _ma_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + goto err1; + + new_key = new_root_buf + keyinfo->block_length + nod_flag; + + _ma_kpointer(info, new_key - nod_flag, old_root); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + old_root)) + goto err1; + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, + NULL) + == -1) + goto err1; + _ma_kpointer(info, new_key - nod_flag, new_page); + if (maria_rtree_set_key_mbr(info, keyinfo, new_key, key_length, + new_page)) + goto err1; + if (maria_rtree_add_key(info, keyinfo, new_key, key_length, new_root_buf, + NULL) + == -1) + goto err1; + if (_ma_write_keypage(info, keyinfo, new_root, + DFLT_INIT_HITS, new_root_buf)) + goto err1; + info->s->state.key_root[keynr] = new_root; + + my_afree((byte*)new_root_buf); + break; +err1: + my_afree((byte*)new_root_buf); + return -1; + } + default: + case -1: /* error */ + { + break; + } + } + return res; +} + + +/* + Insert key into the tree - interface function + + RETURN + -1 Error + 0 OK +*/ + +int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length) +{ + return (!key_length || + (maria_rtree_insert_level(info, keynr, key, key_length, -1) == -1)) ? + -1 : 0; +} + + +/* + Fill reinsert page buffer + + RETURN + -1 Error + 0 OK +*/ + +static int maria_rtree_fill_reinsert_list(stPageList *ReinsertList, my_off_t page, + int level) +{ + if (ReinsertList->n_pages == ReinsertList->m_pages) + { + ReinsertList->m_pages += REINSERT_BUFFER_INC; + if (!(ReinsertList->pages = (stPageLevel*)my_realloc((gptr)ReinsertList->pages, + ReinsertList->m_pages * sizeof(stPageLevel), MYF(MY_ALLOW_ZERO_PTR)))) + goto err1; + } + /* save page to ReinsertList */ + ReinsertList->pages[ReinsertList->n_pages].offs = page; + ReinsertList->pages[ReinsertList->n_pages].level = level; + ReinsertList->n_pages++; + return 0; + +err1: + return -1; +} + + +/* + Go down and delete key from the tree + + RETURN + -1 Error + 0 Deleted + 1 Not found + 2 Empty leaf +*/ + +static int maria_rtree_delete_req(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, + uint key_length, my_off_t page, + uint *page_size, + stPageList *ReinsertList, int level) +{ + ulong i; + uint nod_flag; + int res; + byte *page_buf, *last, *k; + + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + return -1; + } + if (!_ma_fetch_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (i = 0; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag), i++) + { + if (nod_flag) + { + /* not leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_WITHIN)) + { + switch ((res = maria_rtree_delete_req(info, keyinfo, key, key_length, + _ma_kpos(nod_flag, k), page_size, ReinsertList, level + 1))) + { + case 0: /* deleted */ + { + /* test page filling */ + if (*page_size + key_length >= + rt_PAGE_MIN_SIZE(keyinfo->block_length)) + { + /* OK */ + if (maria_rtree_set_key_mbr(info, keyinfo, k, key_length, + _ma_kpos(nod_flag, k))) + goto err1; + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + } + else + { + /* too small: delete key & add it descendant to reinsert list */ + if (maria_rtree_fill_reinsert_list(ReinsertList, + _ma_kpos(nod_flag, k), + level + 1)) + goto err1; + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = maria_data_on_page(page_buf); + } + + goto ok; + } + case 1: /* not found - continue searching */ + { + break; + } + case 2: /* vacuous case: last key in the leaf */ + { + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + if (_ma_write_keypage(info, keyinfo, page, + DFLT_INIT_HITS, page_buf)) + goto err1; + *page_size = maria_data_on_page(page_buf); + res = 0; + goto ok; + } + default: /* error */ + case -1: + { + goto err1; + } + } + } + } + else + { + /* leaf */ + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, MBR_EQUAL | MBR_DATA)) + { + maria_rtree_delete_key(info, page_buf, k, key_length, nod_flag); + *page_size = maria_data_on_page(page_buf); + if (*page_size == 2) + { + /* last key in the leaf */ + res = 2; + if (_ma_dispose(info, keyinfo, page, DFLT_INIT_HITS)) + goto err1; + } + else + { + res = 0; + if (_ma_write_keypage(info, keyinfo, page, DFLT_INIT_HITS, page_buf)) + goto err1; + } + goto ok; + } + } + } + res = 1; + +ok: + my_afree((byte*)page_buf); + return res; + +err1: + my_afree((byte*)page_buf); + return -1; +} + + +/* + Delete key - interface function + + RETURN + -1 Error + 0 Deleted +*/ + +int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length) +{ + uint page_size; + stPageList ReinsertList; + my_off_t old_root; + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + { + my_errno= HA_ERR_END_OF_FILE; + return -1; + } + + ReinsertList.pages = NULL; + ReinsertList.n_pages = 0; + ReinsertList.m_pages = 0; + + switch (maria_rtree_delete_req(info, keyinfo, key, key_length, old_root, + &page_size, &ReinsertList, 0)) + { + case 2: + { + info->s->state.key_root[keynr] = HA_OFFSET_ERROR; + return 0; + } + case 0: + { + uint nod_flag; + ulong i; + for (i = 0; i < ReinsertList.n_pages; ++i) + { + byte *page_buf, *k, *last; + + if (!(page_buf = (byte*) my_alloca((uint)keyinfo->block_length))) + { + my_errno = HA_ERR_OUT_OF_MEM; + goto err1; + } + if (!_ma_fetch_keypage(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + for (; k < last; k = rt_PAGE_NEXT_KEY(k, key_length, nod_flag)) + { + if (maria_rtree_insert_level(info, keynr, k, key_length, + ReinsertList.pages[i].level) == -1) + { + my_afree(page_buf); + goto err1; + } + } + my_afree(page_buf); + if (_ma_dispose(info, keyinfo, ReinsertList.pages[i].offs, + DFLT_INIT_HITS)) + goto err1; + } + if (ReinsertList.pages) + my_free((byte*) ReinsertList.pages, MYF(0)); + + /* check for redundant root (not leaf, 1 child) and eliminate */ + if ((old_root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + goto err1; + if (!_ma_fetch_keypage(info, keyinfo, old_root, DFLT_INIT_HITS, + info->buff, 0)) + goto err1; + nod_flag = _ma_test_if_nod(info->buff); + page_size = maria_data_on_page(info->buff); + if (nod_flag && (page_size == 2 + key_length + nod_flag)) + { + my_off_t new_root = _ma_kpos(nod_flag, + rt_PAGE_FIRST_KEY(info->buff, nod_flag)); + if (_ma_dispose(info, keyinfo, old_root, DFLT_INIT_HITS)) + goto err1; + info->s->state.key_root[keynr] = new_root; + } + info->update= HA_STATE_DELETED; + return 0; + +err1: + return -1; + } + case 1: /* not found */ + { + my_errno = HA_ERR_KEY_NOT_FOUND; + return -1; + } + default: + case -1: /* error */ + return -1; + } +} + + +/* + Estimate number of suitable keys in the tree + + RETURN + estimated value +*/ + +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint flag) +{ + MARIA_KEYDEF *keyinfo = info->s->keyinfo + keynr; + my_off_t root; + uint i = 0; + uint nod_flag, k_len; + byte *page_buf, *k, *last; + double area = 0; + ha_rows res = 0; + + if (flag & MBR_DISJOINT) + return info->state->records; + + if ((root = info->s->state.key_root[keynr]) == HA_OFFSET_ERROR) + return HA_POS_ERROR; + if (!(page_buf= (byte*) my_alloca((uint)keyinfo->block_length))) + return HA_POS_ERROR; + if (!_ma_fetch_keypage(info, keyinfo, root, DFLT_INIT_HITS, page_buf, 0)) + goto err1; + nod_flag = _ma_test_if_nod(page_buf); + + k_len = keyinfo->keylength - info->s->base.rec_reflength; + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + last = rt_PAGE_END(page_buf); + + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag), i++) + { + if (nod_flag) + { + double k_area = maria_rtree_rect_volume(keyinfo->seg, k, key_length); + + /* The following should be safe, even if we compare doubles */ + if (k_area == 0) + { + if (flag & (MBR_CONTAIN | MBR_INTERSECT)) + { + area += 1; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, + MBR_WITHIN)) + area += 1; + } + else + goto err1; + } + else + { + if (flag & (MBR_CONTAIN | MBR_INTERSECT)) + { + area+= maria_rtree_overlapping_area(keyinfo->seg, key, k, + key_length) / k_area; + } + else if (flag & (MBR_WITHIN | MBR_EQUAL)) + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, + MBR_WITHIN)) + area+= (maria_rtree_rect_volume(keyinfo->seg, key, key_length) / + k_area); + } + else + goto err1; + } + } + else + { + if (!maria_rtree_key_cmp(keyinfo->seg, key, k, key_length, flag)) + ++res; + } + } + if (nod_flag) + { + if (i) + res = (ha_rows) (area / i * info->state->records); + else + res = HA_POS_ERROR; + } + + my_afree((byte*)page_buf); + return res; + +err1: + my_afree(page_buf); + return HA_POS_ERROR; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_index.h b/storage/maria/ma_rt_index.h new file mode 100644 index 00000000000..eae43966aa0 --- /dev/null +++ b/storage/maria/ma_rt_index.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_index_h +#define _rt_index_h + +#ifdef HAVE_RTREE_KEYS + +#define rt_PAGE_FIRST_KEY(page, nod_flag) (page + 2 + nod_flag) +#define rt_PAGE_NEXT_KEY(key, key_length, nod_flag) (key + key_length + \ + (nod_flag ? nod_flag : info->s->base.rec_reflength)) +#define rt_PAGE_END(page) (page + maria_data_on_page(page)) + +#define rt_PAGE_MIN_SIZE(block_length) ((uint)(block_length) / 3) + +int maria_rtree_insert(MARIA_HA *info, uint keynr, byte *key, uint key_length); +int maria_rtree_delete(MARIA_HA *info, uint keynr, byte *key, uint key_length); + +int maria_rtree_find_first(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint search_flag); +int maria_rtree_find_next(MARIA_HA *info, uint keynr, uint search_flag); + +int maria_rtree_get_first(MARIA_HA *info, uint keynr, uint key_length); +int maria_rtree_get_next(MARIA_HA *info, uint keynr, uint key_length); + +ha_rows maria_rtree_estimate(MARIA_HA *info, uint keynr, byte *key, + uint key_length, uint flag); + +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_length, + my_off_t *new_page_offs); + +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_index_h */ diff --git a/storage/maria/ma_rt_key.c b/storage/maria/ma_rt_key.c new file mode 100644 index 00000000000..a27ff23c006 --- /dev/null +++ b/storage/maria/ma_rt_key.c @@ -0,0 +1,101 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +/* + Add key to the page + + RESULT VALUES + -1 Error + 0 Not split + 1 Split +*/ + +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, byte *page_buf, my_off_t *new_page) +{ + uint page_size = maria_data_on_page(page_buf); + uint nod_flag = _ma_test_if_nod(page_buf); + + if (page_size + key_length + info->s->base.rec_reflength <= + keyinfo->block_length) + { + /* split won't be necessary */ + if (nod_flag) + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key - nod_flag, key_length + nod_flag); + page_size += key_length + nod_flag; + } + else + { + /* save key */ + memcpy(rt_PAGE_END(page_buf), key, key_length + + info->s->base.rec_reflength); + page_size += key_length + info->s->base.rec_reflength; + } + maria_putint(page_buf, page_size, nod_flag); + return 0; + } + + return (maria_rtree_split_page(info, keyinfo, page_buf, key, key_length, + new_page) ? -1 : 1); +} + + +/* + Delete key from the page +*/ + +int maria_rtree_delete_key(MARIA_HA *info, byte *page_buf, byte *key, + uint key_length, uint nod_flag) +{ + uint16 page_size = maria_data_on_page(page_buf); + byte *key_start; + + key_start= key - nod_flag; + if (!nod_flag) + key_length += info->s->base.rec_reflength; + + memmove(key_start, key + key_length, page_size - key_length - + (key - page_buf)); + page_size-= key_length + nod_flag; + + maria_putint(page_buf, page_size, nod_flag); + return 0; +} + + +/* + Calculate and store key MBR +*/ + +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, my_off_t child_page) +{ + if (!_ma_fetch_keypage(info, keyinfo, child_page, + DFLT_INIT_HITS, info->buff, 0)) + return -1; + + return maria_rtree_page_mbr(info, keyinfo->seg, info->buff, key, key_length); +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_key.h b/storage/maria/ma_rt_key.h new file mode 100644 index 00000000000..03c9ef46438 --- /dev/null +++ b/storage/maria/ma_rt_key.h @@ -0,0 +1,32 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Ramil Kalimullin, who has a shared copyright to this code */ + +#ifndef _rt_key_h +#define _rt_key_h + +#ifdef HAVE_RTREE_KEYS + +int maria_rtree_add_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, byte *page_buf, my_off_t *new_page); +int maria_rtree_delete_key(MARIA_HA *info, byte *page, byte *key, + uint key_length, uint nod_flag); +int maria_rtree_set_key_mbr(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_length, my_off_t child_page); + +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_key_h */ diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c new file mode 100644 index 00000000000..2da4ffea7a4 --- /dev/null +++ b/storage/maria/ma_rt_mbr.c @@ -0,0 +1,806 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_mbr.h" + +#define INTERSECT_CMP(amin, amax, bmin, bmax) ((amin > bmax) || (bmin > amax)) +#define CONTAIN_CMP(amin, amax, bmin, bmax) ((bmin > amin) || (bmax < amax)) +#define WITHIN_CMP(amin, amax, bmin, bmax) ((amin > bmin) || (amax < bmax)) +#define DISJOINT_CMP(amin, amax, bmin, bmax) ((amin <= bmax) && (bmin <= amax)) +#define EQUAL_CMP(amin, amax, bmin, bmax) ((amin != bmin) || (amax != bmax)) + +#define FCMP(A, B) ((int)(A) - (int)(B)) +#define p_inc(A, B, X) {A += X; B += X;} + +#define RT_CMP(nextflag) \ + if (nextflag & MBR_INTERSECT) \ + { \ + if (INTERSECT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_CONTAIN) \ + { \ + if (CONTAIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_WITHIN) \ + { \ + if (WITHIN_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_EQUAL) \ + { \ + if (EQUAL_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + } \ + else if (nextflag & MBR_DISJOINT) \ + { \ + if (DISJOINT_CMP(amin, amax, bmin, bmax)) \ + return 1; \ + }\ + else /* if unknown comparison operator */ \ + { \ + DBUG_ASSERT(0); \ + } + +#define RT_CMP_KORR(type, korr_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + RT_CMP(nextflag); \ +} + +#define RT_CMP_GET(type, get_func, len, nextflag) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + RT_CMP(nextflag); \ +} + +/* + Compares two keys a and b depending on nextflag + nextflag can contain these flags: + MBR_INTERSECT(a,b) a overlaps b + MBR_CONTAIN(a,b) a contains b + MBR_DISJOINT(a,b) a disjoint b + MBR_WITHIN(a,b) a within b + MBR_EQUAL(a,b) All coordinates of MBRs are equal + MBR_DATA(a,b) Data reference is the same + Returns 0 on success. +*/ + +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *b, byte *a, uint key_length, + uint nextflag) +{ + for (; (int) key_length > 0; keyseg += 2 ) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_CMP_KORR(int8, mi_sint1korr, 1, nextflag); + break; + case HA_KEYTYPE_BINARY: + RT_CMP_KORR(uint8, mi_uint1korr, 1, nextflag); + break; + case HA_KEYTYPE_SHORT_INT: + RT_CMP_KORR(int16, mi_sint2korr, 2, nextflag); + break; + case HA_KEYTYPE_USHORT_INT: + RT_CMP_KORR(uint16, mi_uint2korr, 2, nextflag); + break; + case HA_KEYTYPE_INT24: + RT_CMP_KORR(int32, mi_sint3korr, 3, nextflag); + break; + case HA_KEYTYPE_UINT24: + RT_CMP_KORR(uint32, mi_uint3korr, 3, nextflag); + break; + case HA_KEYTYPE_LONG_INT: + RT_CMP_KORR(int32, mi_sint4korr, 4, nextflag); + break; + case HA_KEYTYPE_ULONG_INT: + RT_CMP_KORR(uint32, mi_uint4korr, 4, nextflag); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_CMP_KORR(longlong, mi_sint8korr, 8, nextflag) + break; + case HA_KEYTYPE_ULONGLONG: + RT_CMP_KORR(ulonglong, mi_uint8korr, 8, nextflag) + break; +#endif + case HA_KEYTYPE_FLOAT: + /* The following should be safe, even if we compare doubles */ + RT_CMP_GET(float, mi_float4get, 4, nextflag); + break; + case HA_KEYTYPE_DOUBLE: + RT_CMP_GET(double, mi_float8get, 8, nextflag); + break; + case HA_KEYTYPE_END: + goto end; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + +end: + if (nextflag & MBR_DATA) + { + byte *end = a + keyseg->length; + do + { + if (*a++ != *b++) + return FCMP(a[-1], b[-1]); + } while (a != end); + } + return 0; +} + +#define RT_VOL_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + amax = korr_func(a+len); \ + res *= (cast(amax) - cast(amin)); \ +} + +#define RT_VOL_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + get_func(amax, a+len); \ + res *= (cast(amax) - cast(amin)); \ +} + +/* + Calculates rectangle volume +*/ +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte *a, uint key_length) +{ + double res = 1; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_VOL_KORR(int8, mi_sint1korr, 1, (double)); + break; + case HA_KEYTYPE_BINARY: + RT_VOL_KORR(uint8, mi_uint1korr, 1, (double)); + break; + case HA_KEYTYPE_SHORT_INT: + RT_VOL_KORR(int16, mi_sint2korr, 2, (double)); + break; + case HA_KEYTYPE_USHORT_INT: + RT_VOL_KORR(uint16, mi_uint2korr, 2, (double)); + break; + case HA_KEYTYPE_INT24: + RT_VOL_KORR(int32, mi_sint3korr, 3, (double)); + break; + case HA_KEYTYPE_UINT24: + RT_VOL_KORR(uint32, mi_uint3korr, 3, (double)); + break; + case HA_KEYTYPE_LONG_INT: + RT_VOL_KORR(int32, mi_sint4korr, 4, (double)); + break; + case HA_KEYTYPE_ULONG_INT: + RT_VOL_KORR(uint32, mi_uint4korr, 4, (double)); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, (double)); + break; + case HA_KEYTYPE_ULONGLONG: + RT_VOL_KORR(longlong, mi_sint8korr, 8, ulonglong2double); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_VOL_GET(float, mi_float4get, 4, (double)); + break; + case HA_KEYTYPE_DOUBLE: + RT_VOL_GET(double, mi_float8get, 8, (double)); + break; + case HA_KEYTYPE_END: + key_length = 0; + break; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + } + return res; +} + +#define RT_D_MBR_KORR(type, korr_func, len, cast) \ +{ \ + type amin, amax; \ + amin = korr_func(a); \ + amax = korr_func(a+len); \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ +} + +#define RT_D_MBR_GET(type, get_func, len, cast) \ +{ \ + type amin, amax; \ + get_func(amin, a); \ + get_func(amax, a+len); \ + *res++ = cast(amin); \ + *res++ = cast(amax); \ +} + + +/* + Creates an MBR as an array of doubles. +*/ + +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length, double *res) +{ + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_D_MBR_KORR(int8, mi_sint1korr, 1, (double)); + break; + case HA_KEYTYPE_BINARY: + RT_D_MBR_KORR(uint8, mi_uint1korr, 1, (double)); + break; + case HA_KEYTYPE_SHORT_INT: + RT_D_MBR_KORR(int16, mi_sint2korr, 2, (double)); + break; + case HA_KEYTYPE_USHORT_INT: + RT_D_MBR_KORR(uint16, mi_uint2korr, 2, (double)); + break; + case HA_KEYTYPE_INT24: + RT_D_MBR_KORR(int32, mi_sint3korr, 3, (double)); + break; + case HA_KEYTYPE_UINT24: + RT_D_MBR_KORR(uint32, mi_uint3korr, 3, (double)); + break; + case HA_KEYTYPE_LONG_INT: + RT_D_MBR_KORR(int32, mi_sint4korr, 4, (double)); + break; + case HA_KEYTYPE_ULONG_INT: + RT_D_MBR_KORR(uint32, mi_uint4korr, 4, (double)); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, (double)); + break; + case HA_KEYTYPE_ULONGLONG: + RT_D_MBR_KORR(longlong, mi_sint8korr, 8, ulonglong2double); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_D_MBR_GET(float, mi_float4get, 4, (double)); + break; + case HA_KEYTYPE_DOUBLE: + RT_D_MBR_GET(double, mi_float8get, 8, (double)); + break; + case HA_KEYTYPE_END: + key_length = 0; + break; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + } + return 0; +} + +#define RT_COMB_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + store_func(c+len, amax); \ +} + +#define RT_COMB_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + amin = min(amin, bmin); \ + amax = max(amax, bmax); \ + store_func(c, amin); \ + store_func(c+len, amax); \ +} + +/* + Creates common minimal bounding rectungle + for two input rectagnles a and b + Result is written to c +*/ + +int maria_rtree_combine_rect(HA_KEYSEG *keyseg, byte* a, byte* b, byte* c, + uint key_length) +{ + for ( ; (int) key_length > 0 ; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_COMB_KORR(int8, mi_sint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_BINARY: + RT_COMB_KORR(uint8, mi_uint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_COMB_KORR(int16, mi_sint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_COMB_KORR(uint16, mi_uint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_INT24: + RT_COMB_KORR(int32, mi_sint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_UINT24: + RT_COMB_KORR(uint32, mi_uint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_COMB_KORR(int32, mi_sint4korr, mi_int4store, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_COMB_KORR(uint32, mi_uint4korr, mi_int4store, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_COMB_KORR(longlong, mi_sint8korr, mi_int8store, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_COMB_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_COMB_GET(float, mi_float4get, mi_float4store, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_COMB_GET(double, mi_float8get, mi_float8store, 8); + break; + case HA_KEYTYPE_END: + return 0; + default: + return 1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + c+= keyseg_length; + } + return 0; +} + + +#define RT_OVL_AREA_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ +} + +#define RT_OVL_AREA_GET(type, get_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + amin = max(amin, bmin); \ + amax = min(amax, bmax); \ + if (amin >= amax) \ + return 0; \ + res *= amax - amin; \ +} + +/* +Calculates overlapping area of two MBRs a & b +*/ +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte* a, byte* b, + uint key_length) +{ + double res = 1; + for (; (int) key_length > 0 ; keyseg += 2) + { + uint32 keyseg_length; + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_OVL_AREA_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_OVL_AREA_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_OVL_AREA_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_OVL_AREA_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_OVL_AREA_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_OVL_AREA_KORR(uint32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_OVL_AREA_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_OVL_AREA_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_OVL_AREA_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_OVL_AREA_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_OVL_AREA_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + return res; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + return res; +} + +#define RT_AREA_INC_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + a_area *= (((double)amax) - ((double)amin)); \ + loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +#define RT_AREA_INC_GET(type, get_func, len)\ +{\ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + a_area *= (((double)amax) - ((double)amin)); \ + loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +/* + Calculates MBR_AREA(a+b) - MBR_AREA(a) +*/ + +double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length, double *ab_area) +{ + double a_area= 1.0; + double loc_ab_area= 1.0; + + *ab_area= 1.0; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + + if (keyseg->null_bit) /* Handle NULL part */ + return -1; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_AREA_INC_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_AREA_INC_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_AREA_INC_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_AREA_INC_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_AREA_INC_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_AREA_INC_KORR(int32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_AREA_INC_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_AREA_INC_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_AREA_INC_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_AREA_INC_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_AREA_INC_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + goto safe_end; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } +safe_end: + *ab_area= loc_ab_area; + return loc_ab_area - a_area; +} + +#define RT_PERIM_INC_KORR(type, korr_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(a); \ + bmin = korr_func(b); \ + amax = korr_func(a+len); \ + bmax = korr_func(b+len); \ + a_perim+= (((double)amax) - ((double)amin)); \ + *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +#define RT_PERIM_INC_GET(type, get_func, len)\ +{\ + type amin, amax, bmin, bmax; \ + get_func(amin, a); \ + get_func(bmin, b); \ + get_func(amax, a+len); \ + get_func(bmax, b+len); \ + a_perim+= (((double)amax) - ((double)amin)); \ + *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \ +} + +/* +Calculates MBR_PERIMETER(a+b) - MBR_PERIMETER(a) +*/ +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b, + uint key_length, double *ab_perim) +{ + double a_perim = 0.0; + + *ab_perim= 0.0; + for (; (int)key_length > 0; keyseg += 2) + { + uint32 keyseg_length; + + if (keyseg->null_bit) /* Handle NULL part */ + return -1; + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_PERIM_INC_KORR(int8, mi_sint1korr, 1); + break; + case HA_KEYTYPE_BINARY: + RT_PERIM_INC_KORR(uint8, mi_uint1korr, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_PERIM_INC_KORR(int16, mi_sint2korr, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_PERIM_INC_KORR(uint16, mi_uint2korr, 2); + break; + case HA_KEYTYPE_INT24: + RT_PERIM_INC_KORR(int32, mi_sint3korr, 3); + break; + case HA_KEYTYPE_UINT24: + RT_PERIM_INC_KORR(int32, mi_uint3korr, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_PERIM_INC_KORR(int32, mi_sint4korr, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_PERIM_INC_KORR(uint32, mi_uint4korr, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_PERIM_INC_KORR(longlong, mi_sint8korr, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_PERIM_INC_GET(float, mi_float4get, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_PERIM_INC_GET(double, mi_float8get, 8); + break; + case HA_KEYTYPE_END: + return *ab_perim - a_perim; + default: + return -1; + } + keyseg_length= keyseg->length * 2; + key_length-= keyseg_length; + a+= keyseg_length; + b+= keyseg_length; + } + return *ab_perim - a_perim; +} + + +#define RT_PAGE_MBR_KORR(type, korr_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + amin = korr_func(k + inc); \ + amax = korr_func(k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + bmin = korr_func(k + inc); \ + bmax = korr_func(k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ +} + +#define RT_PAGE_MBR_GET(type, get_func, store_func, len) \ +{ \ + type amin, amax, bmin, bmax; \ + get_func(amin, k + inc); \ + get_func(amax, k + inc + len); \ + k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag); \ + for (; k < last; k = rt_PAGE_NEXT_KEY(k, k_len, nod_flag)) \ +{ \ + get_func(bmin, k + inc); \ + get_func(bmax, k + inc + len); \ + if (amin > bmin) \ + amin = bmin; \ + if (amax < bmax) \ + amax = bmax; \ +} \ + store_func(c, amin); \ + c += len; \ + store_func(c, amax); \ + c += len; \ + inc += 2 * len; \ +} + +/* + Calculates key page total MBR = MBR(key1) + MBR(key2) + ... +*/ +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf, + byte *c, uint key_length) +{ + uint inc = 0; + uint k_len = key_length; + uint nod_flag = _ma_test_if_nod(page_buf); + byte *k; + byte *last = rt_PAGE_END(page_buf); + + for (; (int)key_length > 0; keyseg += 2) + { + key_length -= keyseg->length * 2; + + /* Handle NULL part */ + if (keyseg->null_bit) + { + return 1; + } + + k = rt_PAGE_FIRST_KEY(page_buf, nod_flag); + + switch ((enum ha_base_keytype) keyseg->type) { + case HA_KEYTYPE_INT8: + RT_PAGE_MBR_KORR(int8, mi_sint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_BINARY: + RT_PAGE_MBR_KORR(uint8, mi_uint1korr, mi_int1store, 1); + break; + case HA_KEYTYPE_SHORT_INT: + RT_PAGE_MBR_KORR(int16, mi_sint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_USHORT_INT: + RT_PAGE_MBR_KORR(uint16, mi_uint2korr, mi_int2store, 2); + break; + case HA_KEYTYPE_INT24: + RT_PAGE_MBR_KORR(int32, mi_sint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_UINT24: + RT_PAGE_MBR_KORR(uint32, mi_uint3korr, mi_int3store, 3); + break; + case HA_KEYTYPE_LONG_INT: + RT_PAGE_MBR_KORR(int32, mi_sint4korr, mi_int4store, 4); + break; + case HA_KEYTYPE_ULONG_INT: + RT_PAGE_MBR_KORR(uint32, mi_uint4korr, mi_int4store, 4); + break; +#ifdef HAVE_LONG_LONG + case HA_KEYTYPE_LONGLONG: + RT_PAGE_MBR_KORR(longlong, mi_sint8korr, mi_int8store, 8); + break; + case HA_KEYTYPE_ULONGLONG: + RT_PAGE_MBR_KORR(ulonglong, mi_uint8korr, mi_int8store, 8); + break; +#endif + case HA_KEYTYPE_FLOAT: + RT_PAGE_MBR_GET(float, mi_float4get, mi_float4store, 4); + break; + case HA_KEYTYPE_DOUBLE: + RT_PAGE_MBR_GET(double, mi_float8get, mi_float8store, 8); + break; + case HA_KEYTYPE_END: + return 0; + default: + return 1; + } + } + return 0; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_mbr.h b/storage/maria/ma_rt_mbr.h new file mode 100644 index 00000000000..01da74418a6 --- /dev/null +++ b/storage/maria/ma_rt_mbr.h @@ -0,0 +1,38 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _rt_mbr_h +#define _rt_mbr_h + +#ifdef HAVE_RTREE_KEYS + +int maria_rtree_key_cmp(HA_KEYSEG *keyseg, byte *a, byte *b, uint key_length, + uint nextflag); +int maria_rtree_combine_rect(HA_KEYSEG *keyseg,byte *, byte *, byte*, + uint key_length); +double maria_rtree_rect_volume(HA_KEYSEG *keyseg, byte*, uint key_length); +int maria_rtree_d_mbr(HA_KEYSEG *keyseg, byte *a, uint key_length, + double *res); +double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length); +double maria_rtree_area_increase(HA_KEYSEG *keyseg, byte *a, byte *b, + uint key_length, double *ab_area); +double maria_rtree_perimeter_increase(HA_KEYSEG *keyseg, byte* a, byte* b, + uint key_length, double *ab_perim); +int maria_rtree_page_mbr(MARIA_HA *info, HA_KEYSEG *keyseg, byte *page_buf, + byte* c, uint key_length); +#endif /*HAVE_RTREE_KEYS*/ +#endif /* _rt_mbr_h */ diff --git a/storage/maria/ma_rt_split.c b/storage/maria/ma_rt_split.c new file mode 100644 index 00000000000..6a66c4424eb --- /dev/null +++ b/storage/maria/ma_rt_split.c @@ -0,0 +1,354 @@ +/* Copyright (C) 2006 MySQL AB & Alexey Botchkov & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" +#include "ma_rt_key.h" +#include "ma_rt_mbr.h" + +typedef struct +{ + double square; + int n_node; + byte *key; + double *coords; +} SplitStruct; + +inline static double *reserve_coords(double **d_buffer, int n_dim) +{ + double *coords = *d_buffer; + (*d_buffer) += n_dim * 2; + return coords; +} + +static void mbr_join(double *a, const double *b, int n_dim) +{ + double *end = a + n_dim * 2; + do + { + if (a[0] > b[0]) + a[0] = b[0]; + + if (a[1] < b[1]) + a[1] = b[1]; + + a += 2; + b += 2; + }while (a != end); +} + +/* +Counts the square of mbr which is a join of a and b +*/ +static double mbr_join_square(const double *a, const double *b, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= + ((a[1] < b[1]) ? b[1] : a[1]) - ((a[0] > b[0]) ? b[0] : a[0]); + + a += 2; + b += 2; + }while (a != end); + + return square; +} + +static double count_square(const double *a, int n_dim) +{ + const double *end = a + n_dim * 2; + double square = 1.0; + do + { + square *= a[1] - a[0]; + a += 2; + }while (a != end); + return square; +} + +inline static void copy_coords(double *dst, const double *src, int n_dim) +{ + memcpy(dst, src, sizeof(double) * (n_dim * 2)); +} + +/* +Select two nodes to collect group upon +*/ +static void pick_seeds(SplitStruct *node, int n_entries, + SplitStruct **seed_a, SplitStruct **seed_b, int n_dim) +{ + SplitStruct *cur1; + SplitStruct *lim1 = node + (n_entries - 1); + SplitStruct *cur2; + SplitStruct *lim2 = node + n_entries; + + double max_d = -DBL_MAX; + double d; + + for (cur1 = node; cur1 < lim1; ++cur1) + { + for (cur2=cur1 + 1; cur2 < lim2; ++cur2) + { + + d = mbr_join_square(cur1->coords, cur2->coords, n_dim) - cur1->square - + cur2->square; + if (d > max_d) + { + max_d = d; + *seed_a = cur1; + *seed_b = cur2; + } + } + } +} + +/* +Select next node and group where to add +*/ +static void pick_next(SplitStruct *node, int n_entries, double *g1, double *g2, + SplitStruct **choice, int *n_group, int n_dim) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + + double max_diff = -DBL_MAX; + + for (; curn_node) + { + continue; + } + + diff = mbr_join_square(g1, cur->coords, n_dim) - + mbr_join_square(g2, cur->coords, n_dim); + + abs_diff = fabs(diff); + if (abs_diff > max_diff) + { + max_diff = abs_diff; + *n_group = 1 + (diff > 0); + *choice = cur; + } + } +} + +/* +Mark not-in-group entries as n_group +*/ +static void mark_all_entries(SplitStruct *node, int n_entries, int n_group) +{ + SplitStruct *cur = node; + SplitStruct *end = node + n_entries; + for (; curn_node) + { + continue; + } + cur->n_node = n_group; + } +} + +static int split_maria_rtree_node(SplitStruct *node, int n_entries, + int all_size, /* Total key's size */ + int key_size, + int min_size, /* Minimal group size */ + int size1, int size2 /* initial group sizes */, + double **d_buffer, int n_dim) +{ + SplitStruct *cur; + SplitStruct *a; + SplitStruct *b; + double *g1 = reserve_coords(d_buffer, n_dim); + double *g2 = reserve_coords(d_buffer, n_dim); + SplitStruct *next; + int next_node; + int i; + SplitStruct *end = node + n_entries; + LINT_INIT(a); + LINT_INIT(b); + LINT_INIT(next); + LINT_INIT(next_node); + + if (all_size < min_size * 2) + { + return 1; + } + + cur = node; + for (; cursquare = count_square(cur->coords, n_dim); + cur->n_node = 0; + } + + pick_seeds(node, n_entries, &a, &b, n_dim); + a->n_node = 1; + b->n_node = 2; + + + copy_coords(g1, a->coords, n_dim); + size1 += key_size; + copy_coords(g2, b->coords, n_dim); + size2 += key_size; + + + for (i=n_entries - 2; i>0; --i) + { + if (all_size - (size2 + key_size) < min_size) /* Can't write into group 2 */ + { + mark_all_entries(node, n_entries, 1); + break; + } + + if (all_size - (size1 + key_size) < min_size) /* Can't write into group 1 */ + { + mark_all_entries(node, n_entries, 2); + break; + } + + pick_next(node, n_entries, g1, g2, &next, &next_node, n_dim); + if (next_node == 1) + { + size1 += key_size; + mbr_join(g1, next->coords, n_dim); + } + else + { + size2 += key_size; + mbr_join(g2, next->coords, n_dim); + } + next->n_node = next_node; + } + + return 0; +} + +int maria_rtree_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, + uint key_length, my_off_t *new_page_offs) +{ + int n1, n2; /* Number of items in groups */ + + SplitStruct *task; + SplitStruct *cur; + SplitStruct *stop; + double *coord_buf; + double *next_coord; + double *old_coord; + int n_dim; + byte *source_cur, *cur1, *cur2; + byte *new_page; + int err_code= 0; + uint nod_flag= _ma_test_if_nod(page); + uint full_length= key_length + (nod_flag ? nod_flag : + info->s->base.rec_reflength); + int max_keys= (maria_data_on_page(page)-2) / (full_length); + + n_dim = keyinfo->keysegs / 2; + + if (!(coord_buf= (double*) my_alloca(n_dim * 2 * sizeof(double) * + (max_keys + 1 + 4) + + sizeof(SplitStruct) * (max_keys + 1)))) + return -1; + + task= (SplitStruct *)(coord_buf + n_dim * 2 * (max_keys + 1 + 4)); + + next_coord = coord_buf; + + stop = task + max_keys; + source_cur = rt_PAGE_FIRST_KEY(page, nod_flag); + + for (cur = task; cur < stop; ++cur, source_cur = rt_PAGE_NEXT_KEY(source_cur, + key_length, nod_flag)) + { + cur->coords = reserve_coords(&next_coord, n_dim); + cur->key = source_cur; + maria_rtree_d_mbr(keyinfo->seg, source_cur, key_length, cur->coords); + } + + cur->coords = reserve_coords(&next_coord, n_dim); + maria_rtree_d_mbr(keyinfo->seg, key, key_length, cur->coords); + cur->key = key; + + old_coord = next_coord; + + if (split_maria_rtree_node(task, max_keys + 1, + maria_data_on_page(page) + full_length + 2, full_length, + rt_PAGE_MIN_SIZE(keyinfo->block_length), + 2, 2, &next_coord, n_dim)) + { + err_code = 1; + goto split_err; + } + + if (!(new_page = (byte*) my_alloca((uint)keyinfo->block_length))) + { + err_code= -1; + goto split_err; + } + + stop = task + (max_keys + 1); + cur1 = rt_PAGE_FIRST_KEY(page, nod_flag); + cur2 = rt_PAGE_FIRST_KEY(new_page, nod_flag); + + n1= n2 = 0; + for (cur = task; cur < stop; ++cur) + { + byte *to; + if (cur->n_node == 1) + { + to = cur1; + cur1 = rt_PAGE_NEXT_KEY(cur1, key_length, nod_flag); + ++n1; + } + else + { + to = cur2; + cur2 = rt_PAGE_NEXT_KEY(cur2, key_length, nod_flag); + ++n2; + } + if (to != cur->key) + memcpy(to - nod_flag, cur->key - nod_flag, full_length); + } + + maria_putint(page, 2 + n1 * full_length, nod_flag); + maria_putint(new_page, 2 + n2 * full_length, nod_flag); + + if ((*new_page_offs= _ma_new(info, keyinfo, DFLT_INIT_HITS)) == + HA_OFFSET_ERROR) + err_code= -1; + else + err_code= _ma_write_keypage(info, keyinfo, *new_page_offs, + DFLT_INIT_HITS, new_page); + + my_afree((byte*)new_page); + +split_err: + my_afree((byte*) coord_buf); + return err_code; +} + +#endif /*HAVE_RTREE_KEYS*/ diff --git a/storage/maria/ma_rt_test.c b/storage/maria/ma_rt_test.c new file mode 100644 index 00000000000..4360e81c550 --- /dev/null +++ b/storage/maria/ma_rt_test.c @@ -0,0 +1,473 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA rtree table */ +/* Written by Alex Barkov who has a shared copyright to this code */ + + +#include "maria.h" + +#ifdef HAVE_RTREE_KEYS + +#include "ma_rt_index.h" + +#define MAX_REC_LENGTH 1024 +#define ndims 2 +#define KEYALG HA_KEY_ALG_RTREE + +static int read_with_pos(MARIA_HA * file, int silent); +static void create_record(char *record,uint rownr); +static void create_record1(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); +static int run_test(const char *filename); + +static double rt_data[]= +{ + /*1*/ 0,10,0,10, + /*2*/ 5,15,0,10, + /*3*/ 0,10,5,15, + /*4*/ 10,20,10,20, + /*5*/ 0,10,0,10, + /*6*/ 5,15,0,10, + /*7*/ 0,10,5,15, + /*8*/ 10,20,10,20, + /*9*/ 0,10,0,10, + /*10*/ 5,15,0,10, + /*11*/ 0,10,5,15, + /*12*/ 10,20,10,20, + /*13*/ 0,10,0,10, + /*14*/ 5,15,0,10, + /*15*/ 0,10,5,15, + /*16*/ 10,20,10,20, + /*17*/ 5,15,0,10, + /*18*/ 0,10,5,15, + /*19*/ 10,20,10,20, + /*20*/ 0,10,0,10, + + /*1*/ 100,110,0,10, + /*2*/ 105,115,0,10, + /*3*/ 100,110,5,15, + /*4*/ 110,120,10,20, + /*5*/ 100,110,0,10, + /*6*/ 105,115,0,10, + /*7*/ 100,110,5,15, + /*8*/ 110,120,10,20, + /*9*/ 100,110,0,10, + /*10*/ 105,115,0,10, + /*11*/ 100,110,5,15, + /*12*/ 110,120,10,20, + /*13*/ 100,110,0,10, + /*14*/ 105,115,0,10, + /*15*/ 100,110,5,15, + /*16*/ 110,120,10,20, + /*17*/ 105,115,0,10, + /*18*/ 100,110,5,15, + /*19*/ 110,120,10,20, + /*20*/ 100,110,0,10, + -1 +}; + +int main(int argc __attribute__((unused)),char *argv[] __attribute__((unused))) +{ + MY_INIT(argv[0]); + maria_init(); + exit(run_test("rt_test")); +} + + +static int run_test(const char *filename) +{ + MARIA_HA *file; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + MARIA_COLUMNDEF recinfo[20]; + MARIA_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + key_range range; + + int silent=0; + int opt_unique=0; + int create_flag=0; + int key_type=HA_KEYTYPE_DOUBLE; + int key_length=8; + int null_fields=0; + int nrecords=sizeof(rt_data)/(sizeof(double)*4);/* 3000;*/ + int rec_length=0; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd= 10; + ha_rows hrows; + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + rec_length=1; + + /* Define 2*ndims columns for coordinates*/ + + for (i=1; i<=2*ndims ;i++){ + recinfo[i].type=FIELD_NORMAL; + recinfo[i].length=key_length; + rec_length+=key_length; + } + + /* Define a key with 2*ndims segments */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=2*ndims; + keyinfo[0].flag=0; + keyinfo[0].key_alg=KEYALG; + + for (i=0; i<2*ndims; i++){ + keyinfo[0].seg[i].type= key_type; + keyinfo[0].seg[i].flag=0; /* Things like HA_REVERSE_SORT */ + keyinfo[0].seg[i].start= (key_length*i)+1; + keyinfo[0].seg[i].length=key_length; + keyinfo[0].seg[i].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[i].null_pos=0; + keyinfo[0].seg[i].language=default_charset_info->number; + } + + if (!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (maria_create(filename, + DYNAMIC_RECORD, + 1, /* keys */ + keyinfo, + 1+2*ndims+opt_unique, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + if (!silent) + printf("- Open isam-file\n"); + + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i "); + print_record(record,maria_position(file),"\n"); + error=maria_update(file,read_record,record); + if (error) + { + printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Test maria_rkey then a sequence of maria_rnext_same\n"); + + create_record(record, nrecords*4/5); + print_record(record,0," search for\n"); + + if ((error=maria_rkey(file,read_record,0,record+1,0,HA_READ_MBR_INTERSECT))) + { + printf("maria_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rkey\n"); + row_count=1; + + for (;;) + { + if ((error=maria_rnext_same(file,read_record))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext_same\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if (!silent) + printf("- Test maria_rfirst then a sequence of maria_rnext\n"); + + error=maria_rfirst(file,read_record,0); + if (error) + { + printf("maria_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,maria_position(file)," maria_frirst\n"); + + for (i=0;icur_row.nextpos= info->s->pack.header_length; /* Read first record */ + info->lastinx= -1; /* Can't forward or backward */ + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + DBUG_RETURN(my_errno); + + if ((*info->s->scan_init)(info)) + DBUG_RETURN(my_errno); + DBUG_RETURN(0); +} + +/* + Read a row based on position. + + SYNOPSIS + maria_scan() + info Maria handler + record Read data here + + RETURN + 0 ok + HA_ERR_END_OF_FILE End of file + # Error code +*/ + +int maria_scan(MARIA_HA *info, byte *record) +{ + DBUG_ENTER("maria_scan"); + /* Init all but update-flag */ + info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + DBUG_RETURN((*info->s->scan)(info, record, info->cur_row.nextpos, 1)); +} + + +void maria_scan_end(MARIA_HA *info) +{ + (*info->s->scan_end)(info); +} diff --git a/storage/maria/ma_search.c b/storage/maria/ma_search.c new file mode 100644 index 00000000000..f3e7a0d542a --- /dev/null +++ b/storage/maria/ma_search.c @@ -0,0 +1,1906 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* key handling functions */ + +#include "ma_fulltext.h" +#include "m_ctype.h" + +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, + byte *key, byte *keypos, + uint *return_key_length); + + /* Check index */ + +int _ma_check_index(MARIA_HA *info, int inx) +{ + if (inx == -1) /* Use last index */ + inx=info->lastinx; + if (inx < 0 || ! maria_is_key_active(info->s->state.key_map, inx)) + { + my_errno=HA_ERR_WRONG_INDEX; + return -1; + } + if (info->lastinx != inx) /* Index changed */ + { + info->lastinx = inx; + info->page_changed=1; + info->update= ((info->update & (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED)) | + HA_STATE_NEXT_FOUND | HA_STATE_PREV_FOUND); + } + if (info->opt_flag & WRITE_CACHE_USED && flush_io_cache(&info->rec_cache)) + return(-1); + return(inx); +} /* _ma_check_index */ + + + /* + ** Search after row by a key + ** Position to row is stored in info->lastpos + ** Return: -1 if not found + ** 1 if one should continue search on higher level + */ + +int _ma_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, uint key_len, uint nextflag, register my_off_t pos) +{ + my_bool last_key; + int error,flag; + uint nod_flag; + byte *keypos,*maxpos; + byte lastkey[HA_MAX_KEY_BUFF],*buff; + DBUG_ENTER("_ma_search"); + DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", + (ulong) pos, nextflag, (ulong) info->cur_row.lastpos)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_len);); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + info->cur_row.lastpos= HA_OFFSET_ERROR; + if (!(nextflag & (SEARCH_SMALLER | SEARCH_BIGGER | SEARCH_LAST))) + DBUG_RETURN(-1); /* Not found ; return error */ + DBUG_RETURN(1); /* Search at upper levels */ + } + + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS, + info->keyread_buff, + test(!(nextflag & SEARCH_SAVE_BUFF))))) + goto err; + DBUG_DUMP("page", buff, maria_data_on_page(buff)); + + flag=(*keyinfo->bin_search)(info,keyinfo,buff,key,key_len,nextflag, + &keypos,lastkey, &last_key); + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); + nod_flag=_ma_test_if_nod(buff); + maxpos=buff+maria_data_on_page(buff)-1; + + if (flag) + { + if ((error= _ma_search(info,keyinfo,key,key_len,nextflag, + _ma_kpos(nod_flag,keypos))) <= 0) + DBUG_RETURN(error); + + if (flag >0) + { + if (nextflag & (SEARCH_SMALLER | SEARCH_LAST) && + keypos == buff+2+nod_flag) + DBUG_RETURN(1); /* Bigger than key */ + } + else if (nextflag & SEARCH_BIGGER && keypos >= maxpos) + DBUG_RETURN(1); /* Smaller than key */ + } + else + { + if ((nextflag & SEARCH_FIND) && nod_flag && + ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME || + key_len != USE_WHOLE_KEY)) + { + if ((error= _ma_search(info,keyinfo,key,key_len,SEARCH_FIND, + _ma_kpos(nod_flag,keypos))) >= 0 || + my_errno != HA_ERR_KEY_NOT_FOUND) + DBUG_RETURN(error); + info->last_keypage= HA_OFFSET_ERROR; /* Buffer not in mem */ + } + } + if (pos != info->last_keypage) + { + byte *old_buff=buff; + if (!(buff= _ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS, + info->keyread_buff, + test(!(nextflag & SEARCH_SAVE_BUFF))))) + goto err; + keypos=buff+(keypos-old_buff); + maxpos=buff+(maxpos-old_buff); + } + + if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0) + { + uint not_used[2]; + if (_ma_get_prev_key(info,keyinfo, buff, info->lastkey, keypos, + &info->lastkey_length)) + goto err; + if (!(nextflag & SEARCH_SMALLER) && + ha_key_cmp(keyinfo->seg, (uchar*) info->lastkey, (uchar*) key, key_len, + SEARCH_FIND, not_used)) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + goto err; + } + } + else + { + info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey); + if (!info->lastkey_length) + goto err; + memcpy(info->lastkey,lastkey,info->lastkey_length); + } + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + /* Save position for a possible read next / previous */ + info->int_keypos= info->keyread_buff+ (keypos-buff); + info->int_maxpos= info->keyread_buff+ (maxpos-buff); + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=0; + info->keyread_buff_used= (info->keyread_buff != buff); /* If we have to reread */ + + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); + +err: + DBUG_PRINT("exit",("Error: %d",my_errno)); + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->page_changed=1; + DBUG_RETURN (-1); +} /* _ma_search */ + + + /* Search after key in page-block */ + /* If packed key puts smaller or identical key in buff */ + /* ret_pos point to where find or bigger key starts */ + /* ARGSUSED */ + +int _ma_bin_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_len, uint comp_flag, byte **ret_pos, + byte *buff __attribute__((unused)), my_bool *last_key) +{ + reg4 int start,mid,end,save_end; + int flag; + uint totlength,nod_flag,not_used[2]; + DBUG_ENTER("_ma_bin_search"); + + LINT_INIT(flag); + totlength=keyinfo->keylength+(nod_flag=_ma_test_if_nod(page)); + start=0; mid=1; + save_end=end=(int) ((maria_data_on_page(page)-2-nod_flag)/totlength-1); + DBUG_PRINT("test",("page_length: %d end: %d",maria_data_on_page(page),end)); + page+=2+nod_flag; + + while (start != end) + { + mid= (start+end)/2; + if ((flag=ha_key_cmp(keyinfo->seg,(uchar*) page+(uint) mid*totlength, + (uchar*) key, key_len, comp_flag, not_used)) + >= 0) + end=mid; + else + start=mid+1; + } + if (mid != start) + flag=ha_key_cmp(keyinfo->seg, (uchar*) page+(uint) start*totlength, + (uchar*) key, key_len, comp_flag, not_used); + if (flag < 0) + start++; /* point at next, bigger key */ + *ret_pos=page+(uint) start*totlength; + *last_key= end == save_end; + DBUG_PRINT("exit",("flag: %d keypos: %d",flag,start)); + DBUG_RETURN(flag); +} /* _ma_bin_search */ + + +/* + Locate a packed key in a key page. + + SYNOPSIS + _ma_seq_search() + info Open table information. + keyinfo Key definition information. + page Key page (beginning). + key Search key. + key_len Length to use from search key or USE_WHOLE_KEY + comp_flag Search flags like SEARCH_SAME etc. + ret_pos RETURN Position in key page behind this key. + buff RETURN Copy of previous or identical unpacked key. + last_key RETURN If key is last in page. + + DESCRIPTION + Used instead of _ma_bin_search() when key is packed. + Puts smaller or identical key in buff. + Key is searched sequentially. + + RETURN + > 0 Key in 'buff' is smaller than search key. + 0 Key in 'buff' is identical to search key. + < 0 Not found. +*/ + +int _ma_seq_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint key_len, uint comp_flag, byte **ret_pos, + byte *buff, my_bool *last_key) +{ + int flag; + uint nod_flag,length,not_used[2]; + byte t_buff[HA_MAX_KEY_BUFF],*end; + DBUG_ENTER("_ma_seq_search"); + + LINT_INIT(flag); LINT_INIT(length); + end= page+maria_data_on_page(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + *ret_pos=page; + t_buff[0]=0; /* Avoid bugs */ + while (page < end) + { + length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,t_buff); + if (length == 0 || page > end) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_PRINT("error", + ("Found wrong key: length: %u page: 0x%lx end: 0x%lx", + length, (long) page, (long) end)); + DBUG_RETURN(MARIA_FOUND_WRONG_KEY); + } + if ((flag= ha_key_cmp(keyinfo->seg, (uchar*) t_buff,(uchar*) key, + key_len,comp_flag, not_used)) >= 0) + break; +#ifdef EXTRA_DEBUG + DBUG_PRINT("loop",("page: 0x%lx key: '%s' flag: %d", (long) page, t_buff, + flag)); +#endif + memcpy(buff,t_buff,length); + *ret_pos=page; + } + if (flag == 0) + memcpy(buff,t_buff,length); /* Result is first key */ + *last_key= page == end; + DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos)); + DBUG_RETURN(flag); +} /* _ma_seq_search */ + + +int _ma_prefix_search(MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, uint nextflag, + byte **ret_pos, byte *buff, my_bool *last_key) +{ + /* + my_flag is raw comparison result to be changed according to + SEARCH_NO_FIND,SEARCH_LAST and HA_REVERSE_SORT flags. + flag is the value returned by ha_key_cmp and as treated as final + */ + int flag=0, my_flag=-1; + uint nod_flag, length, len, matched, cmplen, kseg_len; + uint prefix_len,suffix_len; + int key_len_skip, seg_len_pack, key_len_left; + byte *end; + uchar *kseg, *vseg, *saved_vseg, *saved_from; + uchar *sort_order= keyinfo->seg->charset->sort_order; + byte tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; + byte *saved_to; + uint saved_length=0, saved_prefix_len=0; + uint length_pack; + DBUG_ENTER("_ma_prefix_search"); + + LINT_INIT(length); + LINT_INIT(prefix_len); + LINT_INIT(seg_len_pack); + LINT_INIT(saved_from); + LINT_INIT(saved_to); + LINT_INIT(saved_vseg); + + t_buff[0]=0; /* Avoid bugs */ + end= page+maria_data_on_page(page); + nod_flag=_ma_test_if_nod(page); + page+=2+nod_flag; + *ret_pos=page; + kseg= (uchar*) key; + + get_key_pack_length(kseg_len, length_pack, kseg); + key_len_skip=length_pack+kseg_len; + key_len_left=(int) key_len- (int) key_len_skip; + /* If key_len is 0, then lenght_pack is 1, then key_len_left is -1. */ + cmplen=(key_len_left>=0) ? kseg_len : key_len-length_pack; + DBUG_PRINT("info",("key: '%.*s'",kseg_len,kseg)); + + /* + Keys are compressed the following way: + + If the max length of first key segment <= 127 bytes the prefix is + 1 byte else it's 2 byte + + (prefix) length The high bit is set if this is a prefix for the prev key. + [suffix length] Packed length of suffix if the previous was a prefix. + (suffix) data Key data bytes (past the common prefix or whole segment). + [next-key-seg] Next key segments (([packed length], data), ...) + pointer Reference to the data file (last_keyseg->length). + */ + + matched=0; /* how many char's from prefix were alredy matched */ + len=0; /* length of previous key unpacked */ + + while (page < end) + { + uint packed= *page & 128; + + vseg= (uchar*) page; + if (keyinfo->seg->length >= 127) + { + suffix_len=mi_uint2korr(vseg) & 32767; + vseg+=2; + } + else + suffix_len= *vseg++ & 127; + + if (packed) + { + if (suffix_len == 0) + { + /* == 0x80 or 0x8000, same key, prefix length == old key length. */ + prefix_len=len; + } + else + { + /* > 0x80 or 0x8000, this is prefix lgt, packed suffix lgt follows. */ + prefix_len=suffix_len; + get_key_length(suffix_len,vseg); + } + } + else + { + /* Not packed. No prefix used from last key. */ + prefix_len=0; + } + + len=prefix_len+suffix_len; + seg_len_pack=get_pack_length(len); + t_buff=tt_buff+3-seg_len_pack; + store_key_length(t_buff,len); + + if (prefix_len > saved_prefix_len) + memcpy(t_buff+seg_len_pack+saved_prefix_len,saved_vseg, + prefix_len-saved_prefix_len); + saved_vseg=vseg; + saved_prefix_len=prefix_len; + + DBUG_PRINT("loop",("page: '%.*s%.*s'",prefix_len,t_buff+seg_len_pack, + suffix_len,vseg)); + { + uchar *from= vseg+suffix_len; + HA_KEYSEG *keyseg; + uint l; + + for (keyseg=keyinfo->seg+1 ; keyseg->type ; keyseg++ ) + { + + if (keyseg->flag & HA_NULL_PART) + { + if (!(*from++)) + continue; + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + get_key_length(l,from); + } + else + l=keyseg->length; + + from+=l; + } + from+= keyseg->length; + page= (byte*) from+nod_flag; + length= (uint) (from-vseg); + } + + if (page > end) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_PRINT("error", + ("Found wrong key: length: %u page: 0x%lx end: %lx", + length, (long) page, (long) end)); + DBUG_RETURN(MARIA_FOUND_WRONG_KEY); + } + + if (matched >= prefix_len) + { + /* We have to compare. But we can still skip part of the key */ + uint left; + uchar *k= kseg+prefix_len; + + /* + If prefix_len > cmplen then we are in the end-space comparison + phase. Do not try to acces the key any more ==> left= 0. + */ + left= ((len <= cmplen) ? suffix_len : + ((prefix_len < cmplen) ? cmplen - prefix_len : 0)); + + matched=prefix_len+left; + + if (sort_order) + { + for (my_flag=0;left;left--) + if ((my_flag= (int) sort_order[*vseg++] - (int) sort_order[*k++])) + break; + } + else + { + for (my_flag=0;left;left--) + if ((my_flag= (int) *vseg++ - (int) *k++)) + break; + } + + if (my_flag>0) /* mismatch */ + break; + if (my_flag==0) /* match */ + { + /* + ** len cmplen seg_left_len more_segs + ** < matched=len; continue search + ** > = prefix ? found : (matched=len; continue search) + ** > < - ok, found + ** = < - ok, found + ** = = - ok, found + ** = = + next seg + */ + if (len < cmplen) + { + if ((keyinfo->seg->type != HA_KEYTYPE_TEXT && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT1 && + keyinfo->seg->type != HA_KEYTYPE_VARTEXT2)) + my_flag= -1; + else + { + /* We have to compare k and vseg as if they were space extended */ + uchar *k_end= k+ (cmplen - len); + for ( ; k < k_end && *k == ' '; k++) ; + if (k == k_end) + goto cmp_rest; /* should never happen */ + if ((uchar) *k < (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } + } + else if (len > cmplen) + { + uchar *vseg_end; + if ((nextflag & SEARCH_PREFIX) && key_len_left == 0) + goto fix_flag; + + /* We have to compare k and vseg as if they were space extended */ + for (vseg_end= vseg + (len-cmplen) ; + vseg < vseg_end && *vseg == (byte) ' '; + vseg++, matched++) ; + DBUG_ASSERT(vseg < vseg_end); + + if ((uchar) *vseg > (uchar) ' ') + { + my_flag= 1; /* Compared string is smaller */ + break; + } + my_flag= -1; /* Continue searching */ + } + else + { + cmp_rest: + if (key_len_left>0) + { + uint not_used[2]; + if ((flag = ha_key_cmp(keyinfo->seg+1,vseg, + k, key_len_left, nextflag, not_used)) >= 0) + break; + } + else + { + /* + at this line flag==-1 if the following lines were already + visited and 0 otherwise, i.e. flag <=0 here always !!! + */ + fix_flag: + DBUG_ASSERT(flag <= 0); + if (nextflag & (SEARCH_NO_FIND | SEARCH_LAST)) + flag=(nextflag & (SEARCH_BIGGER | SEARCH_LAST)) ? -1 : 1; + if (flag>=0) + break; + } + } + } + matched-=left; + } + /* else (matched < prefix_len) ---> do nothing. */ + + memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); + saved_to= buff+saved_length; + saved_from= saved_vseg; + saved_length=length; + *ret_pos=page; + } + if (my_flag) + flag=(keyinfo->seg->flag & HA_REVERSE_SORT) ? -my_flag : my_flag; + if (flag == 0) + { + memcpy(buff,t_buff,saved_length=seg_len_pack+prefix_len); + saved_to= buff+saved_length; + saved_from= saved_vseg; + saved_length=length; + } + if (saved_length) + memcpy(saved_to, (byte*) saved_from, saved_length); + + *last_key= page == end; + + DBUG_PRINT("exit",("flag: %d ret_pos: 0x%lx", flag, (long) *ret_pos)); + DBUG_RETURN(flag); +} /* _ma_prefix_search */ + + + /* Get pos to a key_block */ + +my_off_t _ma_kpos(uint nod_flag, byte *after_key) +{ + after_key-=nod_flag; + switch (nod_flag) { +#if SIZEOF_OFF_T > 4 + case 7: + return mi_uint7korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 6: + return mi_uint6korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 5: + return mi_uint5korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; +#else + case 7: + after_key++; + case 6: + after_key++; + case 5: + after_key++; +#endif + case 4: + return ((my_off_t) mi_uint4korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH; + case 3: + return ((my_off_t) mi_uint3korr(after_key))*MARIA_MIN_KEY_BLOCK_LENGTH; + case 2: + return (my_off_t) (mi_uint2korr(after_key)*MARIA_MIN_KEY_BLOCK_LENGTH); + case 1: + return (uint) (*after_key)*MARIA_MIN_KEY_BLOCK_LENGTH; + case 0: /* At leaf page */ + default: /* Impossible */ + return(HA_OFFSET_ERROR); + } +} /* _kpos */ + + + /* Save pos to a key_block */ + +void _ma_kpointer(register MARIA_HA *info, register byte *buff, my_off_t pos) +{ + pos/=MARIA_MIN_KEY_BLOCK_LENGTH; + switch (info->s->base.key_reflength) { +#if SIZEOF_OFF_T > 4 + case 7: mi_int7store(buff,pos); break; + case 6: mi_int6store(buff,pos); break; + case 5: mi_int5store(buff,pos); break; +#else + case 7: *buff++=0; + /* fall trough */ + case 6: *buff++=0; + /* fall trough */ + case 5: *buff++=0; + /* fall trough */ +#endif + case 4: mi_int4store(buff,pos); break; + case 3: mi_int3store(buff,pos); break; + case 2: mi_int2store(buff,(uint) pos); break; + case 1: buff[0]= (byte) pos; break; + default: abort(); /* impossible */ + } +} /* _ma_kpointer */ + + + /* Calc pos to a data-record from a key */ + + +my_off_t _ma_dpos(MARIA_HA *info, uint nod_flag, const byte *after_key) +{ + my_off_t pos; + after_key-=(nod_flag + info->s->rec_reflength); + switch (info->s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: pos= (my_off_t) mi_uint8korr(after_key); break; + case 7: pos= (my_off_t) mi_uint7korr(after_key); break; + case 6: pos= (my_off_t) mi_uint6korr(after_key); break; + case 5: pos= (my_off_t) mi_uint5korr(after_key); break; +#else + case 8: pos= (my_off_t) mi_uint4korr(after_key+4); break; + case 7: pos= (my_off_t) mi_uint4korr(after_key+3); break; + case 6: pos= (my_off_t) mi_uint4korr(after_key+2); break; + case 5: pos= (my_off_t) mi_uint4korr(after_key+1); break; +#endif + case 4: pos= (my_off_t) mi_uint4korr(after_key); break; + case 3: pos= (my_off_t) mi_uint3korr(after_key); break; + case 2: pos= (my_off_t) mi_uint2korr(after_key); break; + default: + pos=0L; /* Shut compiler up */ + } + return ((info->s->data_file_type == STATIC_RECORD) ? + pos * info->s->base.pack_reclength : pos); +} + + +/* Calc position from a record pointer ( in delete link chain ) */ + +my_off_t _ma_rec_pos(MARIA_SHARE *s, byte *ptr) +{ + my_off_t pos; + switch (s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: + pos= (my_off_t) mi_uint8korr(ptr); + if (pos == HA_OFFSET_ERROR) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 7: + pos= (my_off_t) mi_uint7korr(ptr); + if (pos == (((my_off_t) 1) << 56) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 6: + pos= (my_off_t) mi_uint6korr(ptr); + if (pos == (((my_off_t) 1) << 48) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; + case 5: + pos= (my_off_t) mi_uint5korr(ptr); + if (pos == (((my_off_t) 1) << 40) -1) + return HA_OFFSET_ERROR; /* end of list */ + break; +#else + case 8: + case 7: + case 6: + case 5: + ptr+= (s->rec_reflength-4); + /* fall through */ +#endif + case 4: + pos= (my_off_t) mi_uint4korr(ptr); + if (pos == (my_off_t) (uint32) ~0L) + return HA_OFFSET_ERROR; + break; + case 3: + pos= (my_off_t) mi_uint3korr(ptr); + if (pos == (my_off_t) (1 << 24) -1) + return HA_OFFSET_ERROR; + break; + case 2: + pos= (my_off_t) mi_uint2korr(ptr); + if (pos == (my_off_t) (1 << 16) -1) + return HA_OFFSET_ERROR; + break; + default: abort(); /* Impossible */ + } + return ((s->data_file_type == STATIC_RECORD) ? + pos * s->base.pack_reclength : pos); +} + + + /* save position to record */ + +void _ma_dpointer(MARIA_HA *info, byte *buff, my_off_t pos) +{ + if (info->s->data_file_type == STATIC_RECORD && + pos != HA_OFFSET_ERROR) + pos/= info->s->base.pack_reclength; + + switch (info->s->rec_reflength) { +#if SIZEOF_OFF_T > 4 + case 8: mi_int8store(buff,pos); break; + case 7: mi_int7store(buff,pos); break; + case 6: mi_int6store(buff,pos); break; + case 5: mi_int5store(buff,pos); break; +#else + case 8: *buff++=0; + /* fall trough */ + case 7: *buff++=0; + /* fall trough */ + case 6: *buff++=0; + /* fall trough */ + case 5: *buff++=0; + /* fall trough */ +#endif + case 4: mi_int4store(buff,pos); break; + case 3: mi_int3store(buff,pos); break; + case 2: mi_int2store(buff,(uint) pos); break; + default: abort(); /* Impossible */ + } +} /* _ma_dpointer */ + + + /* Get key from key-block */ + /* page points at previous key; its advanced to point at next key */ + /* key should contain previous key */ + /* Returns length of found key + pointers */ + /* nod_flag is a flag if we are on nod */ + + /* same as _ma_get_key but used with fixed length keys */ + +uint _ma_get_static_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register byte **page, register byte *key) +{ + memcpy((byte*) key,(byte*) *page, + (size_t) (keyinfo->keylength+nod_flag)); + *page+=keyinfo->keylength+nod_flag; + return(keyinfo->keylength); +} /* _ma_get_static_key */ + + +/* + get key witch is packed against previous key or key with a NULL column. + + SYNOPSIS + _ma_get_pack_key() + keyinfo key definition information. + nod_flag If nod: Length of node pointer, else zero. + page_pos RETURN position in key page behind this key. + key IN/OUT in: prev key, out: unpacked key. + + RETURN + key_length + length of data pointer +*/ + +uint _ma_get_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register byte **page_pos, register byte *key) +{ + reg1 HA_KEYSEG *keyseg; + byte *start_key,*page=*page_pos; + uint length; + + start_key=key; + for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++) + { + if (keyseg->flag & HA_PACK_KEY) + { + /* key with length, packed to previous key */ + byte *start= key; + uint packed= *page & 128,tot_length,rest_length; + if (keyseg->length >= 127) + { + length=mi_uint2korr(page) & 32767; + page+=2; + } + else + length= *page++ & 127; + + if (packed) + { + if (length > (uint) keyseg->length) + { + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; /* Error */ + } + if (length == 0) /* Same key */ + { + if (keyseg->flag & HA_NULL_PART) + *key++=1; /* Can't be NULL */ + get_key_length(length,key); + key+= length; /* Same diff_key as prev */ + if (length > keyseg->length) + { + DBUG_PRINT("error", + ("Found too long null packed key: %u of %u at 0x%lx", + length, keyseg->length, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; + } + continue; + } + if (keyseg->flag & HA_NULL_PART) + { + key++; /* Skip null marker*/ + start++; + } + + get_key_length(rest_length,page); + tot_length=rest_length+length; + + /* If the stored length has changed, we must move the key */ + if (tot_length >= 255 && *start != (char) 255) + { + /* length prefix changed from a length of one to a length of 3 */ + bmove_upp((char*) key+length+3,(char*) key+length+1,length); + *key=255; + mi_int2store(key+1,tot_length); + key+=3+length; + } + else if (tot_length < 255 && *start == (char) 255) + { + bmove(key+1,key+3,length); + *key=tot_length; + key+=1+length; + } + else + { + store_key_length_inc(key,tot_length); + key+=length; + } + memcpy(key,page,rest_length); + page+=rest_length; + key+=rest_length; + continue; + } + else + { + if (keyseg->flag & HA_NULL_PART) + { + if (!length--) /* Null part */ + { + *key++=0; + continue; + } + *key++=1; /* Not null */ + } + } + if (length > (uint) keyseg->length) + { + DBUG_PRINT("error",("Found too long packed key: %u of %u at 0x%lx", + length, keyseg->length, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + return 0; /* Error */ + } + store_key_length_inc(key,length); + } + else + { + if (keyseg->flag & HA_NULL_PART) + { + if (!(*key++ = *page++)) + continue; + } + if (keyseg->flag & + (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + byte *tmp=page; + get_key_length(length,tmp); + length+=(uint) (tmp-page); + } + else + length=keyseg->length; + } + memcpy((byte*) key,(byte*) page,(size_t) length); + key+=length; + page+=length; + } + length=keyseg->length+nod_flag; + bmove((byte*) key,(byte*) page,length); + *page_pos= page+length; + return ((uint) (key-start_key)+keyseg->length); +} /* _ma_get_pack_key */ + + + +/* key that is packed relatively to previous */ + +uint _ma_get_binary_pack_key(register MARIA_KEYDEF *keyinfo, uint nod_flag, + register byte **page_pos, register byte *key) +{ + reg1 HA_KEYSEG *keyseg; + byte *start_key,*page,*page_end,*from,*from_end; + uint length,tmp; + DBUG_ENTER("_ma_get_binary_pack_key"); + + page= *page_pos; + page_end=page+HA_MAX_KEY_BUFF+1; + start_key=key; + + /* + Keys are compressed the following way: + + prefix length Packed length of prefix for the prev key. (1 or 3 bytes) + for each key segment: + [is null] Null indicator if can be null (1 byte, zero means null) + [length] Packed length if varlength (1 or 3 bytes) + pointer Reference to the data file (last_keyseg->length). + */ + get_key_length(length,page); + if (length) + { + if (length > keyinfo->maxlength) + { + DBUG_PRINT("error", + ("Found too long binary packed key: %u of %u at 0x%lx", + length, keyinfo->maxlength, (long) *page_pos)); + DBUG_DUMP("key",(char*) *page_pos,16); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); /* Wrong key */ + } + from=key; from_end=key+length; + } + else + { + from=page; from_end=page_end; /* Not packed key */ + } + + /* + The trouble is that key is split in two parts: + The first part is in from ...from_end-1. + The second part starts at page + */ + for (keyseg=keyinfo->seg ; keyseg->type ;keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + { + if (from == from_end) { from=page; from_end=page_end; } + if (!(*key++ = *from++)) + continue; /* Null part */ + } + if (keyseg->flag & (HA_VAR_LENGTH_PART | HA_BLOB_PART | HA_SPACE_PACK)) + { + /* Get length of dynamic length key part */ + if (from == from_end) { from=page; from_end=page_end; } + if ((length= (uint) (uchar) (*key++ = *from++)) == 255) + { + if (from == from_end) { from=page; from_end=page_end; } + length= ((uint) (uchar) ((*key++ = *from++))) << 8; + if (from == from_end) { from=page; from_end=page_end; } + length+= (uint) (uchar) ((*key++ = *from++)); + } + } + else + length=keyseg->length; + + if ((tmp=(uint) (from_end-from)) <= length) + { + key+=tmp; /* Use old key */ + length-=tmp; + from=page; from_end=page_end; + } + DBUG_ASSERT((int) length >= 0); + DBUG_PRINT("info",("key: 0x%lx from: 0x%lx length: %u", + (long) key, (long) from, length)); + memmove((byte*) key, (byte*) from, (size_t) length); + key+=length; + from+=length; + } + length=keyseg->length+nod_flag; + if ((tmp=(uint) (from_end-from)) <= length) + { + memcpy(key+tmp,page,length-tmp); /* Get last part of key */ + *page_pos= page+length-tmp; + } + else + { + if (from_end != page_end) + { + DBUG_PRINT("error",("Error when unpacking key")); + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); /* Error */ + } + memcpy((byte*) key,(byte*) from,(size_t) length); + *page_pos= from+length; + } + DBUG_RETURN((uint) (key-start_key)+keyseg->length); +} + + + /* Get key at position without knowledge of previous key */ + /* Returns pointer to next key */ + +byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, byte *keypos, uint *return_key_length) +{ + uint nod_flag; + DBUG_ENTER("_ma_get_key"); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + bmove((byte*) key,(byte*) keypos,keyinfo->keylength+nod_flag); + DBUG_RETURN(keypos+keyinfo->keylength+nod_flag); + } + else + { + page+=2+nod_flag; + key[0]=0; /* safety */ + while (page <= keypos) + { + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); + if (*return_key_length == 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + } + DBUG_PRINT("exit",("page: 0x%lx length: %u", (long) page, + *return_key_length)); + DBUG_RETURN(page); +} /* _ma_get_key */ + + + /* Get key at position without knowledge of previous key */ + /* Returns 0 if ok */ + +static my_bool _ma_get_prev_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, byte *keypos, + uint *return_key_length) +{ + uint nod_flag; + DBUG_ENTER("_ma_get_prev_key"); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + *return_key_length=keyinfo->keylength; + bmove((byte*) key,(byte*) keypos- *return_key_length-nod_flag, + *return_key_length); + DBUG_RETURN(0); + } + else + { + page+=2+nod_flag; + key[0]=0; /* safety */ + while (page < keypos) + { + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key); + if (*return_key_length == 0) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(1); + } + } + } + DBUG_RETURN(0); +} /* _ma_get_key */ + + + + /* Get last key from key-page */ + /* Return pointer to where key starts */ + +byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *page, + byte *lastkey, byte *endpos, uint *return_key_length) +{ + uint nod_flag; + byte *lastpos; + DBUG_ENTER("_ma_get_last_key"); + DBUG_PRINT("enter",("page: 0x%lx endpos: 0x%lx", (long) page, + (long) endpos)); + + nod_flag=_ma_test_if_nod(page); + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + { + lastpos=endpos-keyinfo->keylength-nod_flag; + *return_key_length=keyinfo->keylength; + if (lastpos > page) + bmove((byte*) lastkey,(byte*) lastpos,keyinfo->keylength+nod_flag); + } + else + { + lastpos=(page+=2+nod_flag); + lastkey[0]=0; + while (page < endpos) + { + lastpos=page; + *return_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,lastkey); + if (*return_key_length == 0) + { + DBUG_PRINT("error",("Couldn't find last key: page: 0x%lx", + (long) page)); + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + } + DBUG_PRINT("exit",("lastpos: 0x%lx length: %u", (long) lastpos, + *return_key_length)); + DBUG_RETURN(lastpos); +} /* _ma_get_last_key */ + + + /* Calculate length of key */ + +uint _ma_keylength(MARIA_KEYDEF *keyinfo, register const byte *key) +{ + reg1 HA_KEYSEG *keyseg; + const byte *start; + + if (! (keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY))) + return (keyinfo->keylength); + + start= key; + for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + if (!*key++) + continue; + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint length; + get_key_length(length,key); + key+=length; + } + else + key+= keyseg->length; + } + return((uint) (key-start)+keyseg->length); +} /* _ma_keylength */ + + +/* + Calculate length of part key. + + Used in maria_rkey() to find the key found for the key-part that was used. + This is needed in case of multi-byte character sets where we may search + after '0xDF' but find 'ss' +*/ + +uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key, + HA_KEYSEG *end) +{ + reg1 HA_KEYSEG *keyseg; + const byte *start= key; + + for (keyseg=keyinfo->seg ; keyseg != end ; keyseg++) + { + if (keyseg->flag & HA_NULL_PART) + if (!*key++) + continue; + if (keyseg->flag & (HA_SPACE_PACK | HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint length; + get_key_length(length,key); + key+=length; + } + else + key+= keyseg->length; + } + return (uint) (key-start); +} + + +/* Move a key */ + +byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from) +{ + reg1 uint length; + memcpy(to, from, (size_t) (length= _ma_keylength(keyinfo, from))); + return to+length; +} + + +/* + Find next/previous record with same key + + WARNING + This can't be used when database is touched after last read +*/ + +int _ma_search_next(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, uint nextflag, my_off_t pos) +{ + int error; + uint nod_flag; + byte lastkey[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_search_next"); + DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu page_changed %d keyread_buff_used: %d", + nextflag, (ulong) info->cur_row.lastpos, + (ulong) info->int_keypos, + info->page_changed, info->keyread_buff_used)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key,key_length);); + + /* Force full read if we are at last key or if we are not on a leaf + and the key tree has changed since we used it last time + Note that even if the key tree has changed since last read, we can use + the last read data from the leaf if we haven't used the buffer for + something else. + */ + + if (((nextflag & SEARCH_BIGGER) && info->int_keypos >= info->int_maxpos) || + info->page_changed || + (info->int_keytree_version != keyinfo->version && + (info->int_nod_flag || info->keyread_buff_used))) + DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, pos)); + + if (info->keyread_buff_used) + { + if (!_ma_fetch_keypage(info,keyinfo,info->last_search_keypage, + DFLT_INIT_HITS,info->keyread_buff,0)) + DBUG_RETURN(-1); + info->keyread_buff_used=0; + } + + /* Last used buffer is in info->keyread_buff */ + nod_flag=_ma_test_if_nod(info->keyread_buff); + + if (nextflag & SEARCH_BIGGER) /* Next key */ + { + my_off_t tmp_pos= _ma_kpos(nod_flag,info->int_keypos); + if (tmp_pos != HA_OFFSET_ERROR) + { + if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, tmp_pos)) <=0) + DBUG_RETURN(error); + } + memcpy(lastkey,key,key_length); + if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag, + &info->int_keypos,lastkey))) + DBUG_RETURN(-1); + } + else /* Previous key */ + { + uint length; + /* Find start of previous key */ + info->int_keypos= _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey, + info->int_keypos, &length); + if (!info->int_keypos) + DBUG_RETURN(-1); + if (info->int_keypos == info->keyread_buff+2) + DBUG_RETURN(_ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, pos)); + if ((error= _ma_search(info,keyinfo,key, USE_WHOLE_KEY, + nextflag | SEARCH_SAVE_BUFF, + _ma_kpos(nod_flag,info->int_keypos))) <= 0) + DBUG_RETURN(error); + + /* QQ: We should be able to optimize away the following call */ + if (! _ma_get_last_key(info,keyinfo,info->keyread_buff,lastkey, + info->int_keypos,&info->lastkey_length)) + DBUG_RETURN(-1); + } + memcpy(info->lastkey,lastkey,info->lastkey_length); + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); +} /* _ma_search_next */ + + + /* Search after position for the first row in an index */ + /* This is stored in info->cur_row.lastpos */ + +int _ma_search_first(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + register my_off_t pos) +{ + uint nod_flag; + byte *page; + DBUG_ENTER("_ma_search_first"); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + + do + { + if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,info->keyread_buff,0)) + { + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + nod_flag=_ma_test_if_nod(info->keyread_buff); + page=info->keyread_buff+2+nod_flag; + } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR); + + if (!(info->lastkey_length=(*keyinfo->get_key)(keyinfo,nod_flag,&page, + info->lastkey))) + DBUG_RETURN(-1); /* Crashed */ + + info->int_keypos=page; info->int_maxpos=info->keyread_buff+maria_data_on_page(info->keyread_buff)-1; + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=info->keyread_buff_used=0; + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + + DBUG_PRINT("exit",("found key at %lu", (ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); +} /* _ma_search_first */ + + + /* Search after position for the last row in an index */ + /* This is stored in info->cur_row.lastpos */ + +int _ma_search_last(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + register my_off_t pos) +{ + uint nod_flag; + byte *buff,*page; + DBUG_ENTER("_ma_search_last"); + + if (pos == HA_OFFSET_ERROR) + { + my_errno=HA_ERR_KEY_NOT_FOUND; /* Didn't find key */ + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + + buff=info->keyread_buff; + do + { + if (!_ma_fetch_keypage(info,keyinfo,pos,DFLT_INIT_HITS,buff,0)) + { + info->cur_row.lastpos= HA_OFFSET_ERROR; + DBUG_RETURN(-1); + } + page= buff+maria_data_on_page(buff); + nod_flag=_ma_test_if_nod(buff); + } while ((pos= _ma_kpos(nod_flag,page)) != HA_OFFSET_ERROR); + + if (!_ma_get_last_key(info,keyinfo,buff,info->lastkey,page, + &info->lastkey_length)) + DBUG_RETURN(-1); + info->cur_row.lastpos= _ma_dpos(info,0,info->lastkey+info->lastkey_length); + info->int_keypos=info->int_maxpos=page; + info->int_nod_flag=nod_flag; + info->int_keytree_version=keyinfo->version; + info->last_search_keypage=info->last_keypage; + info->page_changed=info->keyread_buff_used=0; + + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); + DBUG_RETURN(0); +} /* _ma_search_last */ + + + +/**************************************************************************** +** +** Functions to store and pack a key in a page +** +** maria_calc_xx_key_length takes the following arguments: +** nod_flag If nod: Length of nod-pointer +** next_key Position to pos after the new key in buffer +** org_key Key that was before the next key in buffer +** prev_key Last key before current key +** key Key that will be stored +** s_temp Information how next key will be packed +****************************************************************************/ + +/* Static length key */ + +int +_ma_calc_static_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, + byte *next_pos __attribute__((unused)), + byte *org_key __attribute__((unused)), + byte *prev_key __attribute__((unused)), + const byte *key, MARIA_KEY_PARAM *s_temp) +{ + s_temp->key= key; + return (int) (s_temp->totlength=keyinfo->keylength+nod_flag); +} + +/* Variable length key */ + +int +_ma_calc_var_key_length(MARIA_KEYDEF *keyinfo,uint nod_flag, + byte *next_pos __attribute__((unused)), + byte *org_key __attribute__((unused)), + byte *prev_key __attribute__((unused)), + const byte *key, MARIA_KEY_PARAM *s_temp) +{ + s_temp->key= key; + return (int) (s_temp->totlength= _ma_keylength(keyinfo,key)+nod_flag); +} + +/* + length of key with a variable length first segment which is prefix + compressed (maria_chk reports 'packed + stripped') + + Keys are compressed the following way: + + If the max length of first key segment <= 127 bytes the prefix is + 1 byte else it's 2 byte + + prefix byte(s) The high bit is set if this is a prefix for the prev key + length Packed length if the previous was a prefix byte + [length] data bytes ('length' bytes) + next-key-seg Next key segments + + If the first segment can have NULL: + The length is 0 for NULLS and 1+length for not null columns. + +*/ + +int +_ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *next_key, + byte *org_key, byte *prev_key, const byte *key, + MARIA_KEY_PARAM *s_temp) +{ + reg1 HA_KEYSEG *keyseg; + int length; + uint key_length,ref_length,org_key_length=0, + length_pack,new_key_length,diff_flag,pack_marker; + const byte *start,*end,*key_end; + uchar *sort_order; + bool same_length; + + length_pack=s_temp->ref_length=s_temp->n_ref_length=s_temp->n_length=0; + same_length=0; keyseg=keyinfo->seg; + key_length= _ma_keylength(keyinfo,key)+nod_flag; + + sort_order=0; + if ((keyinfo->flag & HA_FULLTEXT) && + ((keyseg->type == HA_KEYTYPE_TEXT) || + (keyseg->type == HA_KEYTYPE_VARTEXT1) || + (keyseg->type == HA_KEYTYPE_VARTEXT2)) && + !use_strnxfrm(keyseg->charset)) + sort_order= keyseg->charset->sort_order; + + /* diff flag contains how many bytes is needed to pack key */ + if (keyseg->length >= 127) + { + diff_flag=2; + pack_marker=32768; + } + else + { + diff_flag= 1; + pack_marker=128; + } + s_temp->pack_marker=pack_marker; + + /* Handle the case that the first part have NULL values */ + if (keyseg->flag & HA_NULL_PART) + { + if (!*key++) + { + s_temp->key= key; + s_temp->key_length= 0; + s_temp->totlength= key_length-1+diff_flag; + s_temp->next_key_pos= 0; /* No next key */ + return (s_temp->totlength); + } + s_temp->store_not_null=1; + key_length--; /* We don't store NULL */ + if (prev_key && !*prev_key++) + org_key=prev_key=0; /* Can't pack against prev */ + else if (org_key) + org_key++; /* Skip NULL */ + } + else + s_temp->store_not_null=0; + s_temp->prev_key= org_key; + + /* The key part will start with a packed length */ + + get_key_pack_length(new_key_length,length_pack,key); + end= key_end= key+ new_key_length; + start= key; + + /* Calc how many characters are identical between this and the prev. key */ + if (prev_key) + { + get_key_length(org_key_length,prev_key); + s_temp->prev_key=prev_key; /* Pointer at data */ + /* Don't use key-pack if length == 0 */ + if (new_key_length && new_key_length == org_key_length) + same_length=1; + else if (new_key_length > org_key_length) + end= key + org_key_length; + + if (sort_order) /* SerG */ + { + while (key < end && + sort_order[* (uchar*) key] == sort_order[* (uchar*) prev_key]) + { + key++; prev_key++; + } + } + else + { + while (key < end && *key == *prev_key) + { + key++; prev_key++; + } + } + } + + s_temp->key=key; + s_temp->key_length= (uint) (key_end-key); + + if (same_length && key == key_end) + { + /* identical variable length key */ + s_temp->ref_length= pack_marker; + length=(int) key_length-(int) (key_end-start)-length_pack; + length+= diff_flag; + if (next_key) + { /* Can't combine with next */ + s_temp->n_length= *next_key; /* Needed by _ma_store_key */ + next_key=0; + } + } + else + { + if (start != key) + { /* Starts as prev key */ + ref_length= (uint) (key-start); + s_temp->ref_length= ref_length + pack_marker; + length= (int) (key_length - ref_length); + + length-= length_pack; + length+= diff_flag; + length+= ((new_key_length-ref_length) >= 255) ? 3 : 1;/* Rest_of_key */ + } + else + { + s_temp->key_length+=s_temp->store_not_null; /* If null */ + length= key_length - length_pack+ diff_flag; + } + } + s_temp->totlength=(uint) length; + s_temp->prev_length=0; + DBUG_PRINT("test",("tot_length: %u length: %d uniq_key_length: %u", + key_length, length, s_temp->key_length)); + + /* If something after that hasn't length=0, test if we can combine */ + if ((s_temp->next_key_pos=next_key)) + { + uint packed,n_length; + + packed = *next_key & 128; + if (diff_flag == 2) + { + n_length= mi_uint2korr(next_key) & 32767; /* Length of next key */ + next_key+=2; + } + else + n_length= *next_key++ & 127; + if (!packed) + n_length-= s_temp->store_not_null; + + if (n_length || packed) /* Don't pack 0 length keys */ + { + uint next_length_pack, new_ref_length=s_temp->ref_length; + + if (packed) + { + /* If first key and next key is packed (only on delete) */ + if (!prev_key && org_key) + { + get_key_length(org_key_length,org_key); + key=start; + if (sort_order) /* SerG */ + { + while (key < end && + sort_order[*(uchar*) key] == sort_order[*(uchar*) org_key]) + { + key++; org_key++; + } + } + else + { + while (key < end && *key == *org_key) + { + key++; org_key++; + } + } + if ((new_ref_length= (uint) (key - start))) + new_ref_length+=pack_marker; + } + + if (!n_length) + { + /* + We put a different key between two identical variable length keys + Extend next key to have same prefix as this key + */ + if (new_ref_length) /* prefix of previus key */ + { /* make next key longer */ + s_temp->part_of_prev_key= new_ref_length; + s_temp->prev_length= org_key_length - + (new_ref_length-pack_marker); + s_temp->n_ref_length= s_temp->part_of_prev_key; + s_temp->n_length= s_temp->prev_length; + n_length= get_pack_length(s_temp->prev_length); + s_temp->prev_key+= (new_ref_length - pack_marker); + length+= s_temp->prev_length + n_length; + } + else + { /* Can't use prev key */ + s_temp->part_of_prev_key=0; + s_temp->prev_length= org_key_length; + s_temp->n_ref_length=s_temp->n_length= org_key_length; + length+= org_key_length; + } + return (int) length; + } + + ref_length=n_length; + /* Get information about not packed key suffix */ + get_key_pack_length(n_length,next_length_pack,next_key); + + /* Test if new keys has fewer characters that match the previous key */ + if (!new_ref_length) + { /* Can't use prev key */ + s_temp->part_of_prev_key= 0; + s_temp->prev_length= ref_length; + s_temp->n_ref_length= s_temp->n_length= n_length+ref_length; + return (int) length+ref_length-next_length_pack; + } + if (ref_length+pack_marker > new_ref_length) + { + uint new_pack_length=new_ref_length-pack_marker; + /* We must copy characters from the original key to the next key */ + s_temp->part_of_prev_key= new_ref_length; + s_temp->prev_length= ref_length - new_pack_length; + s_temp->n_ref_length=s_temp->n_length=n_length + s_temp->prev_length; + s_temp->prev_key+= new_pack_length; + length-= (next_length_pack - get_pack_length(s_temp->n_length)); + return (int) length + s_temp->prev_length; + } + } + else + { + /* Next key wasn't a prefix of previous key */ + ref_length=0; + next_length_pack=0; + } + DBUG_PRINT("test",("length: %d next_key: 0x%lx", length, + (long) next_key)); + + { + uint tmp_length; + key=(start+=ref_length); + if (key+n_length < key_end) /* Normalize length based */ + key_end= key+n_length; + if (sort_order) /* SerG */ + { + while (key < key_end && + sort_order[*(uchar*) key] == sort_order[*(uchar*) next_key]) + { + key++; next_key++; + } + } + else + { + while (key < key_end && *key == *next_key) + { + key++; next_key++; + } + } + if (!(tmp_length=(uint) (key-start))) + { /* Key can't be re-packed */ + s_temp->next_key_pos=0; + return length; + } + ref_length+=tmp_length; + n_length-=tmp_length; + length-=tmp_length+next_length_pack; /* We gained these chars */ + } + if (n_length == 0 && ref_length == new_key_length) + { + s_temp->n_ref_length=pack_marker; /* Same as prev key */ + } + else + { + s_temp->n_ref_length=ref_length | pack_marker; + length+= get_pack_length(n_length); + s_temp->n_length=n_length; + } + } + } + return length; +} + + +/* Length of key which is prefix compressed */ + +int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *next_key, + byte *org_key, byte *prev_key, + const byte *key, + MARIA_KEY_PARAM *s_temp) +{ + uint length,key_length,ref_length; + + s_temp->totlength=key_length= _ma_keylength(keyinfo,key)+nod_flag; +#ifdef HAVE_purify + s_temp->n_length= s_temp->n_ref_length=0; /* For valgrind */ +#endif + s_temp->key=key; + s_temp->prev_key=org_key; + if (prev_key) /* If not first key in block */ + { + /* pack key against previous key */ + /* + As keys may be identical when running a sort in maria_chk, we + have to guard against the case where keys may be identical + */ + const byte *end; + end=key+key_length; + for ( ; *key == *prev_key && key < end; key++,prev_key++) ; + s_temp->ref_length= ref_length=(uint) (key-s_temp->key); + length=key_length - ref_length + get_pack_length(ref_length); + } + else + { + /* No previous key */ + s_temp->ref_length=ref_length=0; + length=key_length+1; + } + if ((s_temp->next_key_pos=next_key)) /* If another key after */ + { + /* pack key against next key */ + uint next_length,next_length_pack; + get_key_pack_length(next_length,next_length_pack,next_key); + + /* If first key and next key is packed (only on delete) */ + if (!prev_key && org_key && next_length) + { + const byte *end; + for (key= s_temp->key, end=key+next_length ; + *key == *org_key && key < end; + key++,org_key++) ; + ref_length= (uint) (key - s_temp->key); + } + + if (next_length > ref_length) + { + /* We put a key with different case between two keys with the same prefix + Extend next key to have same prefix as + this key */ + s_temp->n_ref_length= ref_length; + s_temp->prev_length= next_length-ref_length; + s_temp->prev_key+= ref_length; + return (int) (length+ s_temp->prev_length - next_length_pack + + get_pack_length(ref_length)); + } + /* Check how many characters are identical to next key */ + key= s_temp->key+next_length; + while (*key++ == *next_key++) ; + if ((ref_length= (uint) (key - s_temp->key)-1) == next_length) + { + s_temp->next_key_pos=0; + return length; /* can't pack next key */ + } + s_temp->prev_length=0; + s_temp->n_ref_length=ref_length; + return (int) (length-(ref_length - next_length) - next_length_pack + + get_pack_length(ref_length)); + } + return (int) length; +} + + +/* +** store a key packed with _ma_calc_xxx_key_length in page-buffert +*/ + +/* store key without compression */ + +void _ma_store_static_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register byte *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + memcpy((byte*) key_pos,(byte*) s_temp->key,(size_t) s_temp->totlength); +} + + +/* store variable length key with prefix compression */ + +#define store_pack_length(test,pos,length) { \ + if (test) { *((pos)++) = (uchar) (length); } else \ + { *((pos)++) = (uchar) ((length) >> 8); *((pos)++) = (uchar) (length); } } + + +void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register byte *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + uint length; + byte *start; + + start=key_pos; + + if (s_temp->ref_length) + { + /* Packed against previous key */ + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->ref_length); + /* If not same key after */ + if (s_temp->ref_length != s_temp->pack_marker) + store_key_length_inc(key_pos,s_temp->key_length); + } + else + { + /* Not packed against previous key */ + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->key_length); + } + bmove((byte*) key_pos,(byte*) s_temp->key, + (length=s_temp->totlength-(uint) (key_pos-start))); + + if (!s_temp->next_key_pos) /* No following key */ + return; + key_pos+=length; + + if (s_temp->prev_length) + { + /* Extend next key because new key didn't have same prefix as prev key */ + if (s_temp->part_of_prev_key) + { + store_pack_length(s_temp->pack_marker == 128,key_pos, + s_temp->part_of_prev_key); + store_key_length_inc(key_pos,s_temp->n_length); + } + else + { + s_temp->n_length+= s_temp->store_not_null; + store_pack_length(s_temp->pack_marker == 128,key_pos, + s_temp->n_length); + } + memcpy(key_pos, s_temp->prev_key, s_temp->prev_length); + } + else if (s_temp->n_ref_length) + { + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_ref_length); + if (s_temp->n_ref_length == s_temp->pack_marker) + return; /* Identical key */ + store_key_length(key_pos,s_temp->n_length); + } + else + { + s_temp->n_length+= s_temp->store_not_null; + store_pack_length(s_temp->pack_marker == 128,key_pos,s_temp->n_length); + } +} + + +/* variable length key with prefix compression */ + +void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo __attribute__((unused)), + register byte *key_pos, + register MARIA_KEY_PARAM *s_temp) +{ + store_key_length_inc(key_pos,s_temp->ref_length); + memcpy((char*) key_pos,(char*) s_temp->key+s_temp->ref_length, + (size_t) s_temp->totlength-s_temp->ref_length); + + if (s_temp->next_key_pos) + { + key_pos+=(uint) (s_temp->totlength-s_temp->ref_length); + store_key_length_inc(key_pos,s_temp->n_ref_length); + if (s_temp->prev_length) /* If we must extend key */ + { + memcpy(key_pos,s_temp->prev_key,s_temp->prev_length); + } + } +} diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c new file mode 100644 index 00000000000..d6256deb39c --- /dev/null +++ b/storage/maria/ma_sort.c @@ -0,0 +1,1055 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + Creates a index for a database by reading keys, sorting them and outputing + them in sorted order through MARIA_SORT_INFO functions. +*/ + +#include "ma_fulltext.h" +#if defined(MSDOS) || defined(__WIN__) +#include +#else +#include +#endif +#include + +/* static variables */ + +#undef MIN_SORT_MEMORY +#undef MYF_RW +#undef DISK_BUFFER_SIZE + +#define MERGEBUFF 15 +#define MERGEBUFF2 31 +#define MIN_SORT_MEMORY (4096-MALLOC_OVERHEAD) +#define MYF_RW MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL) +#define DISK_BUFFER_SIZE (IO_SIZE*16) + + +/* + Pointers of functions for store and read keys from temp file +*/ + +extern void print_error _VARARGS((const char *fmt,...)); + +/* Functions defined in this file */ + +static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info,uint keys, + byte **sort_keys, + DYNAMIC_ARRAY *buffpek,int *maxbuffer, + IO_CACHE *tempfile, + IO_CACHE *tempfile_for_exceptions); +static int NEAR_F write_keys(MARIA_SORT_PARAM *info, byte **sort_keys, + uint count, BUFFPEK *buffpek,IO_CACHE *tempfile); +static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key, + IO_CACHE *tempfile); +static int NEAR_F write_index(MARIA_SORT_PARAM *info, byte **sort_keys, + uint count); +static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info,uint keys, + byte **sort_keys, + BUFFPEK *buffpek,int *maxbuffer, + IO_CACHE *t_file); +static uint NEAR_F read_to_buffer(IO_CACHE *fromfile,BUFFPEK *buffpek, + uint sort_length); +static int NEAR_F merge_buffers(MARIA_SORT_PARAM *info,uint keys, + IO_CACHE *from_file, IO_CACHE *to_file, + byte **sort_keys, BUFFPEK *lastbuff, + BUFFPEK *Fb, BUFFPEK *Tb); +static int NEAR_F merge_index(MARIA_SORT_PARAM *,uint, byte **,BUFFPEK *, int, + IO_CACHE *); +static int flush_maria_ft_buf(MARIA_SORT_PARAM *info); + +static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, byte **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile); +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek, + uint sort_length); +static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info, IO_CACHE *to_file, + char *key, uint sort_length, uint count); +static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info, + IO_CACHE *to_file, + char* key, uint sort_length, + uint count); +static inline int +my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs); + +/* + Creates a index of sorted keys + + SYNOPSIS + _ma_create_index_by_sort() + info Sort parameters + no_messages Set to 1 if no output + sortbuff_size Size if sortbuffer to allocate + + RESULT + 0 ok + <> 0 Error +*/ + +int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, + ulong sortbuff_size) +{ + int error,maxbuffer,skr; + uint memavl,old_memavl,keys,sort_length; + DYNAMIC_ARRAY buffpek; + ha_rows records; + byte **sort_keys; + IO_CACHE tempfile, tempfile_for_exceptions; + DBUG_ENTER("_ma_create_index_by_sort"); + DBUG_PRINT("enter",("sort_length: %d", info->key_length)); + + if (info->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + info->write_keys= write_keys_varlen; + info->read_to_buffer=read_to_buffer_varlen; + info->write_key=write_merge_key_varlen; + } + else + { + info->write_keys= write_keys; + info->read_to_buffer=read_to_buffer; + info->write_key=write_merge_key; + } + + my_b_clear(&tempfile); + my_b_clear(&tempfile_for_exceptions); + bzero((char*) &buffpek,sizeof(buffpek)); + sort_keys= (byte **) NULL; error= 1; + maxbuffer=1; + + memavl=max(sortbuff_size,MIN_SORT_MEMORY); + records= info->sort_info->max_records; + sort_length= info->key_length; + LINT_INIT(keys); + + while (memavl >= MIN_SORT_MEMORY) + { + if ((my_off_t) (records+1)*(sort_length+sizeof(char*)) <= + (my_off_t) memavl) + keys= records+1; + else + do + { + skr=maxbuffer; + if (memavl < sizeof(BUFFPEK)*(uint) maxbuffer || + (keys=(memavl-sizeof(BUFFPEK)*(uint) maxbuffer)/ + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) + { + _ma_check_print_error(info->sort_info->param, + "sort_buffer_size is to small"); + goto err; + } + } + while ((maxbuffer= (int) (records/(keys-1)+1)) != skr); + + if ((sort_keys=(byte**) my_malloc(keys*(sort_length+sizeof(char*))+ + HA_FT_MAXBYTELEN, MYF(0)))) + { + if (my_init_dynamic_array(&buffpek, sizeof(BUFFPEK), maxbuffer, + maxbuffer/2)) + { + my_free((gptr) sort_keys,MYF(0)); + sort_keys= 0; + } + else + break; + } + old_memavl=memavl; + if ((memavl=memavl/4*3) < MIN_SORT_MEMORY && old_memavl > MIN_SORT_MEMORY) + memavl=MIN_SORT_MEMORY; + } + if (memavl < MIN_SORT_MEMORY) + { + _ma_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */ + goto err; /* purecov: tested */ + } + (*info->lock_in_memory)(info->sort_info->param);/* Everything is allocated */ + + if (!no_messages) + printf(" - Searching for keys, allocating buffer for %d keys\n",keys); + + if ((records=find_all_keys(info,keys,sort_keys,&buffpek,&maxbuffer, + &tempfile,&tempfile_for_exceptions)) + == HA_POS_ERROR) + goto err; /* purecov: tested */ + if (maxbuffer == 0) + { + if (!no_messages) + printf(" - Dumping %lu keys\n", (ulong) records); + if (write_index(info,sort_keys, (uint) records)) + goto err; /* purecov: inspected */ + } + else + { + keys=(keys*(sort_length+sizeof(char*)))/sort_length; + if (maxbuffer >= MERGEBUFF2) + { + if (!no_messages) + printf(" - Merging %lu keys\n", (ulong) records); /* purecov: tested */ + if (merge_many_buff(info,keys,sort_keys, + dynamic_element(&buffpek,0,BUFFPEK *),&maxbuffer,&tempfile)) + goto err; /* purecov: inspected */ + } + if (flush_io_cache(&tempfile) || + reinit_io_cache(&tempfile,READ_CACHE,0L,0,0)) + goto err; /* purecov: inspected */ + if (!no_messages) + printf(" - Last merge and dumping keys\n"); /* purecov: tested */ + if (merge_index(info,keys,sort_keys,dynamic_element(&buffpek,0,BUFFPEK *), + maxbuffer,&tempfile)) + goto err; /* purecov: inspected */ + } + + if (flush_maria_ft_buf(info) || _ma_flush_pending_blocks(info)) + goto err; + + if (my_b_inited(&tempfile_for_exceptions)) + { + MARIA_HA *idx=info->sort_info->info; + uint keyno=info->key; + uint key_length, ref_length=idx->s->rec_reflength; + + if (!no_messages) + printf(" - Adding exceptions\n"); /* purecov: tested */ + if (flush_io_cache(&tempfile_for_exceptions) || + reinit_io_cache(&tempfile_for_exceptions,READ_CACHE,0L,0,0)) + goto err; + + while (!my_b_read(&tempfile_for_exceptions,(byte*)&key_length, + sizeof(key_length)) + && !my_b_read(&tempfile_for_exceptions,(byte*)sort_keys, + (uint) key_length)) + { + if (_ma_ck_write(idx,keyno,(byte*) sort_keys,key_length-ref_length)) + goto err; + } + } + + error =0; + +err: + if (sort_keys) + my_free((gptr) sort_keys,MYF(0)); + delete_dynamic(&buffpek); + close_cached_file(&tempfile); + close_cached_file(&tempfile_for_exceptions); + + DBUG_RETURN(error ? -1 : 0); +} /* _ma_create_index_by_sort */ + + +/* Search after all keys and place them in a temp. file */ + +static ha_rows NEAR_F find_all_keys(MARIA_SORT_PARAM *info, uint keys, + byte **sort_keys, DYNAMIC_ARRAY *buffpek, + int *maxbuffer, IO_CACHE *tempfile, + IO_CACHE *tempfile_for_exceptions) +{ + int error; + uint idx; + DBUG_ENTER("find_all_keys"); + + idx=error=0; + sort_keys[0]= (byte*) (sort_keys+keys); + + while (!(error=(*info->key_read)(info,sort_keys[idx]))) + { + if (info->real_key_length > info->key_length) + { + if (write_key(info,sort_keys[idx],tempfile_for_exceptions)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + continue; + } + + if (++idx == keys) + { + if (info->write_keys(info,sort_keys,idx-1, + (BUFFPEK *)alloc_dynamic(buffpek), + tempfile)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + + sort_keys[0]=(byte*) (sort_keys+keys); + memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); + idx=1; + } + sort_keys[idx]=sort_keys[idx-1]+info->key_length; + } + if (error > 0) + DBUG_RETURN(HA_POS_ERROR); /* Aborted by get_key */ /* purecov: inspected */ + if (buffpek->elements) + { + if (info->write_keys(info,sort_keys,idx,(BUFFPEK *)alloc_dynamic(buffpek), + tempfile)) + DBUG_RETURN(HA_POS_ERROR); /* purecov: inspected */ + *maxbuffer=buffpek->elements-1; + } + else + *maxbuffer=0; + + DBUG_RETURN((*maxbuffer)*(keys-1)+idx); +} /* find_all_keys */ + + +#ifdef THREAD +/* Search after all keys and place them in a temp. file */ + +pthread_handler_t _ma_thr_find_all_keys(void *arg) +{ + MARIA_SORT_PARAM *sort_param= (MARIA_SORT_PARAM*) arg; + int error; + uint memavl,old_memavl,keys,sort_length; + uint idx, maxbuffer; + byte **sort_keys=0; + + LINT_INIT(keys); + + error=1; + + if (my_thread_init()) + goto err; + + { /* Add extra block since DBUG_ENTER declare variables */ + DBUG_ENTER("_ma_thr_find_all_keys"); + DBUG_PRINT("enter", ("master: %d", sort_param->master)); + if (sort_param->sort_info->got_error) + goto err; + + if (sort_param->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + sort_param->write_keys= write_keys_varlen; + sort_param->read_to_buffer= read_to_buffer_varlen; + sort_param->write_key= write_merge_key_varlen; + } + else + { + sort_param->write_keys= write_keys; + sort_param->read_to_buffer= read_to_buffer; + sort_param->write_key= write_merge_key; + } + + my_b_clear(&sort_param->tempfile); + my_b_clear(&sort_param->tempfile_for_exceptions); + bzero((char*) &sort_param->buffpek,sizeof(sort_param->buffpek)); + bzero((char*) &sort_param->unique, sizeof(sort_param->unique)); + + memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY); + idx= sort_param->sort_info->max_records; + sort_length= sort_param->key_length; + maxbuffer= 1; + + while (memavl >= MIN_SORT_MEMORY) + { + if ((my_off_t) (idx+1)*(sort_length+sizeof(char*)) <= + (my_off_t) memavl) + keys= idx+1; + else + { + uint skr; + do + { + skr= maxbuffer; + if (memavl < sizeof(BUFFPEK)*maxbuffer || + (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) + { + _ma_check_print_error(sort_param->sort_info->param, + "sort_buffer_size is to small"); + goto err; + } + } + while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); + } + if ((sort_keys= (byte **) + my_malloc(keys*(sort_length+sizeof(char*))+ + ((sort_param->keyinfo->flag & HA_FULLTEXT) ? + HA_FT_MAXBYTELEN : 0), MYF(0)))) + { + if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK), + maxbuffer, maxbuffer/2)) + { + my_free((gptr) sort_keys,MYF(0)); + sort_keys= (byte **) NULL; /* for err: label */ + } + else + break; + } + old_memavl= memavl; + if ((memavl= memavl/4*3) < MIN_SORT_MEMORY && + old_memavl > MIN_SORT_MEMORY) + memavl= MIN_SORT_MEMORY; + } + if (memavl < MIN_SORT_MEMORY) + { + _ma_check_print_error(sort_param->sort_info->param, + "Sort buffer too small"); + goto err; /* purecov: tested */ + } + + if (sort_param->sort_info->param->testflag & T_VERBOSE) + printf("Key %d - Allocating buffer for %d keys\n", + sort_param->key+1, keys); + sort_param->sort_keys= sort_keys; + + idx= error= 0; + sort_keys[0]= (byte*) (sort_keys+keys); + + DBUG_PRINT("info", ("reading keys")); + while (!(error= sort_param->sort_info->got_error) && + !(error= (*sort_param->key_read)(sort_param, sort_keys[idx]))) + { + if (sort_param->real_key_length > sort_param->key_length) + { + if (write_key(sort_param,sort_keys[idx], + &sort_param->tempfile_for_exceptions)) + goto err; + continue; + } + + if (++idx == keys) + { + if (sort_param->write_keys(sort_param, sort_keys, idx - 1, + (BUFFPEK *)alloc_dynamic(&sort_param-> + buffpek), + &sort_param->tempfile)) + goto err; + sort_keys[0]= (byte*) (sort_keys+keys); + memcpy(sort_keys[0], sort_keys[idx - 1], + (size_t) sort_param->key_length); + idx= 1; + } + sort_keys[idx]=sort_keys[idx - 1] + sort_param->key_length; + } + if (error > 0) + goto err; + if (sort_param->buffpek.elements) + { + if (sort_param->write_keys(sort_param,sort_keys, idx, + (BUFFPEK *) alloc_dynamic(&sort_param-> + buffpek), + &sort_param->tempfile)) + goto err; + sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx; + } + else + sort_param->keys= idx; + + sort_param->sort_keys_length= keys; + goto ok; + +err: + DBUG_PRINT("error", ("got some error")); + sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */ + my_free((gptr) sort_keys,MYF(MY_ALLOW_ZERO_PTR)); + sort_param->sort_keys=0; + delete_dynamic(& sort_param->buffpek); + close_cached_file(&sort_param->tempfile); + close_cached_file(&sort_param->tempfile_for_exceptions); + +ok: + free_root(&sort_param->wordroot, MYF(0)); + /* + Detach from the share if the writer is involved. Avoid others to + be blocked. This includes a flush of the write buffer. This will + also indicate EOF to the readers. + */ + if (sort_param->sort_info->info->rec_cache.share) + remove_io_thread(&sort_param->sort_info->info->rec_cache); + + /* Readers detach from the share if any. Avoid others to be blocked. */ + if (sort_param->read_cache.share) + remove_io_thread(&sort_param->read_cache); + + pthread_mutex_lock(&sort_param->sort_info->mutex); + if (!--sort_param->sort_info->threads_running) + pthread_cond_signal(&sort_param->sort_info->cond); + pthread_mutex_unlock(&sort_param->sort_info->mutex); + DBUG_PRINT("exit", ("======== ending thread ========")); + } + my_thread_end(); + return NULL; +} + + +int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param) +{ + MARIA_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; + ulong length, keys; + ulong *rec_per_key_part=param->rec_per_key_part; + int got_error=sort_info->got_error; + uint i; + MARIA_HA *info=sort_info->info; + MARIA_SHARE *share=info->s; + MARIA_SORT_PARAM *sinfo; + byte *mergebuf=0; + DBUG_ENTER("_ma_thr_write_keys"); + LINT_INIT(length); + + for (i= 0, sinfo= sort_param ; + i < sort_info->total_keys ; + i++, rec_per_key_part+=sinfo->keyinfo->keysegs, sinfo++) + { + if (!sinfo->sort_keys) + { + got_error=1; + my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + continue; + } + if (!got_error) + { + maria_set_key_active(share->state.key_map, sinfo->key); + + if (!sinfo->buffpek.elements) + { + if (param->testflag & T_VERBOSE) + { + printf("Key %d - Dumping %u keys\n",sinfo->key+1, sinfo->keys); + fflush(stdout); + } + if (write_index(sinfo, sinfo->sort_keys, sinfo->keys) || + flush_maria_ft_buf(sinfo) || _ma_flush_pending_blocks(sinfo)) + got_error=1; + } + if (!got_error && param->testflag & T_STATISTICS) + maria_update_key_parts(sinfo->keyinfo, rec_per_key_part, sinfo->unique, + param->stats_method == MI_STATS_METHOD_IGNORE_NULLS? + sinfo->notnull: NULL, + (ulonglong) info->state->records); + } + my_free((gptr) sinfo->sort_keys,MYF(0)); + my_free(sinfo->rec_buff, MYF(MY_ALLOW_ZERO_PTR)); + sinfo->sort_keys=0; + } + + for (i= 0, sinfo= sort_param ; + i < sort_info->total_keys ; + i++, + delete_dynamic(&sinfo->buffpek), + close_cached_file(&sinfo->tempfile), + close_cached_file(&sinfo->tempfile_for_exceptions), + sinfo++) + { + if (got_error) + continue; + if (sinfo->keyinfo->flag & HA_VAR_LENGTH_KEY) + { + sinfo->write_keys=write_keys_varlen; + sinfo->read_to_buffer=read_to_buffer_varlen; + sinfo->write_key=write_merge_key_varlen; + } + else + { + sinfo->write_keys=write_keys; + sinfo->read_to_buffer=read_to_buffer; + sinfo->write_key=write_merge_key; + } + if (sinfo->buffpek.elements) + { + uint maxbuffer=sinfo->buffpek.elements-1; + if (!mergebuf) + { + length=param->sort_buffer_length; + while (length >= MIN_SORT_MEMORY && !mergebuf) + { + mergebuf=my_malloc(length, MYF(0)); + length=length*3/4; + } + if (!mergebuf) + { + got_error=1; + continue; + } + } + keys=length/sinfo->key_length; + if (maxbuffer >= MERGEBUFF2) + { + if (param->testflag & T_VERBOSE) + printf("Key %d - Merging %u keys\n",sinfo->key+1, sinfo->keys); + if (merge_many_buff(sinfo, keys, (byte **) mergebuf, + dynamic_element(&sinfo->buffpek, 0, BUFFPEK *), + (int*) &maxbuffer, &sinfo->tempfile)) + { + got_error=1; + continue; + } + } + if (flush_io_cache(&sinfo->tempfile) || + reinit_io_cache(&sinfo->tempfile,READ_CACHE,0L,0,0)) + { + got_error=1; + continue; + } + if (param->testflag & T_VERBOSE) + printf("Key %d - Last merge and dumping keys\n", sinfo->key+1); + if (merge_index(sinfo, keys, (byte**) mergebuf, + dynamic_element(&sinfo->buffpek,0,BUFFPEK *), + maxbuffer,&sinfo->tempfile) || + flush_maria_ft_buf(sinfo) || + _ma_flush_pending_blocks(sinfo)) + { + got_error=1; + continue; + } + } + if (my_b_inited(&sinfo->tempfile_for_exceptions)) + { + uint key_length; + + if (param->testflag & T_VERBOSE) + printf("Key %d - Dumping 'long' keys\n", sinfo->key+1); + + if (flush_io_cache(&sinfo->tempfile_for_exceptions) || + reinit_io_cache(&sinfo->tempfile_for_exceptions,READ_CACHE,0L,0,0)) + { + got_error=1; + continue; + } + + while (!got_error && + !my_b_read(&sinfo->tempfile_for_exceptions,(byte*)&key_length, + sizeof(key_length))) + { + byte maria_ft_buf[HA_FT_MAXBYTELEN + HA_FT_WLEN + 10]; + if (key_length > sizeof(maria_ft_buf) || + my_b_read(&sinfo->tempfile_for_exceptions, (byte*)maria_ft_buf, + (uint)key_length) || + _ma_ck_write(info, sinfo->key, maria_ft_buf, + key_length - info->s->rec_reflength)) + got_error=1; + } + } + } + my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(got_error); +} +#endif /* THREAD */ + +/* Write all keys in memory to file for later merge */ + +static int write_keys(MARIA_SORT_PARAM *info, register byte **sort_keys, + uint count, BUFFPEK *buffpek, IO_CACHE *tempfile) +{ + byte **end; + uint sort_length=info->key_length; + DBUG_ENTER("write_keys"); + + qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, + info); + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + buffpek->file_pos=my_b_tell(tempfile); + buffpek->count=count; + + for (end=sort_keys+count ; sort_keys != end ; sort_keys++) + { + if (my_b_write(tempfile, *sort_keys, (uint) sort_length)) + DBUG_RETURN(1); /* purecov: inspected */ + } + DBUG_RETURN(0); +} /* write_keys */ + + +static inline int +my_var_write(MARIA_SORT_PARAM *info, IO_CACHE *to_file, byte *bufs) +{ + int err; + uint16 len= _ma_keylength(info->keyinfo, bufs); + + /* The following is safe as this is a local file */ + if ((err= my_b_write(to_file, (byte*)&len, sizeof(len)))) + return (err); + if ((err= my_b_write(to_file,bufs, (uint) len))) + return (err); + return (0); +} + + +static int NEAR_F write_keys_varlen(MARIA_SORT_PARAM *info, + register byte **sort_keys, + uint count, BUFFPEK *buffpek, + IO_CACHE *tempfile) +{ + byte **end; + int err; + DBUG_ENTER("write_keys_varlen"); + + qsort2((byte*) sort_keys,count,sizeof(byte*),(qsort2_cmp) info->key_cmp, + info); + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + buffpek->file_pos=my_b_tell(tempfile); + buffpek->count=count; + for (end=sort_keys+count ; sort_keys != end ; sort_keys++) + { + if ((err= my_var_write(info,tempfile, *sort_keys))) + DBUG_RETURN(err); + } + DBUG_RETURN(0); +} /* write_keys_varlen */ + + +static int NEAR_F write_key(MARIA_SORT_PARAM *info, byte *key, + IO_CACHE *tempfile) +{ + uint key_length=info->real_key_length; + DBUG_ENTER("write_key"); + + if (!my_b_inited(tempfile) && + open_cached_file(tempfile, my_tmpdir(info->tmpdir), "ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); + + if (my_b_write(tempfile, (byte*)&key_length,sizeof(key_length)) || + my_b_write(tempfile, key, (uint) key_length)) + DBUG_RETURN(1); + DBUG_RETURN(0); +} /* write_key */ + + +/* Write index */ + +static int NEAR_F write_index(MARIA_SORT_PARAM *info, + register byte **sort_keys, + register uint count) +{ + DBUG_ENTER("write_index"); + + qsort2((gptr) sort_keys,(size_t) count,sizeof(byte*), + (qsort2_cmp) info->key_cmp,info); + while (count--) + { + if ((*info->key_write)(info, *sort_keys++)) + DBUG_RETURN(-1); /* purecov: inspected */ + } + DBUG_RETURN(0); +} /* write_index */ + + + /* Merge buffers to make < MERGEBUFF2 buffers */ + +static int NEAR_F merge_many_buff(MARIA_SORT_PARAM *info, uint keys, + byte **sort_keys, BUFFPEK *buffpek, + int *maxbuffer, IO_CACHE *t_file) +{ + register int i; + IO_CACHE t_file2, *from_file, *to_file, *temp; + BUFFPEK *lastbuff; + DBUG_ENTER("merge_many_buff"); + + if (*maxbuffer < MERGEBUFF2) + DBUG_RETURN(0); /* purecov: inspected */ + if (flush_io_cache(t_file) || + open_cached_file(&t_file2,my_tmpdir(info->tmpdir),"ST", + DISK_BUFFER_SIZE, info->sort_info->param->myf_rw)) + DBUG_RETURN(1); /* purecov: inspected */ + + from_file= t_file ; to_file= &t_file2; + while (*maxbuffer >= MERGEBUFF2) + { + reinit_io_cache(from_file,READ_CACHE,0L,0,0); + reinit_io_cache(to_file,WRITE_CACHE,0L,0,0); + lastbuff=buffpek; + for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF) + { + if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, + buffpek+i,buffpek+i+MERGEBUFF-1)) + break; /* purecov: inspected */ + } + if (merge_buffers(info,keys,from_file,to_file,sort_keys,lastbuff++, + buffpek+i,buffpek+ *maxbuffer)) + break; /* purecov: inspected */ + if (flush_io_cache(to_file)) + break; /* purecov: inspected */ + temp=from_file; from_file=to_file; to_file=temp; + *maxbuffer= (int) (lastbuff-buffpek)-1; + } + close_cached_file(to_file); /* This holds old result */ + if (to_file == t_file) + *t_file=t_file2; /* Copy result file */ + + DBUG_RETURN(*maxbuffer >= MERGEBUFF2); /* Return 1 if interrupted */ +} /* merge_many_buff */ + + +/* + Read data to buffer + + SYNOPSIS + read_to_buffer() + fromfile File to read from + buffpek Where to read from + sort_length max length to read + RESULT + > 0 Ammount of bytes read + -1 Error +*/ + +static uint NEAR_F read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, + uint sort_length) +{ + register uint count; + uint length; + + if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) + { + if (my_pread(fromfile->file,(byte*) buffpek->base, + (length= sort_length*count),buffpek->file_pos,MYF_RW)) + return((uint) -1); /* purecov: inspected */ + buffpek->key=buffpek->base; + buffpek->file_pos+= length; /* New filepos */ + buffpek->count-= count; + buffpek->mem_count= count; + } + return (count*sort_length); +} /* read_to_buffer */ + +static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, + uint sort_length) +{ + register uint count; + uint16 length_of_key = 0; + uint idx; + byte *buffp; + + if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) + { + buffp= buffpek->base; + + for (idx=1;idx<=count;idx++) + { + if (my_pread(fromfile->file,(byte*)&length_of_key,sizeof(length_of_key), + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=sizeof(length_of_key); + if (my_pread(fromfile->file,(byte*) buffp,length_of_key, + buffpek->file_pos,MYF_RW)) + return((uint) -1); + buffpek->file_pos+=length_of_key; + buffp = buffp + sort_length; + } + buffpek->key=buffpek->base; + buffpek->count-= count; + buffpek->mem_count= count; + } + return (count*sort_length); +} /* read_to_buffer_varlen */ + + +static int NEAR_F write_merge_key_varlen(MARIA_SORT_PARAM *info, + IO_CACHE *to_file,char* key, + uint sort_length, uint count) +{ + uint idx; + + char *bufs = key; + for (idx=1;idx<=count;idx++) + { + int err; + if ((err= my_var_write(info,to_file, (byte*) bufs))) + return (err); + bufs=bufs+sort_length; + } + return(0); +} + + +static int NEAR_F write_merge_key(MARIA_SORT_PARAM *info __attribute__((unused)), + IO_CACHE *to_file, char* key, + uint sort_length, uint count) +{ + return my_b_write(to_file,(byte*) key,(uint) sort_length*count); +} + +/* + Merge buffers to one buffer + If to_file == 0 then use info->key_write +*/ + +static int NEAR_F +merge_buffers(MARIA_SORT_PARAM *info, uint keys, IO_CACHE *from_file, + IO_CACHE *to_file, byte **sort_keys, BUFFPEK *lastbuff, + BUFFPEK *Fb, BUFFPEK *Tb) +{ + int error; + uint sort_length,maxcount; + ha_rows count; + my_off_t to_start_filepos; + byte *strpos; + BUFFPEK *buffpek,**refpek; + QUEUE queue; + volatile int *killed= _ma_killed_ptr(info->sort_info->param); + DBUG_ENTER("merge_buffers"); + + count=error=0; + maxcount=keys/((uint) (Tb-Fb) +1); + LINT_INIT(to_start_filepos); + if (to_file) + to_start_filepos=my_b_tell(to_file); + strpos= (byte*) sort_keys; + sort_length=info->key_length; + + if (init_queue(&queue,(uint) (Tb-Fb)+1,offsetof(BUFFPEK,key),0, + (int (*)(void*, byte *,byte*)) info->key_cmp, + (void*) info)) + DBUG_RETURN(1); /* purecov: inspected */ + + for (buffpek= Fb ; buffpek <= Tb ; buffpek++) + { + count+= buffpek->count; + buffpek->base= strpos; + buffpek->max_keys=maxcount; + strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek, + sort_length)); + if (error == -1) + goto err; /* purecov: inspected */ + queue_insert(&queue,(char*) buffpek); + } + + while (queue.elements > 1) + { + for (;;) + { + if (*killed) + { + error=1; goto err; + } + buffpek=(BUFFPEK*) queue_top(&queue); + if (to_file) + { + if (info->write_key(info,to_file,(byte*) buffpek->key, + (uint) sort_length,1)) + { + error=1; goto err; /* purecov: inspected */ + } + } + else + { + if ((*info->key_write)(info,(void*) buffpek->key)) + { + error=1; goto err; /* purecov: inspected */ + } + } + buffpek->key+=sort_length; + if (! --buffpek->mem_count) + { + if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length))) + { + byte *base= buffpek->base; + uint max_keys=buffpek->max_keys; + + VOID(queue_remove(&queue,0)); + + /* Put room used by buffer to use in other buffer */ + for (refpek= (BUFFPEK**) &queue_top(&queue); + refpek <= (BUFFPEK**) &queue_end(&queue); + refpek++) + { + buffpek= *refpek; + if (buffpek->base+buffpek->max_keys*sort_length == base) + { + buffpek->max_keys+=max_keys; + break; + } + else if (base+max_keys*sort_length == buffpek->base) + { + buffpek->base=base; + buffpek->max_keys+=max_keys; + break; + } + } + break; /* One buffer have been removed */ + } + } + else if (error == -1) + goto err; /* purecov: inspected */ + queue_replaced(&queue); /* Top element has been replaced */ + } + } + buffpek=(BUFFPEK*) queue_top(&queue); + buffpek->base= (byte*) sort_keys; + buffpek->max_keys=keys; + do + { + if (to_file) + { + if (info->write_key(info,to_file,(byte*) buffpek->key, + sort_length,buffpek->mem_count)) + { + error=1; goto err; /* purecov: inspected */ + } + } + else + { + register byte *end; + strpos= buffpek->key; + for (end= strpos+buffpek->mem_count*sort_length; + strpos != end ; + strpos+=sort_length) + { + if ((*info->key_write)(info, (byte*) strpos)) + { + error=1; goto err; /* purecov: inspected */ + } + } + } + } + while ((error=(int) info->read_to_buffer(from_file,buffpek,sort_length)) != + -1 && error != 0); + + lastbuff->count=count; + if (to_file) + lastbuff->file_pos=to_start_filepos; +err: + delete_queue(&queue); + DBUG_RETURN(error); +} /* merge_buffers */ + + + /* Do a merge to output-file (save only positions) */ + +static int NEAR_F +merge_index(MARIA_SORT_PARAM *info, uint keys, byte **sort_keys, + BUFFPEK *buffpek, int maxbuffer, IO_CACHE *tempfile) +{ + DBUG_ENTER("merge_index"); + if (merge_buffers(info,keys,tempfile,(IO_CACHE*) 0,sort_keys,buffpek,buffpek, + buffpek+maxbuffer)) + DBUG_RETURN(1); /* purecov: inspected */ + DBUG_RETURN(0); +} /* merge_index */ + + +static int flush_maria_ft_buf(MARIA_SORT_PARAM *info) +{ + int err=0; + if (info->sort_info->ft_buf) + { + err=_ma_sort_ft_buf_flush(info); + my_free((gptr)info->sort_info->ft_buf, MYF(0)); + info->sort_info->ft_buf=0; + } + return err; +} + diff --git a/storage/maria/ma_sp_defs.h b/storage/maria/ma_sp_defs.h new file mode 100644 index 00000000000..a2870bfa062 --- /dev/null +++ b/storage/maria/ma_sp_defs.h @@ -0,0 +1,47 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin & MySQL Finland AB + & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _SP_DEFS_H +#define _SP_DEFS_H + +#define SPDIMS 2 +#define SPTYPE HA_KEYTYPE_DOUBLE +#define SPLEN 8 + +#ifdef HAVE_SPATIAL + +enum wkbType +{ + wkbPoint = 1, + wkbLineString = 2, + wkbPolygon = 3, + wkbMultiPoint = 4, + wkbMultiLineString = 5, + wkbMultiPolygon = 6, + wkbGeometryCollection = 7 +}; + +enum wkbByteOrder +{ + wkbXDR = 0, /* Big Endian */ + wkbNDR = 1 /* Little Endian */ +}; + +uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, my_off_t filepos); + +#endif /*HAVE_SPATIAL*/ +#endif /* _SP_DEFS_H */ diff --git a/storage/maria/ma_sp_key.c b/storage/maria/ma_sp_key.c new file mode 100644 index 00000000000..06769e97d30 --- /dev/null +++ b/storage/maria/ma_sp_key.c @@ -0,0 +1,299 @@ +/* Copyright (C) 2006 MySQL AB & Ramil Kalimullin + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include "maria_def.h" + +#ifdef HAVE_SPATIAL + +#include "ma_sp_defs.h" + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr); +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top); +static int sp_mbr_from_wkb(uchar (*wkb), uint size, uint n_dims, double *mbr); + +static void get_double(double *d, const byte *pos) +{ + float8get(*d, pos); +} + +uint _ma_sp_make_key(register MARIA_HA *info, uint keynr, byte *key, + const byte *record, my_off_t filepos) +{ + HA_KEYSEG *keyseg; + MARIA_KEYDEF *keyinfo = &info->s->keyinfo[keynr]; + uint len = 0; + byte *pos; + uint dlen; + uchar *dptr; + double mbr[SPDIMS * 2]; + uint i; + + keyseg = &keyinfo->seg[-1]; + pos = (byte*)record + keyseg->start; + + dlen = _ma_calc_blob_length(keyseg->bit_start, pos); + memcpy_fixed(&dptr, pos + keyseg->bit_start, sizeof(char*)); + if (!dptr) + { + my_errno= HA_ERR_NULL_IN_SPATIAL; + return 0; + } + sp_mbr_from_wkb(dptr + 4, dlen - 4, SPDIMS, mbr); /* SRID */ + + for (i = 0, keyseg = keyinfo->seg; keyseg->type; keyseg++, i++) + { + uint length = keyseg->length; + + pos = ((byte*)mbr) + keyseg->start; + if (keyseg->flag & HA_SWAP_KEY) + { +#ifdef HAVE_ISNAN + if (keyseg->type == HA_KEYTYPE_FLOAT) + { + float nr; + float4get(nr, pos); + if (isnan(nr)) + { + /* Replace NAN with zero */ + bzero(key, length); + key+= length; + continue; + } + } + else if (keyseg->type == HA_KEYTYPE_DOUBLE) + { + double nr; + get_double(&nr, pos); + if (isnan(nr)) + { + bzero(key, length); + key+= length; + continue; + } + } +#endif + pos += length; + while (length--) + { + *key++ = *--pos; + } + } + else + { + memcpy((byte*)key, pos, length); + key += keyseg->length; + } + len += keyseg->length; + } + _ma_dpointer(info, key, filepos); + return len; +} + +/* +Calculate minimal bounding rectangle (mbr) of the spatial object +stored in "well-known binary representation" (wkb) format. +*/ +static int sp_mbr_from_wkb(uchar *wkb, uint size, uint n_dims, double *mbr) +{ + uint i; + + for (i=0; i < n_dims; ++i) + { + mbr[i * 2] = DBL_MAX; + mbr[i * 2 + 1] = -DBL_MAX; + } + + return sp_get_geometry_mbr(&wkb, wkb + size, n_dims, mbr, 1); +} + +/* + Add one point stored in wkb to mbr +*/ + +static int sp_add_point_to_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order __attribute__((unused)), + double *mbr) +{ + double ord; + double *mbr_end= mbr + n_dims * 2; + + while (mbr < mbr_end) + { + if ((*wkb) > end - 8) + return -1; + get_double(&ord, (const byte*) *wkb); + (*wkb)+= 8; + if (ord < *mbr) + float8store((char*) mbr, ord); + mbr++; + if (ord > *mbr) + float8store((char*) mbr, ord); + mbr++; + } + return 0; +} + + +static int sp_get_point_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + return sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr); +} + + +static int sp_get_linestring_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_points; + + n_points = uint4korr(*wkb); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + return 0; +} + + +static int sp_get_polygon_mbr(uchar *(*wkb), uchar *end, uint n_dims, + uchar byte_order, double *mbr) +{ + uint n_linear_rings; + uint n_points; + + n_linear_rings = uint4korr((*wkb)); + (*wkb) += 4; + + for (; n_linear_rings > 0; --n_linear_rings) + { + n_points = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_points > 0; --n_points) + { + /* Add next point to mbr */ + if (sp_add_point_to_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + } + return 0; +} + +static int sp_get_geometry_mbr(uchar *(*wkb), uchar *end, uint n_dims, + double *mbr, int top) +{ + int res; + uchar byte_order; + uint wkb_type; + + byte_order = *(*wkb); + ++(*wkb); + + wkb_type = uint4korr((*wkb)); + (*wkb) += 4; + + switch ((enum wkbType) wkb_type) + { + case wkbPoint: + res = sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbLineString: + res = sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbPolygon: + res = sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr); + break; + case wkbMultiPoint: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_point_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiLineString: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_linestring_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbMultiPolygon: + { + uint n_items; + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + byte_order = *(*wkb); + ++(*wkb); + (*wkb) += 4; + if (sp_get_polygon_mbr(wkb, end, n_dims, byte_order, mbr)) + return -1; + } + res = 0; + break; + } + case wkbGeometryCollection: + { + uint n_items; + + if (!top) + return -1; + + n_items = uint4korr((*wkb)); + (*wkb) += 4; + for (; n_items > 0; --n_items) + { + if (sp_get_geometry_mbr(wkb, end, n_dims, mbr, 0)) + return -1; + } + res = 0; + break; + } + default: + res = -1; + } + return res; +} + +#endif /*HAVE_SPATIAL*/ diff --git a/storage/maria/ma_sp_test.c b/storage/maria/ma_sp_test.c new file mode 100644 index 00000000000..7a413f68135 --- /dev/null +++ b/storage/maria/ma_sp_test.c @@ -0,0 +1,568 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA spatial table */ +/* Written by Alex Barkov, who has a shared copyright to this code */ + +#include "maria.h" + +#ifdef HAVE_SPATIAL +#include "ma_sp_defs.h" + +#define MAX_REC_LENGTH 1024 +#define KEYALG HA_KEY_ALG_RTREE + +static void create_linestring(char *record,uint rownr); +static void print_record(char * record,my_off_t offs,const char * tail); + +static void create_key(char *key,uint rownr); +static void print_key(const char *key,const char * tail); + +static int run_test(const char *filename); +static int read_with_pos(MARIA_HA * file, int silent); + +static int maria_rtree_CreateLineStringWKB(double *ords, uint n_dims, uint n_points, + uchar *wkb); +static void maria_rtree_PrintWKB(uchar *wkb, uint n_dims); + +static char blob_key[MAX_REC_LENGTH]; + + +int main(int argc __attribute__((unused)),char *argv[]) +{ + MY_INIT(argv[0]); + maria_init(); + exit(run_test("sp_test")); +} + + +int run_test(const char *filename) +{ + MARIA_HA *file; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + MARIA_COLUMNDEF recinfo[20]; + MARIA_KEYDEF keyinfo[20]; + HA_KEYSEG keyseg[20]; + key_range min_range, max_range; + int silent=0; + int create_flag=0; + int null_fields=0; + int nrecords=30; + int uniques=0; + int i; + int error; + int row_count=0; + char record[MAX_REC_LENGTH]; + char key[MAX_REC_LENGTH]; + char read_record[MAX_REC_LENGTH]; + int upd=10; + ha_rows hrows; + + /* Define a column for NULLs and DEL markers*/ + + recinfo[0].type=FIELD_NORMAL; + recinfo[0].length=1; /* For NULL bits */ + + + /* Define spatial column */ + + recinfo[1].type=FIELD_BLOB; + recinfo[1].length=4 + portable_sizeof_char_ptr; + + + + /* Define a key with 1 spatial segment */ + + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].flag=HA_SPATIAL; + keyinfo[0].key_alg=KEYALG; + + keyinfo[0].seg[0].type= HA_KEYTYPE_BINARY; + keyinfo[0].seg[0].flag=0; + keyinfo[0].seg[0].start= 1; + keyinfo[0].seg[0].length=1; /* Spatial ignores it anyway */ + keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language=default_charset_info->number; + keyinfo[0].seg[0].bit_start=4; /* Long BLOB */ + + + if (!silent) + printf("- Creating isam-file\n"); + + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=10000000; + + if (maria_create(filename, + DYNAMIC_RECORD, + 1, /* keys */ + keyinfo, + 2, /* columns */ + recinfo,uniques,&uniquedef,&create_info,create_flag)) + goto err; + + if (!silent) + printf("- Open isam-file\n"); + + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + + if (!silent) + printf("- Writing key:s\n"); + + for (i=0; i "); + print_record(record,maria_position(file),"\n"); + error=maria_update(file,read_record,record); + if (error) + { + printf("pos: %2d maria_update: %3d errno: %3d\n",i,error,my_errno); + goto err; + } + } + + if ((error=read_with_pos(file,silent))) + goto err; + + if (!silent) + printf("- Test maria_rkey then a sequence of maria_rnext_same\n"); + + create_key(key, nrecords*4/5); + print_key(key," search for INTERSECT\n"); + + if ((error=maria_rkey(file,read_record,0,key,0,HA_READ_MBR_INTERSECT))) + { + printf("maria_rkey: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rkey\n"); + row_count=1; + + for (;;) + { + if ((error=maria_rnext_same(file,read_record))) + { + if (error==HA_ERR_END_OF_FILE) + break; + printf("maria_next: %3d errno: %3d\n",error,my_errno); + goto err; + } + print_record(read_record,maria_position(file)," maria_rnext_same\n"); + row_count++; + } + printf(" %d rows\n",row_count); + + if (!silent) + printf("- Test maria_rfirst then a sequence of maria_rnext\n"); + + error=maria_rfirst(file,read_record,0); + if (error) + { + printf("maria_rfirst: %3d errno: %3d\n",error,my_errno); + goto err; + } + row_count=1; + print_record(read_record,maria_position(file)," maria_frirst\n"); + + for(i=0;i "); + printf(" offs=%ld ",(long int)offs); + printf("%s",tail); +} + + +#ifdef NOT_USED +static void create_point(char *record,uint rownr) +{ + uint tmp; + char *ptr; + char *pos=record; + double x[200]; + int i; + + for(i=0;i= , <= , > , < +*/ + +uint NEAR maria_read_vec[]= +{ + SEARCH_FIND, SEARCH_FIND | SEARCH_BIGGER, SEARCH_FIND | SEARCH_SMALLER, + SEARCH_NO_FIND | SEARCH_BIGGER, SEARCH_NO_FIND | SEARCH_SMALLER, + SEARCH_FIND | SEARCH_PREFIX, SEARCH_LAST, SEARCH_LAST | SEARCH_SMALLER, + MBR_CONTAIN, MBR_INTERSECT, MBR_WITHIN, MBR_DISJOINT, MBR_EQUAL +}; + +uint NEAR maria_readnext_vec[]= +{ + SEARCH_BIGGER, SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_BIGGER, SEARCH_SMALLER, + SEARCH_BIGGER, SEARCH_SMALLER, SEARCH_SMALLER +}; diff --git a/storage/maria/ma_statrec.c b/storage/maria/ma_statrec.c new file mode 100644 index 00000000000..8ca3a5e989d --- /dev/null +++ b/storage/maria/ma_statrec.c @@ -0,0 +1,301 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + /* Functions to handle fixed-length-records */ + +#include "maria_def.h" + + +my_bool _ma_write_static_record(MARIA_HA *info, const byte *record) +{ + byte temp[8]; /* max pointer length */ + if (info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) + { + my_off_t filepos=info->s->state.dellink; + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info,(char*) &temp[0],info->s->base.rec_reflength, + info->s->state.dellink+1, + MYF(MY_NABP))) + goto err; + info->s->state.dellink= _ma_rec_pos(info->s,temp); + info->state->del--; + info->state->empty-=info->s->base.pack_reclength; + if (info->s->file_write(info, (char*) record, info->s->base.reclength, + filepos, + MYF(MY_NABP))) + goto err; + } + else + { + if (info->state->data_file_length > info->s->base.max_data_file_length- + info->s->base.pack_reclength) + { + my_errno=HA_ERR_RECORD_FILE_FULL; + return(2); + } + if (info->opt_flag & WRITE_CACHE_USED) + { /* Cash in use */ + if (my_b_write(&info->rec_cache, (byte*) record, + info->s->base.reclength)) + goto err; + if (info->s->base.pack_reclength != info->s->base.reclength) + { + uint length=info->s->base.pack_reclength - info->s->base.reclength; + bzero((char*) temp,length); + if (my_b_write(&info->rec_cache, (byte*) temp,length)) + goto err; + } + } + else + { + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_write(info,(char*) record,info->s->base.reclength, + info->state->data_file_length, + info->s->write_flag)) + goto err; + if (info->s->base.pack_reclength != info->s->base.reclength) + { + uint length=info->s->base.pack_reclength - info->s->base.reclength; + bzero((char*) temp,length); + if (info->s->file_write(info, (byte*) temp,length, + info->state->data_file_length+ + info->s->base.reclength, + info->s->write_flag)) + goto err; + } + } + info->state->data_file_length+=info->s->base.pack_reclength; + info->s->state.split++; + } + return 0; + err: + return 1; +} + +my_bool _ma_update_static_record(MARIA_HA *info, MARIA_RECORD_POS pos, + const byte *oldrec __attribute__ ((unused)), + const byte *record) +{ + info->rec_cache.seek_not_done=1; /* We have done a seek */ + return (info->s->file_write(info, + (char*) record,info->s->base.reclength, + pos, + MYF(MY_NABP)) != 0); +} + + +my_bool _ma_delete_static_record(MARIA_HA *info, + const byte *record __attribute__ ((unused))) +{ + byte temp[9]; /* 1+sizeof(uint32) */ + info->state->del++; + info->state->empty+=info->s->base.pack_reclength; + temp[0]= '\0'; /* Mark that record is deleted */ + _ma_dpointer(info,temp+1,info->s->state.dellink); + info->s->state.dellink= info->cur_row.lastpos; + info->rec_cache.seek_not_done=1; + return (info->s->file_write(info, temp, 1+info->s->rec_reflength, + info->cur_row.lastpos, MYF(MY_NABP)) != 0); +} + + +my_bool _ma_cmp_static_record(register MARIA_HA *info, + register const byte *old) +{ + DBUG_ENTER("_ma_cmp_static_record"); + + /* We are going to do changes; dont let anybody disturb */ + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (info->opt_flag & WRITE_CACHE_USED) + { + if (flush_io_cache(&info->rec_cache)) + { + DBUG_RETURN(1); + } + info->rec_cache.seek_not_done=1; /* We have done a seek */ + } + + if ((info->opt_flag & READ_CHECK_USED)) + { /* If check isn't disabled */ + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info, (char*) info->rec_buff, + info->s->base.reclength, + info->cur_row.lastpos, + MYF(MY_NABP))) + DBUG_RETURN(1); + if (memcmp((byte*) info->rec_buff, (byte*) old, + (uint) info->s->base.reclength)) + { + DBUG_DUMP("read",old,info->s->base.reclength); + DBUG_DUMP("disk",info->rec_buff,info->s->base.reclength); + my_errno=HA_ERR_RECORD_CHANGED; /* Record have changed */ + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos) +{ + DBUG_ENTER("_ma_cmp_static_unique"); + + info->rec_cache.seek_not_done=1; /* We have done a seek */ + if (info->s->file_read(info, (char*) info->rec_buff, info->s->base.reclength, + pos, MYF(MY_NABP))) + DBUG_RETURN(1); + DBUG_RETURN(_ma_unique_comp(def, record, (byte*) info->rec_buff, + def->null_are_equal)); +} + + +/* + Read a fixed-length-record + + RETURN + 0 Ok + 1 record delete + -1 on read-error or locking-error +*/ + +int _ma_read_static_record(register MARIA_HA *info, register byte *record, + MARIA_RECORD_POS pos) +{ + int error; + + if (pos != HA_OFFSET_ERROR) + { + if (info->opt_flag & WRITE_CACHE_USED && + info->rec_cache.pos_in_file <= pos && + flush_io_cache(&info->rec_cache)) + return(-1); + info->rec_cache.seek_not_done=1; /* We have done a seek */ + + error=info->s->file_read(info,(char*) record,info->s->base.reclength, + pos, MYF(MY_NABP)) != 0; + fast_ma_writeinfo(info); + if (! error) + { + if (!*record) + { + my_errno=HA_ERR_RECORD_DELETED; + return(1); /* Record is deleted */ + } + info->update|= HA_STATE_AKTIV; /* Record is read */ + return(0); + } + return(-1); /* Error on read */ + } + fast_ma_writeinfo(info); /* No such record */ + return(-1); +} + + + +int _ma_read_rnd_static_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos, + my_bool skip_deleted_blocks) +{ + int locked,error,cache_read; + uint cache_length; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_read_rnd_static_record"); + + cache_read=0; + cache_length=0; + if (info->opt_flag & READ_CACHE_USED) + { /* Cache in use */ + if (filepos == my_b_tell(&info->rec_cache) && + (skip_deleted_blocks || !filepos)) + { + cache_read=1; /* Read record using cache */ + cache_length=(uint) (info->rec_cache.read_end - info->rec_cache.read_pos); + } + else + info->rec_cache.seek_not_done=1; /* Filepos is changed */ + } + locked=0; + if (info->lock_type == F_UNLCK) + { + if (filepos >= info->state->data_file_length) + { /* Test if new records */ + if (_ma_readinfo(info,F_RDLCK,0)) + DBUG_RETURN(my_errno); + locked=1; + } + else + { /* We don't nead new info */ +#ifndef UNSAFE_LOCKING + if ((! cache_read || share->base.reclength > cache_length) && + share->tot_locks == 0) + { /* record not in cache */ + locked=1; + } +#else + info->tmp_lock_type=F_RDLCK; +#endif + } + } + if (filepos >= info->state->data_file_length) + { + DBUG_PRINT("test",("filepos: %ld (%ld) records: %ld del: %ld", + (long) filepos/share->base.reclength, (long) filepos, + (long) info->state->records, (long) info->state->del)); + fast_ma_writeinfo(info); + DBUG_RETURN(my_errno=HA_ERR_END_OF_FILE); + } + info->cur_row.lastpos= filepos; + info->cur_row.nextpos= filepos+share->base.pack_reclength; + + if (! cache_read) /* No cacheing */ + { + if ((error= _ma_read_static_record(info, buf, filepos))) + { + if (error > 0) + error=my_errno=HA_ERR_RECORD_DELETED; + else + error=my_errno; + } + DBUG_RETURN(error); + } + + /* Read record with cacheing */ + error=my_b_read(&info->rec_cache,(byte*) buf,share->base.reclength); + if (info->s->base.pack_reclength != info->s->base.reclength && !error) + { + char tmp[8]; /* Skill fill bytes */ + error=my_b_read(&info->rec_cache,(byte*) tmp, + info->s->base.pack_reclength - info->s->base.reclength); + } + if (locked) + VOID(_ma_writeinfo(info,0)); /* Unlock keyfile */ + if (!error) + { + if (!buf[0]) + { /* Record is removed */ + DBUG_RETURN(my_errno=HA_ERR_RECORD_DELETED); + } + /* Found and may be updated */ + info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; + DBUG_RETURN(0); + } + /* my_errno should be set if rec_cache.error == -1 */ + if (info->rec_cache.error != -1 || my_errno == 0) + my_errno=HA_ERR_WRONG_IN_RECORD; + DBUG_RETURN(my_errno); /* Something wrong (EOF?) */ +} diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c new file mode 100644 index 00000000000..028e02ab9d1 --- /dev/null +++ b/storage/maria/ma_test1.c @@ -0,0 +1,735 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Testing of the basic functions of a MARIA table */ + +#include "maria.h" +#include +#include +#include "ma_control_file.h" +#include "ma_loghandler.h" + +extern PAGECACHE *maria_log_pagecache; +extern const char *maria_data_root; + +#define MAX_REC_LENGTH 1024 + +static void usage(); + +static int rec_pointer_size=0, flags[50]; +static int key_field=FIELD_SKIP_PRESPACE,extra_field=FIELD_SKIP_ENDSPACE; +static int key_type=HA_KEYTYPE_NUM; +static int create_flag=0; +static enum data_file_type record_type= DYNAMIC_RECORD; + +static uint insert_count, update_count, remove_count; +static uint pack_keys=0, pack_seg=0, key_length; +static uint unique_key=HA_NOSAME; +static my_bool pagecacheing, null_fields, silent, skip_update, opt_unique, + verbose, skip_delete, transactional; +static MARIA_COLUMNDEF recinfo[4]; +static MARIA_KEYDEF keyinfo[10]; +static HA_KEYSEG keyseg[10]; +static HA_KEYSEG uniqueseg[10]; + +static int run_test(const char *filename); +static void get_options(int argc, char *argv[]); +static void create_key(char *key,uint rownr); +static void create_record(char *record,uint rownr); +static void update_record(char *record); + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + my_init(); + get_options(argc,argv); + maria_data_root= "."; + /* Maria requires that we always have a page cache */ + if (maria_init() || + (init_pagecache(maria_pagecache, IO_SIZE*16, 0, 0, + maria_block_size) == 0) || + ma_control_file_create_or_open() || + (init_pagecache(maria_log_pagecache, + TRANSLOG_PAGECACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE) == 0) || + translog_init(maria_data_root, TRANSLOG_FILE_SIZE, + 0, 0, maria_log_pagecache, + TRANSLOG_DEFAULT_FLAGS)) + { + fprintf(stderr, "Error in initialization"); + exit(1); + } + + exit(run_test("test1")); +} + + +static int run_test(const char *filename) +{ + MARIA_HA *file; + int i,j,error,deleted,rec_length,uniques=0; + ha_rows found,row_count; + char record[MAX_REC_LENGTH],key[MAX_REC_LENGTH],read_record[MAX_REC_LENGTH]; + MARIA_UNIQUEDEF uniquedef; + MARIA_CREATE_INFO create_info; + + bzero((char*) recinfo,sizeof(recinfo)); + bzero((char*) &create_info,sizeof(create_info)); + + /* First define 2 columns */ + create_info.null_bytes= 1; + recinfo[0].type= key_field; + recinfo[0].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr : + key_length); + if (key_field == FIELD_VARCHAR) + recinfo[0].length+= HA_VARCHAR_PACKLENGTH(key_length); + recinfo[1].type=extra_field; + recinfo[1].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr : 24); + if (extra_field == FIELD_VARCHAR) + recinfo[1].length+= HA_VARCHAR_PACKLENGTH(recinfo[1].length); + if (opt_unique) + { + recinfo[2].type=FIELD_CHECK; + recinfo[2].length=MARIA_UNIQUE_HASH_LENGTH; + } + rec_length= recinfo[0].length+recinfo[1].length+recinfo[2].length; + + if (key_type == HA_KEYTYPE_VARTEXT1 && + key_length > 255) + key_type= HA_KEYTYPE_VARTEXT2; + + /* Define a key over the first column */ + keyinfo[0].seg=keyseg; + keyinfo[0].keysegs=1; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].seg[0].type= key_type; + keyinfo[0].seg[0].flag= pack_seg; + keyinfo[0].seg[0].start=1; + keyinfo[0].seg[0].length=key_length; + keyinfo[0].seg[0].null_bit= null_fields ? 2 : 0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].seg[0].language= default_charset_info->number; + if (pack_seg & HA_BLOB_PART) + { + keyinfo[0].seg[0].bit_start=4; /* Length of blob length */ + } + keyinfo[0].flag = (uint8) (pack_keys | unique_key); + + bzero((byte*) flags,sizeof(flags)); + if (opt_unique) + { + uint start; + uniques=1; + bzero((char*) &uniquedef,sizeof(uniquedef)); + bzero((char*) uniqueseg,sizeof(uniqueseg)); + uniquedef.seg=uniqueseg; + uniquedef.keysegs=2; + + /* Make a unique over all columns (except first NULL fields) */ + for (i=0, start=1 ; i < 2 ; i++) + { + uniqueseg[i].start=start; + start+=recinfo[i].length; + uniqueseg[i].length=recinfo[i].length; + uniqueseg[i].language= default_charset_info->number; + } + uniqueseg[0].type= key_type; + uniqueseg[0].null_bit= null_fields ? 2 : 0; + uniqueseg[1].type= HA_KEYTYPE_TEXT; + if (extra_field == FIELD_BLOB) + { + uniqueseg[1].length=0; /* The whole blob */ + uniqueseg[1].bit_start=4; /* long blob */ + uniqueseg[1].flag|= HA_BLOB_PART; + } + else if (extra_field == FIELD_VARCHAR) + { + uniqueseg[1].flag|= HA_VAR_LENGTH_PART; + uniqueseg[1].type= (HA_VARCHAR_PACKLENGTH(recinfo[1].length-1) == 1 ? + HA_KEYTYPE_VARTEXT1 : HA_KEYTYPE_VARTEXT2); + } + } + else + uniques=0; + + if (!silent) + printf("- Creating maria file\n"); + create_info.max_rows=(ulong) (rec_pointer_size ? + (1L << (rec_pointer_size*8))/40 : + 0); + create_info.transactional= transactional; + if (maria_create(filename, record_type, 1, keyinfo,2+opt_unique,recinfo, + uniques, &uniquedef, &create_info, + create_flag)) + goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + if (!silent) + printf("- Writing key:s\n"); + + my_errno=0; + row_count=deleted=0; + for (i=49 ; i>=1 ; i-=2 ) + { + if (insert_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + j=i%25 +1; + create_record(record,j); + error=maria_write(file,record); + if (!error) + row_count++; + flags[j]=1; + if (verbose || error) + printf("J= %2d maria_write: %d errno: %d\n", j,error,my_errno); + } + + /* Insert 2 rows with null values */ + if (null_fields) + { + create_record(record,0); + error=maria_write(file,record); + if (!error) + row_count++; + if (verbose || error) + printf("J= NULL maria_write: %d errno: %d\n", error,my_errno); + error=maria_write(file,record); + if (!error) + row_count++; + if (verbose || error) + printf("J= NULL maria_write: %d errno: %d\n", error,my_errno); + flags[0]=2; + } + + if (!skip_update) + { + if (opt_unique) + { + if (!silent) + printf("- Checking unique constraint\n"); + create_record(record,j); + if (!maria_write(file,record) || my_errno != HA_ERR_FOUND_DUPP_UNIQUE) + { + printf("unique check failed\n"); + } + } + if (!silent) + printf("- Updating rows\n"); + + /* Update first last row to force extend of file */ + if (maria_rsame(file,read_record,-1)) + { + printf("Can't find last row with maria_rsame\n"); + } + else + { + memcpy(record,read_record,rec_length); + update_record(record); + if (maria_update(file,read_record,record)) + { + printf("Can't update last row: %.*s\n", + keyinfo[0].seg[0].length,read_record+1); + } + } + + /* Read through all rows and update them */ + assert(maria_scan_init(file) == 0); + + found=0; + while ((error= maria_scan(file,read_record)) == 0) + { + if (update_count-- == 0) { VOID(maria_close(file)) ; exit(0) ; } + memcpy(record,read_record,rec_length); + update_record(record); + if (maria_update(file,read_record,record)) + { + printf("Can't update row: %.*s, error: %d\n", + keyinfo[0].seg[0].length,record+1,my_errno); + } + found++; + } + if (found != row_count) + printf("Found %ld of %ld rows\n", (ulong) found, (ulong) row_count); + maria_scan_end(file); + } + + if (!silent) + printf("- Reopening file\n"); + if (maria_close(file)) goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) goto err; + if (!skip_delete) + { + if (!silent) + printf("- Removing keys\n"); + + for (i=0 ; i <= 10 ; i++) + { + /* testing */ + if (remove_count-- == 0) + { + fprintf(stderr, + "delete-rows number of rows deleted; Going down hard!\n"); + VOID(maria_close(file)); + exit(0) ; + } + j=i*2; + if (!flags[j]) + continue; + create_key(key,j); + my_errno=0; + if ((error = maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT))) + { + if (verbose || (flags[j] >= 1 || + (error && my_errno != HA_ERR_KEY_NOT_FOUND))) + printf("key: '%.*s' maria_rkey: %3d errno: %3d\n", + (int) key_length,key+test(null_fields),error,my_errno); + } + else + { + error=maria_delete(file,read_record); + if (verbose || error) + printf("key: '%.*s' maria_delete: %3d errno: %3d\n", + (int) key_length, key+test(null_fields), error, my_errno); + if (! error) + { + deleted++; + flags[j]--; + } + } + } + } + if (!silent) + printf("- Reading rows with key\n"); + record[1]= 0; /* For nicer printf */ + for (i=0 ; i <= 25 ; i++) + { + create_key(key,i); + my_errno=0; + error=maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT); + if (verbose || + (error == 0 && flags[i] == 0 && unique_key) || + (error && (flags[i] != 0 || my_errno != HA_ERR_KEY_NOT_FOUND))) + { + printf("key: '%.*s' maria_rkey: %3d errno: %3d record: %s\n", + (int) key_length,key+test(null_fields),error,my_errno,record+1); + } + } + + if (!silent) + printf("- Reading rows with position\n"); + if (maria_scan_init(file)) + { + fprintf(stderr, "maria_scan_init failed\n"); + goto err; + } + + for (i=1,found=0 ; i <= 30 ; i++) + { + my_errno=0; + if ((error= maria_scan(file, read_record)) == HA_ERR_END_OF_FILE) + { + if (found != row_count-deleted) + printf("Found only %ld of %ld rows\n", (ulong) found, + (ulong) (row_count - deleted)); + break; + } + if (!error) + found++; + if (verbose || (error != 0 && error != HA_ERR_RECORD_DELETED && + error != HA_ERR_END_OF_FILE)) + { + printf("pos: %2d maria_rrnd: %3d errno: %3d record: %s\n", + i-1,error,my_errno,read_record+1); + } + } + if (maria_close(file)) + goto err; + maria_end(); + my_end(MY_CHECK_ERROR); + + return (0); +err: + printf("got error: %3d when using maria-database\n",my_errno); + return 1; /* skip warning */ +} + + +static void create_key_part(char *key,uint rownr) +{ + if (!unique_key) + rownr&=7; /* Some identical keys */ + if (keyinfo[0].seg[0].type == HA_KEYTYPE_NUM) + { + sprintf(key,"%*d",keyinfo[0].seg[0].length,rownr); + } + else if (keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT1 || + keyinfo[0].seg[0].type == HA_KEYTYPE_VARTEXT2) + { /* Alpha record */ + /* Create a key that may be easily packed */ + bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B'); + sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr); + if ((rownr & 7) == 0) + { + /* Change the key to force a unpack of the next key */ + bfill(key+3,keyinfo[0].seg[0].length-5,rownr < 10 ? 'a' : 'b'); + } + } + else + { /* Alpha record */ + if (keyinfo[0].seg[0].flag & HA_SPACE_PACK) + sprintf(key,"%-*d",keyinfo[0].seg[0].length,rownr); + else + { + /* Create a key that may be easily packed */ + bfill(key,keyinfo[0].seg[0].length,rownr < 10 ? 'A' : 'B'); + sprintf(key+keyinfo[0].seg[0].length-2,"%-2d",rownr); + if ((rownr & 7) == 0) + { + /* Change the key to force a unpack of the next key */ + key[1]= (rownr < 10 ? 'a' : 'b'); + } + } + } +} + + +static void create_key(char *key,uint rownr) +{ + if (keyinfo[0].seg[0].null_bit) + { + if (rownr == 0) + { + key[0]=1; /* null key */ + key[1]=0; /* For easy print of key */ + return; + } + *key++=0; + } + if (keyinfo[0].seg[0].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) + { + uint tmp; + create_key_part(key+2,rownr); + tmp=strlen(key+2); + int2store(key,tmp); + } + else + create_key_part(key,rownr); +} + + +static char blob_key[MAX_REC_LENGTH]; +static char blob_record[MAX_REC_LENGTH+20*20]; + + +static void create_record(char *record,uint rownr) +{ + char *pos; + bzero((char*) record,MAX_REC_LENGTH); + record[0]=1; /* delete marker */ + if (rownr == 0 && keyinfo[0].seg[0].null_bit) + record[0]|=keyinfo[0].seg[0].null_bit; /* Null key */ + + pos=record+1; + if (recinfo[0].type == FIELD_BLOB) + { + uint tmp; + char *ptr; + create_key_part(blob_key,rownr); + tmp=strlen(blob_key); + int4store(pos,tmp); + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + pos+=recinfo[0].length; + } + else if (recinfo[0].type == FIELD_VARCHAR) + { + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1); + create_key_part(pos+pack_length,rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); + pos+= recinfo[0].length; + } + else + { + create_key_part(pos,rownr); + pos+=recinfo[0].length; + } + if (recinfo[1].type == FIELD_BLOB) + { + uint tmp; + char *ptr;; + sprintf(blob_record,"... row: %d", rownr); + strappend(blob_record,max(MAX_REC_LENGTH-rownr,10),' '); + tmp=strlen(blob_record); + int4store(pos,tmp); + ptr=blob_record; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + uint tmp, pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + sprintf(pos+pack_length, "... row: %d", rownr); + tmp= strlen(pos+pack_length); + if (pack_length == 1) + *(uchar*) pos= (uchar) tmp; + else + int2store(pos,tmp); + } + else + { + sprintf(pos,"... row: %d", rownr); + strappend(pos,recinfo[1].length,' '); + } +} + +/* change row to test re-packing of rows and reallocation of keys */ + +static void update_record(char *record) +{ + char *pos=record+1; + if (recinfo[0].type == FIELD_BLOB) + { + char *column,*ptr; + int length; + length=uint4korr(pos); /* Long blob */ + memcpy_fixed(&column,pos+4,sizeof(char*)); + memcpy(blob_key,column,length); /* Move old key */ + ptr=blob_key; + memcpy_fixed(pos+4,&ptr,sizeof(char*)); /* Store pointer to new key */ + if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) + default_charset_info->cset->casedn(default_charset_info, + blob_key, length, blob_key, length); + pos+=recinfo[0].length; + } + else if (recinfo[0].type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[0].length-1); + uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); + default_charset_info->cset->casedn(default_charset_info, + pos + pack_length, length, + pos + pack_length, length); + pos+=recinfo[0].length; + } + else + { + if (keyinfo[0].seg[0].type != HA_KEYTYPE_NUM) + default_charset_info->cset->casedn(default_charset_info, + pos, keyinfo[0].seg[0].length, + pos, keyinfo[0].seg[0].length); + pos+=recinfo[0].length; + } + + if (recinfo[1].type == FIELD_BLOB) + { + char *column; + int length; + length=uint4korr(pos); + memcpy_fixed(&column,pos+4,sizeof(char*)); + memcpy(blob_record,column,length); + bfill(blob_record+length,20,'.'); /* Make it larger */ + length+=20; + int4store(pos,length); + column=blob_record; + memcpy_fixed(pos+4,&column,sizeof(char*)); + } + else if (recinfo[1].type == FIELD_VARCHAR) + { + /* Second field is longer than 10 characters */ + uint pack_length= HA_VARCHAR_PACKLENGTH(recinfo[1].length-1); + uint length= pack_length == 1 ? (uint) *(uchar*) pos : uint2korr(pos); + pos= record+ recinfo[1].offset; + bfill(pos+pack_length+length,recinfo[1].length-length-pack_length,'.'); + length=recinfo[1].length-pack_length; + if (pack_length == 1) + *(uchar*) pos= (uchar) length; + else + int2store(pos,length); + } + else + { + bfill(pos+recinfo[1].length-10,10,'.'); + } +} + + +static struct my_option my_long_options[] = +{ + {"checksum", 'c', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', "Undocumented", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"delete-rows", 'd', "Abort after this many rows has been deleted", + (gptr*) &remove_count, (gptr*) &remove_count, 0, GET_UINT, REQUIRED_ARG, + 1000, 0, 0, 0, 0, 0}, + {"help", '?', "Display help and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"insert-rows", 'i', "Undocumented", (gptr*) &insert_count, + (gptr*) &insert_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"key-alpha", 'a', "Use a key of type HA_KEYTYPE_TEXT", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-binary-pack", 'B', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-blob", 'b', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-cache", 'K', "Undocumented", (gptr*) &pagecacheing, + (gptr*) &pagecacheing, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-length", 'k', "Undocumented", (gptr*) &key_length, (gptr*) &key_length, + 0, GET_UINT, REQUIRED_ARG, 6, 0, 0, 0, 0, 0}, + {"key-multiple", 'm', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-prefix_pack", 'P', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-space_pack", 'p', "Undocumented", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"key-varchar", 'w', "Test VARCHAR keys", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"null-fields", 'N', "Define fields with NULL", + (gptr*) &null_fields, (gptr*) &null_fields, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, + {"row-fixed-size", 'S', "Fixed size records", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"rows-in-block", 'M', "Store rows in block format", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"row-pointer-size", 'R', "Undocumented", (gptr*) &rec_pointer_size, + (gptr*) &rec_pointer_size, 0, GET_INT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', "Undocumented", + (gptr*) &silent, (gptr*) &silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-delete", 'U', "Don't test deletes", (gptr*) &skip_delete, + (gptr*) &skip_delete, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"skip-update", 'D', "Don't test updates", (gptr*) &skip_update, + (gptr*) &skip_update, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"transactional", 'T', "Test in transactional mode. (Only works with block format)", + (gptr*) &transactional, (gptr*) &transactional, 0, GET_BOOL, NO_ARG, + 0, 0, 0, 0, 0, 0}, + {"unique", 'C', "Undocumented", (gptr*) &opt_unique, (gptr*) &opt_unique, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"update-rows", 'u', "Undocumented", (gptr*) &update_count, + (gptr*) &update_count, 0, GET_UINT, REQUIRED_ARG, 1000, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Be more verbose", (gptr*) &verbose, (gptr*) &verbose, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version number and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch(optid) { + case 'a': + key_type= HA_KEYTYPE_TEXT; + break; + case 'c': + create_flag|= HA_CREATE_CHECKSUM; + break; + case 'R': /* Length of record pointer */ + if (rec_pointer_size > 3) + rec_pointer_size=0; + break; + case 'P': + pack_keys= HA_PACK_KEY; /* Use prefix compression */ + break; + case 'B': + pack_keys= HA_BINARY_PACK_KEY; /* Use binary compression */ + break; + case 'M': + record_type= BLOCK_RECORD; + break; + case 'S': + if (key_field == FIELD_VARCHAR) + { + create_flag=0; /* Static sized varchar */ + record_type= STATIC_RECORD; + } + else if (key_field != FIELD_BLOB) + { + key_field=FIELD_NORMAL; /* static-size record */ + extra_field=FIELD_NORMAL; + record_type= STATIC_RECORD; + } + break; + case 'p': + pack_keys=HA_PACK_KEY; /* Use prefix + space packing */ + pack_seg=HA_SPACE_PACK; + key_type=HA_KEYTYPE_TEXT; + break; + case 'm': + unique_key=0; + break; + case 'b': + key_field=FIELD_BLOB; /* blob key */ + extra_field= FIELD_BLOB; + pack_seg|= HA_BLOB_PART; + key_type= HA_KEYTYPE_VARTEXT1; + if (record_type == STATIC_RECORD) + record_type= DYNAMIC_RECORD; + break; + case 'k': + if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH) + { + fprintf(stderr,"Wrong key length\n"); + exit(1); + } + break; + case 'w': + key_field=FIELD_VARCHAR; /* varchar keys */ + extra_field= FIELD_VARCHAR; + key_type= HA_KEYTYPE_VARTEXT1; + pack_seg|= HA_VAR_LENGTH_PART; + if (record_type == STATIC_RECORD) + record_type= DYNAMIC_RECORD; + break; + case 'K': /* Use key cacheing */ + pagecacheing=1; + break; + case 'V': + printf("test1 Ver 1.2 \n"); + exit(0); + case '#': + DBUG_PUSH (argument); + break; + case '?': + usage(); + exit(1); + } + return 0; +} + + +/* Read options */ + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(ho_error); + + return; +} /* get options */ + + +static void usage() +{ + printf("Usage: %s [options]\n\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c new file mode 100644 index 00000000000..bbbb4fca1bf --- /dev/null +++ b/storage/maria/ma_test2.c @@ -0,0 +1,1105 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Test av isam-databas: stor test */ + +#ifndef USE_MY_FUNC /* We want to be able to dbug this !! */ +#define USE_MY_FUNC +#endif +#ifdef DBUG_OFF +#undef DBUG_OFF +#endif +#ifndef SAFEMALLOC +#define SAFEMALLOC +#endif +#include "maria_def.h" +#include +#include + + +#define STANDARD_LENGTH 37 +#define MARIA_KEYS 6 +#define MAX_PARTS 4 +#if !defined(MSDOS) && !defined(labs) +#define labs(a) abs(a) +#endif + +static void get_options(int argc, char *argv[]); +static uint rnd(uint max_value); +static void fix_length(byte *record,uint length); +static void put_blob_in_record(char *blob_pos,char **blob_buffer, + ulong *length); +static void copy_key(struct st_maria_info *info,uint inx, + uchar *record,uchar *key); + +static int verbose=0,testflag=0, + first_key=0,async_io=0,pagecacheing=0,write_cacheing=0,locking=0, + rec_pointer_size=0,pack_fields=1,silent=0, + opt_quick_mode=0, transactional= 0; +static int pack_seg=HA_SPACE_PACK,pack_type=HA_PACK_KEY,remove_count=-1; +static int create_flag= 0, srand_arg= 0; +static ulong pagecache_size=IO_SIZE*16; +static enum data_file_type record_type= DYNAMIC_RECORD; + +static uint keys=MARIA_KEYS,recant=1000; +static uint use_blob=0; +static uint16 key1[1001],key3[5000]; +static char record[300],record2[300],key[100],key2[100], + read_record[300],read_record2[300],read_record3[300]; +static HA_KEYSEG glob_keyseg[MARIA_KEYS][MAX_PARTS]; + + /* Test program */ + +int main(int argc, char *argv[]) +{ + uint i; + int j,n1,n2,n3,error,k; + uint write_count,update,dupp_keys,opt_delete,start,length,blob_pos, + reclength,ant,found_parts; + my_off_t lastpos; + ha_rows range_records,records; + MARIA_HA *file; + MARIA_KEYDEF keyinfo[10]; + MARIA_COLUMNDEF recinfo[10]; + MARIA_INFO info; + const char *filename; + char *blob_buffer; + MARIA_CREATE_INFO create_info; + MY_INIT(argv[0]); + + filename= "test2"; + get_options(argc,argv); + if (! async_io) + my_disable_async_io=1; + + maria_init(); + reclength=STANDARD_LENGTH+60+(use_blob ? 8 : 0); + blob_pos=STANDARD_LENGTH+60; + keyinfo[0].seg= &glob_keyseg[0][0]; + keyinfo[0].seg[0].start=0; + keyinfo[0].seg[0].length=6; + keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].language= default_charset_info->number; + keyinfo[0].seg[0].flag=(uint8) pack_seg; + keyinfo[0].seg[0].null_bit=0; + keyinfo[0].seg[0].null_pos=0; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].keysegs=1; + keyinfo[0].flag = pack_type; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[1].seg= &glob_keyseg[1][0]; + keyinfo[1].seg[0].start=7; + keyinfo[1].seg[0].length=6; + keyinfo[1].seg[0].type=HA_KEYTYPE_BINARY; + keyinfo[1].seg[0].flag=0; + keyinfo[1].seg[0].null_bit=0; + keyinfo[1].seg[0].null_pos=0; + keyinfo[1].seg[1].start=0; /* two part key */ + keyinfo[1].seg[1].length=6; + keyinfo[1].seg[1].type=HA_KEYTYPE_NUM; + keyinfo[1].seg[1].flag=HA_REVERSE_SORT; + keyinfo[1].seg[1].null_bit=0; + keyinfo[1].seg[1].null_pos=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; + keyinfo[1].keysegs=2; + keyinfo[1].flag =0; + keyinfo[1].block_length= MARIA_MIN_KEY_BLOCK_LENGTH; /* Diff blocklength */ + keyinfo[2].seg= &glob_keyseg[2][0]; + keyinfo[2].seg[0].start=12; + keyinfo[2].seg[0].length=8; + keyinfo[2].seg[0].type=HA_KEYTYPE_BINARY; + keyinfo[2].seg[0].flag=HA_REVERSE_SORT; + keyinfo[2].seg[0].null_bit=0; + keyinfo[2].seg[0].null_pos=0; + keyinfo[2].key_alg=HA_KEY_ALG_BTREE; + keyinfo[2].keysegs=1; + keyinfo[2].flag =HA_NOSAME; + keyinfo[2].block_length= 0; /* Default block length */ + keyinfo[3].seg= &glob_keyseg[3][0]; + keyinfo[3].seg[0].start=0; + keyinfo[3].seg[0].length=reclength-(use_blob ? 8 : 0); + keyinfo[3].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[3].seg[0].language=default_charset_info->number; + keyinfo[3].seg[0].flag=(uint8) pack_seg; + keyinfo[3].seg[0].null_bit=0; + keyinfo[3].seg[0].null_pos=0; + keyinfo[3].key_alg=HA_KEY_ALG_BTREE; + keyinfo[3].keysegs=1; + keyinfo[3].flag = pack_type; + keyinfo[3].block_length= 0; /* Default block length */ + keyinfo[4].seg= &glob_keyseg[4][0]; + keyinfo[4].seg[0].start=0; + keyinfo[4].seg[0].length=5; + keyinfo[4].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[4].seg[0].language=default_charset_info->number; + keyinfo[4].seg[0].flag=0; + keyinfo[4].seg[0].null_bit=0; + keyinfo[4].seg[0].null_pos=0; + keyinfo[4].key_alg=HA_KEY_ALG_BTREE; + keyinfo[4].keysegs=1; + keyinfo[4].flag = pack_type; + keyinfo[4].block_length= 0; /* Default block length */ + keyinfo[5].seg= &glob_keyseg[5][0]; + keyinfo[5].seg[0].start=0; + keyinfo[5].seg[0].length=4; + keyinfo[5].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[5].seg[0].language=default_charset_info->number; + keyinfo[5].seg[0].flag=pack_seg; + keyinfo[5].seg[0].null_bit=0; + keyinfo[5].seg[0].null_pos=0; + keyinfo[5].key_alg=HA_KEY_ALG_BTREE; + keyinfo[5].keysegs=1; + keyinfo[5].flag = pack_type; + keyinfo[5].block_length= 0; /* Default block length */ + + recinfo[0].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[0].length=7; + recinfo[0].null_bit=0; + recinfo[0].null_pos=0; + recinfo[1].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[1].length=5; + recinfo[1].null_bit=0; + recinfo[1].null_pos=0; + recinfo[2].type=pack_fields ? FIELD_SKIP_PRESPACE : 0; + recinfo[2].length=9; + recinfo[2].null_bit=0; + recinfo[2].null_pos=0; + recinfo[3].type=FIELD_NORMAL; + recinfo[3].length=STANDARD_LENGTH-7-5-9-4; + recinfo[3].null_bit=0; + recinfo[3].null_pos=0; + recinfo[4].type=pack_fields ? FIELD_SKIP_ZERO : 0; + recinfo[4].length=4; + recinfo[4].null_bit=0; + recinfo[4].null_pos=0; + recinfo[5].type=pack_fields ? FIELD_SKIP_ENDSPACE : 0; + recinfo[5].length=60; + recinfo[5].null_bit=0; + recinfo[5].null_pos=0; + if (use_blob) + { + recinfo[6].type=FIELD_BLOB; + recinfo[6].length=4+portable_sizeof_char_ptr; + recinfo[6].null_bit=0; + recinfo[6].null_pos=0; + } + + write_count=update=dupp_keys=opt_delete=0; + blob_buffer=0; + + for (i=1000 ; i>0 ; i--) key1[i]=0; + for (i=4999 ; i>0 ; i--) key3[i]=0; + + if (!silent) + printf("- Creating maria-file\n"); + file= 0; + bzero((char*) &create_info,sizeof(create_info)); + create_info.max_rows=(ha_rows) (rec_pointer_size ? + (1L << (rec_pointer_size*8))/ + reclength : 0); + create_info.reloc_rows=(ha_rows) 100; + create_info.transactional= transactional; + if (maria_create(filename, record_type, keys,&keyinfo[first_key], + use_blob ? 7 : 6, &recinfo[0], + 0,(MARIA_UNIQUEDEF*) 0, + &create_info,create_flag)) + goto err; + if (!(file=maria_open(filename,2,HA_OPEN_ABORT_IF_LOCKED))) + goto err; + if (!silent) + printf("- Writing key:s\n"); + maria_data_root= "."; + /* Maria requires that we always have a page cache */ + if ((init_pagecache(maria_pagecache, pagecache_size, 0, 0, + maria_block_size) == 0) || + ma_control_file_create_or_open() || + (init_pagecache(maria_log_pagecache, + TRANSLOG_PAGECACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE) == 0) || + translog_init(maria_data_root, TRANSLOG_FILE_SIZE, + 0, 0, maria_log_pagecache, + TRANSLOG_DEFAULT_FLAGS)) + { + fprintf(stderr, "Error in initialization"); + exit(1); + } + + if (locking) + maria_lock_database(file,F_WRLCK); + if (write_cacheing) + maria_extra(file,HA_EXTRA_WRITE_CACHE,0); + if (opt_quick_mode) + maria_extra(file,HA_EXTRA_QUICK,0); + + for (i=0 ; i < recant ; i++) + { + ulong blob_length; + n1=rnd(1000); n2=rnd(100); n3=rnd(5000); + sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count); + int4store(record+STANDARD_LENGTH-4,(long) i); + fix_length(record,(uint) STANDARD_LENGTH+rnd(60)); + put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length); + DBUG_PRINT("test",("record: %d blob_length: %lu", i, blob_length)); + + if (maria_write(file,record)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0) + { + printf("Error: %d in write at record: %d\n",my_errno,i); + goto err; + } + if (verbose) printf(" Double key: %d\n",n3); + } + else + { + if (key3[n3] == 1 && first_key <3 && first_key+keys >= 3) + { + printf("Error: Didn't get error when writing second key: '%8d'\n",n3); + goto err; + } + write_count++; key1[n1]++; key3[n3]=1; + } + + /* Check if we can find key without flushing database */ + if (i % 10 == 0) + { + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + if (!j) + for (j=999 ; j>0 && key1[j] == 0 ; j--) ; + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("Test in loop: Can't find key: \"%s\"\n",key); + goto err; + } + } + } + if (testflag == 1) + goto end; + + if (write_cacheing) + { + if (maria_extra(file,HA_EXTRA_NO_CACHE,0)) + { + puts("got error from maria_extra(HA_EXTRA_NO_CACHE)"); + goto end; + } + } +#ifdef REMOVE_WHEN_WE_HAVE_RESIZE + if (pagecacheing) + resize_pagecache(maria_pagecache, maria_block_size, + pagecache_size * 2, 0, 0); +#endif + if (!silent) + printf("- Delete\n"); + if (srand_arg) + srand(srand_arg); + for (i=0 ; i0 && key1[j] == 0 ; j--) ; + if (j != 0) + { + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + if (bcmp(read_record+keyinfo[0].seg[0].start, + key, keyinfo[0].seg[0].length)) + { + printf("Found wrong record when searching for key: \"%s\"\n",key); + goto err; + } + if (opt_delete == (uint) remove_count) /* While testing */ + goto end; + if (maria_delete(file,read_record)) + { + printf("error: %d; can't delete record: \"%s\"\n", my_errno,read_record); + goto err; + } + opt_delete++; + key1[atoi(read_record+keyinfo[0].seg[0].start)]--; + key3[atoi(read_record+keyinfo[2].seg[0].start)]=0; + } + else + puts("Warning: Skipping delete test because no dupplicate keys"); + } + if (testflag == 2) + goto end; + + if (!silent) + printf("- Update\n"); + if (srand_arg) + srand(srand_arg); + for (i=0 ; i0 && key1[j] == 0 ; j--) ; + if (j != 0) + { + sprintf(key,"%6d",j); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + { + printf("can't find key1: \"%s\"\n",key); + goto err; + } + if (bcmp(read_record+keyinfo[0].seg[0].start, + key, keyinfo[0].seg[0].length)) + { + printf("Found wrong record when searching for key: \"%s\"; Found \"%.*s\"\n", + key, keyinfo[0].seg[0].length, + read_record+keyinfo[0].seg[0].start); + goto err; + } + if (use_blob) + { + ulong blob_length; + if (i & 1) + put_blob_in_record(record+blob_pos,&blob_buffer, &blob_length); + else + bmove(record+blob_pos,read_record+blob_pos,8); + } + if (maria_update(file,read_record,record2)) + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY || key3[n3] == 0) + { + printf("error: %d; can't update:\nFrom: \"%s\"\nTo: \"%s\"\n", + my_errno,read_record,record2); + goto err; + } + if (verbose) + printf("Double key when tried to update:\nFrom: \"%s\"\nTo: \"%s\"\n",record,record2); + } + else + { + key1[atoi(read_record+keyinfo[0].seg[0].start)]--; + key3[atoi(read_record+keyinfo[2].seg[0].start)]=0; + key1[n1]++; key3[n3]=1; + update++; + } + } + } + if (testflag == 3) + goto end; + + for (i=999, dupp_keys=j=0 ; i>0 ; i--) + { + if (key1[i] > dupp_keys) + { + dupp_keys=key1[i]; j=i; + } + } + sprintf(key,"%6d",j); + start=keyinfo[0].seg[0].start; + length=keyinfo[0].seg[0].length; + if (dupp_keys) + { + if (!silent) + printf("- Same key: first - next -> last - prev -> first\n"); + DBUG_PRINT("progpos",("first - next -> last - prev -> first")); + if (verbose) printf(" Using key: \"%s\" Keys: %d\n",key,dupp_keys); + + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + goto err; + if (maria_rsame(file,read_record2,-1)) + goto err; + if (memcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame didn't find same record\n"); + goto end; + } + info.recpos=maria_position(file); + if (maria_rfirst(file,read_record2,0) || + maria_rsame_with_pos(file,read_record2,0,info.recpos) || + memcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame_with_pos didn't find same record\n"); + goto end; + } + { + info.recpos= maria_position(file); + int skr=maria_rnext(file,read_record2,0); + if ((skr && my_errno != HA_ERR_END_OF_FILE) || + maria_rprev(file,read_record2,-1) || + memcmp(read_record,read_record2,reclength) != 0 || + info.recpos != maria_position(file)) + { + printf("maria_rsame_with_pos lost position\n"); + goto end; + } + } + ant=1; + while (maria_rnext(file,read_record2,0) == 0 && + memcmp(read_record2+start,key,length) == 0) ant++; + if (ant != dupp_keys) + { + printf("next: Found: %d keys of %d\n",ant,dupp_keys); + goto end; + } + ant=0; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys) + { + printf("prev: Found: %d records of %d\n",ant,dupp_keys); + goto end; + } + + /* Check of maria_rnext_same */ + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) + goto err; + ant=1; + while (!maria_rnext_same(file,read_record3) && ant < dupp_keys+10) + ant++; + if (ant != dupp_keys || my_errno != HA_ERR_END_OF_FILE) + { + printf("maria_rnext_same: Found: %d records of %d\n",ant,dupp_keys); + goto end; + } + } + + if (!silent) + printf("- All keys: first - next -> last - prev -> first\n"); + DBUG_PRINT("progpos",("All keys: first - next -> last - prev -> first")); + ant=1; + if (maria_rfirst(file,read_record,0)) + { + printf("Can't find first record\n"); + goto end; + } + while ((error=maria_rnext(file,read_record3,0)) == 0 && ant < write_count+10) + ant++; + if (ant != write_count - opt_delete || error != HA_ERR_END_OF_FILE) + { + printf("next: I found: %d records of %d (error: %d)\n", + ant, write_count - opt_delete, error); + goto end; + } + if (maria_rlast(file,read_record2,0) || + bcmp(read_record2,read_record3,reclength)) + { + printf("Can't find last record\n"); + DBUG_DUMP("record2",(byte*) read_record2,reclength); + DBUG_DUMP("record3",(byte*) read_record3,reclength); + goto end; + } + ant=1; + while (maria_rprev(file,read_record3,0) == 0 && ant < write_count+10) + ant++; + if (ant != write_count - opt_delete) + { + printf("prev: I found: %d records of %d\n",ant,write_count); + goto end; + } + if (bcmp(read_record,read_record3,reclength)) + { + printf("Can't find first record\n"); + goto end; + } + + if (!silent) + printf("- Test if: Read first - next - prev - prev - next == first\n"); + DBUG_PRINT("progpos",("- Read first - next - prev - prev - next == first")); + if (maria_rfirst(file,read_record,0) || + maria_rnext(file,read_record3,0) || + maria_rprev(file,read_record3,0) || + maria_rprev(file,read_record3,0) == 0 || + maria_rnext(file,read_record3,0)) + goto err; + if (bcmp(read_record,read_record3,reclength) != 0) + printf("Can't find first record\n"); + + if (!silent) + printf("- Test if: Read last - prev - next - next - prev == last\n"); + DBUG_PRINT("progpos",("Read last - prev - next - next - prev == last")); + if (maria_rlast(file,read_record2,0) || + maria_rprev(file,read_record3,0) || + maria_rnext(file,read_record3,0) || + maria_rnext(file,read_record3,0) == 0 || + maria_rprev(file,read_record3,0)) + goto err; + if (bcmp(read_record2,read_record3,reclength)) + printf("Can't find last record\n"); + + if (!silent) + puts("- Test read key-part"); + strmov(key2,key); + for(i=strlen(key2) ; i-- > 1 ;) + { + key2[i]=0; + + /* The following row is just to catch some bugs in the key code */ + bzero((char*) file->lastkey,file->s->base.max_key_length*2); + if (maria_rkey(file,read_record,0,key2,(uint) i,HA_READ_PREFIX)) + goto err; + if (bcmp(read_record+start,key,(uint) i)) + { + puts("Didn't find right record"); + goto end; + } + } + if (dupp_keys > 2) + { + if (!silent) + printf("- Read key (first) - next - delete - next -> last\n"); + DBUG_PRINT("progpos",("first - next - delete - next -> last")); + if (maria_rkey(file,read_record,0,key,0,HA_READ_KEY_EXACT)) goto err; + if (maria_rnext(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + while (maria_rnext(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-1) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-1); + goto end; + } + } + if (dupp_keys>4) + { + if (!silent) + printf("- Read last of key - prev - delete - prev -> first\n"); + DBUG_PRINT("progpos",("last - prev - delete - prev -> first")); + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-2) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-2); + goto end; + } + } + if (dupp_keys > 6) + { + if (!silent) + printf("- Read first - delete - next -> last\n"); + DBUG_PRINT("progpos",("first - delete - next -> last")); + if (maria_rkey(file,read_record3,0,key,0,HA_READ_KEY_EXACT)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=1; + if (maria_rnext(file,read_record,0)) + goto err; /* Skall finnas poster */ + while (maria_rnext(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-3) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-3); + goto end; + } + + if (!silent) + printf("- Read last - delete - prev -> first\n"); + DBUG_PRINT("progpos",("last - delete - prev -> first")); + if (maria_rprev(file,read_record3,0)) goto err; + if (maria_delete(file,read_record3)) goto err; + opt_delete++; + ant=0; + while (maria_rprev(file,read_record3,0) == 0 && + bcmp(read_record3+start,key,length) == 0) ant++; + if (ant != dupp_keys-4) + { + printf("next: I can only find: %d keys of %d\n",ant,dupp_keys-4); + goto end; + } + } + + if (!silent) + puts("- Test if: Read rrnd - same"); + DBUG_PRINT("progpos",("Read rrnd - same")); + assert(maria_scan_init(file) == 0); + for (i=0 ; i < write_count ; i++) + { + int tmp; + if ((tmp= maria_scan(file,read_record)) && + tmp != HA_ERR_END_OF_FILE && + tmp != HA_ERR_RECORD_DELETED) + { + printf("Got error %d when scanning table\n", tmp); + break; + } + } + maria_scan_end(file); + if (i != write_count && i != write_count - opt_delete) + { + printf("Found wrong number of rows while scanning table\n"); + goto err; + } + + bmove(read_record2,read_record,reclength); + for (i=min(2,keys) ; i-- > 0 ;) + { + if (maria_rsame(file,read_record2,(int) i)) goto err; + if (bcmp(read_record,read_record2,reclength) != 0) + { + printf("maria_rsame didn't find same record\n"); + goto end; + } + } + if (!silent) + puts("- Test maria_records_in_range"); + maria_status(file,&info,HA_STATUS_VARIABLE); + for (i=0 ; i < info.keys ; i++) + { + key_range min_key, max_key; + if (maria_rfirst(file,read_record,(int) i) || + maria_rlast(file,read_record2,(int) i)) + goto err; + copy_key(file,(uint) i,(uchar*) read_record,(uchar*) key); + copy_key(file,(uint) i,(uchar*) read_record2,(uchar*) key2); + min_key.key= key; + min_key.length= USE_WHOLE_KEY; + min_key.flag= HA_READ_KEY_EXACT; + max_key.key= key2; + max_key.length= USE_WHOLE_KEY; + max_key.flag= HA_READ_AFTER_KEY; + + range_records= maria_records_in_range(file,(int) i, &min_key, &max_key); + if (range_records < info.records*8/10 || + range_records > info.records*12/10) + { + printf("maria_records_range returned %ld; Should be about %ld\n", + (long) range_records,(long) info.records); + goto end; + } + if (verbose) + { + printf("maria_records_range returned %ld; Exact is %ld (diff: %4.2g %%)\n", + (long) range_records, (long) info.records, + labs((long) range_records - (long) info.records)*100.0/ + info.records); + } + } + for (i=0 ; i < 5 ; i++) + { + for (j=rnd(1000)+1 ; j>0 && key1[j] == 0 ; j--) ; + for (k=rnd(1000)+1 ; k>0 && key1[k] == 0 ; k--) ; + if (j != 0 && k != 0) + { + key_range min_key, max_key; + if (j > k) + swap_variables(int, j, k); + sprintf(key,"%6d",j); + sprintf(key2,"%6d",k); + + min_key.key= key; + min_key.length= USE_WHOLE_KEY; + min_key.flag= HA_READ_AFTER_KEY; + max_key.key= key2; + max_key.length= USE_WHOLE_KEY; + max_key.flag= HA_READ_BEFORE_KEY; + range_records= maria_records_in_range(file, 0, &min_key, &max_key); + records=0; + for (j++ ; j < k ; j++) + records+=key1[j]; + if ((long) range_records < (long) records*7/10-2 || + (long) range_records > (long) records*14/10+2) + { + printf("maria_records_range for key: %d returned %lu; Should be about %lu\n", + i, (ulong) range_records, (ulong) records); + goto end; + } + if (verbose && records) + { + printf("maria_records_range returned %lu; Exact is %lu (diff: %4.2g %%)\n", + (ulong) range_records, (ulong) records, + labs((long) range_records-(long) records)*100.0/records); + + } + } + } + + if (!silent) + printf("- maria_info\n"); + maria_status(file,&info,HA_STATUS_VARIABLE | HA_STATUS_CONST); + if (info.records != write_count-opt_delete || info.deleted > opt_delete + update + || info.keys != keys) + { + puts("Wrong info from maria_info"); + printf("Got: records: %lu delete: %lu i_keys: %d\n", + (ulong) info.records, (ulong) info.deleted, info.keys); + } + if (verbose) + { + char buff[80]; + get_date(buff,3,info.create_time); + printf("info: Created %s\n",buff); + get_date(buff,3,info.check_time); + printf("info: checked %s\n",buff); + get_date(buff,3,info.update_time); + printf("info: Modified %s\n",buff); + } + + maria_panic(HA_PANIC_WRITE); + maria_panic(HA_PANIC_READ); + if (maria_is_changed(file)) + puts("Warning: maria_is_changed reported that datafile was changed"); + + if (!silent) + printf("- maria_extra(CACHE) + maria_rrnd.... + maria_extra(NO_CACHE)\n"); + if (maria_reset(file) || maria_extra(file,HA_EXTRA_CACHE,0)) + { + if (locking || (!use_blob && !pack_fields)) + { + puts("got error from maria_extra(HA_EXTRA_CACHE)"); + goto end; + } + } + ant=0; + assert(maria_scan_init(file) == 0); + while ((error= maria_scan(file,record)) != HA_ERR_END_OF_FILE && + ant < write_count + 10) + ant+= error ? 0 : 1; + maria_scan_end(file); + if (ant != write_count-opt_delete) + { + printf("scan with cache: I can only find: %d records of %d\n", + ant,write_count-opt_delete); + goto end; + } + if (maria_extra(file,HA_EXTRA_NO_CACHE,0)) + { + puts("got error from maria_extra(HA_EXTRA_NO_CACHE)"); + goto end; + } + + ant=0; + maria_scan_init(file); + while ((error=maria_scan(file,record)) != HA_ERR_END_OF_FILE && + ant < write_count + 10) + ant+= error ? 0 : 1; + if (ant != write_count-opt_delete) + { + printf("scan with cache: I can only find: %d records of %d\n", + ant,write_count-opt_delete); + goto end; + } + + if (testflag == 4) + goto end; + + if (!silent) + printf("- Removing keys\n"); + DBUG_PRINT("progpos",("Removing keys")); + lastpos = HA_OFFSET_ERROR; + /* DBUG_POP(); */ + maria_reset(file); + found_parts=0; + maria_scan_init(file); + while ((error= maria_scan(file,read_record)) != HA_ERR_END_OF_FILE) + { + info.recpos=maria_position(file); + if (lastpos >= info.recpos && lastpos != HA_OFFSET_ERROR) + { + printf("maria_rrnd didn't advance filepointer; old: %ld, new: %ld\n", + (long) lastpos, (long) info.recpos); + goto err; + } + lastpos=info.recpos; + if (error == 0) + { + if (opt_delete == (uint) remove_count) /* While testing */ + goto end; + if (rnd(2) == 1 && maria_rsame(file,read_record,-1)) + { + printf("can't find record %lx\n",(long) info.recpos); + goto err; + } + if (use_blob) + { + ulong blob_length,pos; + uchar *ptr; + memcpy_fixed(&ptr, read_record+blob_pos+4, sizeof(ptr)); + longget(blob_length,read_record+blob_pos); + for (pos=0 ; pos < blob_length ; pos++) + { + if (ptr[pos] != (uchar) (blob_length+pos)) + { + printf("Found blob with wrong info at %ld\n",(long) lastpos); + maria_scan_end(file); + my_errno= 0; + goto err; + } + } + } + if (maria_delete(file,read_record)) + { + printf("can't delete record: %6.6s, delete_count: %d\n", + read_record, opt_delete); + maria_scan_end(file); + goto err; + } + opt_delete++; + } + else + found_parts++; + } + maria_scan_end(file); + if (my_errno != HA_ERR_END_OF_FILE && my_errno != HA_ERR_RECORD_DELETED) + printf("error: %d from maria_rrnd\n",my_errno); + if (write_count != opt_delete) + { + printf("Deleted only %d of %d records (%d parts)\n",opt_delete,write_count, + found_parts); + goto err; + } +end: + if (maria_close(file)) + goto err; + maria_panic(HA_PANIC_CLOSE); /* Should close log */ + if (!silent) + { + printf("\nFollowing test have been made:\n"); + printf("Write records: %d\nUpdate records: %d\nSame-key-read: %d\nDelete records: %d\n", write_count,update,dupp_keys,opt_delete); + if (rec_pointer_size) + printf("Record pointer size: %d\n",rec_pointer_size); + printf("maria_block_size: %lu\n", maria_block_size); + if (write_cacheing) + puts("Key cache resized"); + if (write_cacheing) + puts("Write cacheing used"); + if (write_cacheing) + puts("quick mode"); + if (async_io && locking) + puts("Asyncron io with locking used"); + else if (locking) + puts("Locking used"); + if (use_blob) + puts("blobs used"); + printf("key cache status: \n\ +blocks used:%10lu\n\ +not flushed:%10lu\n\ +w_requests: %10lu\n\ +writes: %10lu\n\ +r_requests: %10lu\n\ +reads: %10lu\n", + maria_pagecache->blocks_used, + maria_pagecache->global_blocks_changed, + (ulong) maria_pagecache->global_cache_w_requests, + (ulong) maria_pagecache->global_cache_write, + (ulong) maria_pagecache->global_cache_r_requests, + (ulong) maria_pagecache->global_cache_read); + } + end_pagecache(maria_pagecache,1); + my_free(blob_buffer, MYF(MY_ALLOW_ZERO_PTR)); + my_end(silent ? MY_CHECK_ERROR : MY_CHECK_ERROR | MY_GIVE_INFO); + return(0); +err: + printf("got error: %d when using MARIA-database\n",my_errno); + if (file) + VOID(maria_close(file)); + maria_end(); + return(1); +} /* main */ + + +/* Read options */ + +static void get_options(int argc, char **argv) +{ + char *pos,*progname; + + progname= argv[0]; + + while (--argc >0 && *(pos = *(++argv)) == '-' ) { + switch(*++pos) { + case 'B': + pack_type= HA_BINARY_PACK_KEY; + break; + case 'b': + use_blob= 1; + if (*++pos) + use_blob= atol(pos); + break; + case 'K': /* Use key cacheing */ + pagecacheing=1; + if (*++pos) + pagecache_size=atol(pos); + break; + case 'W': /* Use write cacheing */ + write_cacheing=1; + if (*++pos) + my_default_record_cache_size=atoi(pos); + break; + case 'd': + remove_count= atoi(++pos); + break; + case 'i': + if (*++pos) + srand(srand_arg= atoi(pos)); + break; + case 'L': + locking=1; + break; + case 'A': /* use asyncron io */ + async_io=1; + if (*++pos) + my_default_record_cache_size=atoi(pos); + break; + case 'v': /* verbose */ + verbose=1; + break; + case 'm': /* records */ + if ((recant=atoi(++pos)) < 10 && testflag > 1) + { + fprintf(stderr,"record count must be >= 10 (if testflag != 1)\n"); + exit(1); + } + break; + case 'e': /* maria_block_length */ + case 'E': + if ((maria_block_size= atoi(++pos)) < MARIA_MIN_KEY_BLOCK_LENGTH || + maria_block_size > MARIA_MAX_KEY_BLOCK_LENGTH) + { + fprintf(stderr,"Wrong maria_block_length\n"); + exit(1); + } + maria_block_size= my_round_up_to_next_power(maria_block_size); + break; + case 'f': + if ((first_key=atoi(++pos)) < 0 || first_key >= MARIA_KEYS) + first_key=0; + break; + case 'k': + if ((keys=(uint) atoi(++pos)) < 1 || + keys > (uint) (MARIA_KEYS-first_key)) + keys=MARIA_KEYS-first_key; + break; + case 'M': + record_type= BLOCK_RECORD; + break; + case 'P': + pack_type=0; /* Don't use DIFF_LENGTH */ + pack_seg=0; + break; + case 'R': /* Length of record pointer */ + rec_pointer_size=atoi(++pos); + if (rec_pointer_size > 7) + rec_pointer_size=0; + break; + case 'S': + pack_fields=0; /* Static-length-records */ + record_type= STATIC_RECORD; + break; + case 's': + silent=1; + break; + case 't': + testflag=atoi(++pos); /* testmod */ + break; + case 'T': + transactional= 1; + break; + case 'q': + opt_quick_mode=1; + break; + case 'c': + create_flag|= HA_CREATE_CHECKSUM; + break; + case 'D': + create_flag|=HA_CREATE_DELAY_KEY_WRITE; + break; + case '?': + case 'I': + case 'V': + printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + puts("By Monty, for your professional use\n"); + printf("Usage: %s [-?AbBcDIKLPRqSsTVWltv] [-k#] [-f#] [-m#] [-e#] [-E#] [-t#]\n", + progname); + exit(0); + case '#': + DBUG_PUSH (++pos); + break; + default: + printf("Illegal option: '%c'\n",*pos); + break; + } + } + return; +} /* get options */ + + /* Get a random value 0 <= x <= n */ + +static uint rnd(uint max_value) +{ + return (uint) ((rand() & 32767)/32767.0*max_value); +} /* rnd */ + + + /* Create a variable length record */ + +static void fix_length(byte *rec, uint length) +{ + bmove(rec+STANDARD_LENGTH, + "0123456789012345678901234567890123456789012345678901234567890", + length-STANDARD_LENGTH); + strfill(rec+length,STANDARD_LENGTH+60-length,' '); +} /* fix_length */ + + + /* Put maybe a blob in record */ + +static void put_blob_in_record(char *blob_pos, char **blob_buffer, + ulong *blob_length) +{ + ulong i,length; + if (use_blob) + { + if (rnd(10) == 0) + { + if (! *blob_buffer && + !(*blob_buffer=my_malloc((uint) use_blob,MYF(MY_WME)))) + { + use_blob=0; + return; + } + length=rnd(use_blob); + for (i=0 ; i < length ; i++) + (*blob_buffer)[i]=(char) (length+i); + int4store(blob_pos,length); + memcpy_fixed(blob_pos+4,(char*) blob_buffer,sizeof(char*)); + *blob_length= length; + } + else + { + int4store(blob_pos,0); + *blob_length= 0; + } + } + return; +} + + +static void copy_key(MARIA_HA *info,uint inx,uchar *rec,uchar *key_buff) +{ + HA_KEYSEG *keyseg; + + for (keyseg=info->s->keyinfo[inx].seg ; keyseg->type ; keyseg++) + { + memcpy(key_buff,rec+keyseg->start,(size_t) keyseg->length); + key_buff+=keyseg->length; + } + return; +} diff --git a/storage/maria/ma_test3.c b/storage/maria/ma_test3.c new file mode 100644 index 00000000000..dcc0ac2e013 --- /dev/null +++ b/storage/maria/ma_test3.c @@ -0,0 +1,500 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Test av locking */ + +#ifndef __NETWARE__ + +#include "maria.h" +#include +#ifdef HAVE_SYS_WAIT_H +# include +#endif +#ifndef WEXITSTATUS +# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) +#endif +#ifndef WIFEXITED +# define WIFEXITED(stat_val) (((stat_val) & 255) == 0) +#endif + + +#if defined(HAVE_LRAND48) +#define rnd(X) (lrand48() % X) +#define rnd_init(X) srand48(X) +#else +#define rnd(X) (random() % X) +#define rnd_init(X) srandom(X) +#endif + + +const char *filename= "test3"; +uint tests=10,forks=10,pagecacheing=0; + +static void get_options(int argc, char *argv[]); +void start_test(int id); +int test_read(MARIA_HA *,int),test_write(MARIA_HA *,int,int), + test_update(MARIA_HA *,int,int),test_rrnd(MARIA_HA *,int); + +struct record { + char id[8]; + char nr[4]; + char text[10]; +} record; + + +int main(int argc,char **argv) +{ + int status,wait_ret; + uint i=0; + MARIA_KEYDEF keyinfo[10]; + MARIA_COLUMNDEF recinfo[10]; + HA_KEYSEG keyseg[10][2]; + MY_INIT(argv[0]); + get_options(argc,argv); + + fprintf(stderr, "WARNING! this program is to test 'external locking'" + " (when several processes share a table through file locking)" + " which is not supported by Maria at all; expect errors." + " We may soon remove this program.\n"); + maria_init(); + bzero((char*) keyinfo,sizeof(keyinfo)); + bzero((char*) recinfo,sizeof(recinfo)); + bzero((char*) keyseg,sizeof(keyseg)); + keyinfo[0].seg= &keyseg[0][0]; + keyinfo[0].seg[0].start=0; + keyinfo[0].seg[0].length=8; + keyinfo[0].seg[0].type=HA_KEYTYPE_TEXT; + keyinfo[0].seg[0].flag=HA_SPACE_PACK; + keyinfo[0].key_alg=HA_KEY_ALG_BTREE; + keyinfo[0].keysegs=1; + keyinfo[0].flag = (uint8) HA_PACK_KEY; + keyinfo[0].block_length= 0; /* Default block length */ + keyinfo[1].seg= &keyseg[1][0]; + keyinfo[1].seg[0].start=8; + keyinfo[1].seg[0].length=4; /* Long is always 4 in maria */ + keyinfo[1].seg[0].type=HA_KEYTYPE_LONG_INT; + keyinfo[1].seg[0].flag=0; + keyinfo[1].key_alg=HA_KEY_ALG_BTREE; + keyinfo[1].keysegs=1; + keyinfo[1].flag =HA_NOSAME; + keyinfo[1].block_length= 0; /* Default block length */ + + recinfo[0].type=0; + recinfo[0].length=sizeof(record.id); + recinfo[1].type=0; + recinfo[1].length=sizeof(record.nr); + recinfo[2].type=0; + recinfo[2].length=sizeof(record.text); + + puts("- Creating maria-file"); + my_delete(filename,MYF(0)); /* Remove old locks under gdb */ + if (maria_create(filename,BLOCK_RECORD, 2, &keyinfo[0],2,&recinfo[0],0, + (MARIA_UNIQUEDEF*) 0, (MARIA_CREATE_INFO*) 0,0)) + exit(1); + + rnd_init(0); + printf("- Starting %d processes\n",forks); fflush(stdout); + for (i=0 ; i < forks; i++) + { + if (!fork()) + { + start_test(i+1); + sleep(1); + return 0; + } + VOID(rnd(1)); + } + + for (i=0 ; i < forks ; i++) + while ((wait_ret=wait(&status)) && wait_ret == -1); + maria_end(); + return 0; +} + + +static void get_options(int argc, char **argv) +{ + char *pos,*progname; + + progname= argv[0]; + + while (--argc >0 && *(pos = *(++argv)) == '-' ) { + switch(*++pos) { + case 'f': + forks=atoi(++pos); + break; + case 't': + tests=atoi(++pos); + break; + case 'K': /* Use key cacheing */ + pagecacheing=1; + break; + case 'A': /* All flags */ + pagecacheing=1; + break; + case '?': + case 'I': + case 'V': + printf("%s Ver 1.0 for %s at %s\n",progname,SYSTEM_TYPE,MACHINE_TYPE); + puts("By Monty, for your professional use\n"); + puts("Test av locking with threads\n"); + printf("Usage: %s [-?lKA] [-f#] [-t#]\n",progname); + exit(0); + case '#': + DBUG_PUSH (++pos); + break; + default: + printf("Illegal option: '%c'\n",*pos); + break; + } + } + return; +} + + +void start_test(int id) +{ + uint i; + int error,lock_type; + MARIA_INFO isam_info; + MARIA_HA *file,*file1,*file2=0,*lock; + + if (!(file1=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED)) || + !(file2=maria_open(filename,O_RDWR,HA_OPEN_WAIT_IF_LOCKED))) + { + fprintf(stderr,"Can't open isam-file: %s\n",filename); + exit(1); + } + if (pagecacheing && rnd(2) == 0) + init_pagecache(maria_pagecache, 65536L, 0, 0, MARIA_KEY_BLOCK_LENGTH); + printf("Process %d, pid: %d\n",id,getpid()); fflush(stdout); + + for (error=i=0 ; i < tests && !error; i++) + { + file= (rnd(2) == 1) ? file1 : file2; + lock=0 ; lock_type=0; + if (rnd(10) == 0) + { + if (maria_lock_database(lock=(rnd(2) ? file1 : file2), + lock_type=(rnd(2) == 0 ? F_RDLCK : F_WRLCK))) + { + fprintf(stderr,"%2d: start: Can't lock table %d\n",id,my_errno); + error=1; + break; + } + } + switch (rnd(4)) { + case 0: error=test_read(file,id); break; + case 1: error=test_rrnd(file,id); break; + case 2: error=test_write(file,id,lock_type); break; + case 3: error=test_update(file,id,lock_type); break; + } + if (lock) + maria_lock_database(lock,F_UNLCK); + } + if (!error) + { + maria_status(file1,&isam_info,HA_STATUS_VARIABLE); + printf("%2d: End of test. Records: %ld Deleted: %ld\n", + id,(long) isam_info.records, (long) isam_info.deleted); + fflush(stdout); + } + + maria_close(file1); + maria_close(file2); + if (error) + { + printf("%2d: Aborted\n",id); fflush(stdout); + exit(1); + } +} + + +int test_read(MARIA_HA *file,int id) +{ + uint i,lock,found,next,prev; + ulong find; + + lock=0; + if (rnd(2) == 0) + { + lock=1; + if (maria_lock_database(file,F_RDLCK)) + { + fprintf(stderr,"%2d: Can't lock table %d\n",id,my_errno); + return 1; + } + } + + found=next=prev=0; + for (i=0 ; i < 100 ; i++) + { + find=rnd(100000); + if (!maria_rkey(file,record.id,1,(byte*) &find, + sizeof(find),HA_READ_KEY_EXACT)) + found++; + else + { + if (my_errno != HA_ERR_KEY_NOT_FOUND) + { + fprintf(stderr,"%2d: Got error %d from read in read\n",id,my_errno); + return 1; + } + else if (!maria_rnext(file,record.id,1)) + next++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in read\n",id,my_errno); + return 1; + } + else if (!maria_rprev(file,record.id,1)) + prev++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in read\n", + id,my_errno); + return 1; + } + } + } + } + } + if (lock) + { + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + return 1; + } + } + printf("%2d: read: found: %5d next: %5d prev: %5d\n", + id,found,next,prev); + fflush(stdout); + return 0; +} + + +int test_rrnd(MARIA_HA *file,int id) +{ + uint count,lock; + + lock=0; + if (rnd(2) == 0) + { + lock=1; + if (maria_lock_database(file,F_RDLCK)) + { + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + maria_close(file); + return 1; + } + if (rnd(2) == 0) + maria_extra(file,HA_EXTRA_CACHE,0); + } + + count=0; + if (maria_rrnd(file,record.id,0L)) + { + if (my_errno == HA_ERR_END_OF_FILE) + goto end; + fprintf(stderr,"%2d: Can't read first record (%d)\n",id,my_errno); + return 1; + } + for (count=1 ; !maria_rrnd(file,record.id,HA_OFFSET_ERROR) ;count++) ; + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rrnd\n",id,my_errno); + return 1; + } + +end: + if (lock) + { + maria_extra(file,HA_EXTRA_NO_CACHE,0); + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + exit(0); + } + } + printf("%2d: rrnd: %5d\n",id,count); fflush(stdout); + return 0; +} + + +int test_write(MARIA_HA *file,int id,int lock_type) +{ + uint i,tries,count,lock; + + lock=0; + if (rnd(2) == 0 || lock_type == F_RDLCK) + { + lock=1; + if (maria_lock_database(file,F_WRLCK)) + { + if (lock_type == F_RDLCK && my_errno == EDEADLK) + { + printf("%2d: write: deadlock\n",id); fflush(stdout); + return 0; + } + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + maria_close(file); + return 1; + } + if (rnd(2) == 0) + maria_extra(file,HA_EXTRA_WRITE_CACHE,0); + } + + sprintf(record.id,"%7d",getpid()); + strnmov(record.text,"Testing...", sizeof(record.text)); + + tries=(uint) rnd(100)+10; + for (i=count=0 ; i < tries ; i++) + { + uint32 tmp=rnd(80000)+20000; + int4store(record.nr,tmp); + if (!maria_write(file,record.id)) + count++; + else + { + if (my_errno != HA_ERR_FOUND_DUPP_KEY) + { + fprintf(stderr,"%2d: Got error %d (errno %d) from write\n",id,my_errno, + errno); + return 1; + } + } + } + if (lock) + { + maria_extra(file,HA_EXTRA_NO_CACHE,0); + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"%2d: Can't unlock table\n",id); + exit(0); + } + } + printf("%2d: write: %5d\n",id,count); fflush(stdout); + return 0; +} + + +int test_update(MARIA_HA *file,int id,int lock_type) +{ + uint i,lock,found,next,prev,update; + uint32 tmp; + char find[4]; + struct record new_record; + + lock=0; + if (rnd(2) == 0 || lock_type == F_RDLCK) + { + lock=1; + if (maria_lock_database(file,F_WRLCK)) + { + if (lock_type == F_RDLCK && my_errno == EDEADLK) + { + printf("%2d: write: deadlock\n",id); fflush(stdout); + return 0; + } + fprintf(stderr,"%2d: Can't lock table (%d)\n",id,my_errno); + return 1; + } + } + bzero((char*) &new_record,sizeof(new_record)); + strmov(new_record.text,"Updated"); + + found=next=prev=update=0; + for (i=0 ; i < 100 ; i++) + { + tmp=rnd(100000); + int4store(find,tmp); + if (!maria_rkey(file,record.id,1,(byte*) find, + sizeof(find),HA_READ_KEY_EXACT)) + found++; + else + { + if (my_errno != HA_ERR_KEY_NOT_FOUND) + { + fprintf(stderr,"%2d: Got error %d from read in update\n",id,my_errno); + return 1; + } + else if (!maria_rnext(file,record.id,1)) + next++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in update\n", + id,my_errno); + return 1; + } + else if (!maria_rprev(file,record.id,1)) + prev++; + else + { + if (my_errno != HA_ERR_END_OF_FILE) + { + fprintf(stderr,"%2d: Got error %d from rnext in update\n", + id,my_errno); + return 1; + } + continue; + } + } + } + memcpy_fixed(new_record.id,record.id,sizeof(record.id)); + tmp=rnd(20000)+40000; + int4store(new_record.nr,tmp); + if (!maria_update(file,record.id,new_record.id)) + update++; + else + { + if (my_errno != HA_ERR_RECORD_CHANGED && + my_errno != HA_ERR_RECORD_DELETED && + my_errno != HA_ERR_FOUND_DUPP_KEY) + { + fprintf(stderr,"%2d: Got error %d from update\n",id,my_errno); + return 1; + } + } + } + if (lock) + { + if (maria_lock_database(file,F_UNLCK)) + { + fprintf(stderr,"Can't unlock table,id, error%d\n",my_errno); + return 1; + } + } + printf("%2d: update: %5d\n",id,update); fflush(stdout); + return 0; +} + +#else /* __NETWARE__ */ + +#include + +main() +{ + fprintf(stderr,"this test has not been ported to NetWare\n"); + return 0; +} + +#endif /* __NETWARE__ */ diff --git a/storage/maria/ma_test_all.res b/storage/maria/ma_test_all.res new file mode 100644 index 00000000000..57b0feeeae8 --- /dev/null +++ b/storage/maria/ma_test_all.res @@ -0,0 +1,62 @@ +Running tests with dynamic row format +Running tests with static row format +Running tests with block row format +ma_test2 -s -L -K -R1 -m2000 ; Should give error 135 +Error: 135 in write at record: 1099 +got error: 135 when using MARIA-database +./maria_chk -sm test2 will warn that 'Datafile is almost full' +maria_chk: MARIA file test2 +maria_chk: warning: Datafile is almost full, 65516 of 65534 used +MARIA-table 'test2' is usable but should be fixed + +real 0m0.808s +user 0m0.584s +sys 0m0.212s + +real 0m0.780s +user 0m0.584s +sys 0m0.176s + +real 0m0.809s +user 0m0.616s +sys 0m0.180s + +real 0m1.356s +user 0m1.140s +sys 0m0.188s + +real 0m0.783s +user 0m0.600s +sys 0m0.176s + +real 0m1.390s +user 0m1.184s +sys 0m0.152s + +real 0m1.875s +user 0m1.632s +sys 0m0.244s + +real 0m1.313s +user 0m1.148s +sys 0m0.160s + +real 0m1.846s +user 0m1.644s +sys 0m0.188s + +real 0m1.875s +user 0m1.632s +sys 0m0.212s + +real 0m1.819s +user 0m1.672s +sys 0m0.124s + +real 0m2.117s +user 0m1.816s +sys 0m0.292s + +real 0m1.871s +user 0m1.636s +sys 0m0.196s diff --git a/storage/maria/ma_test_all.sh b/storage/maria/ma_test_all.sh new file mode 100755 index 00000000000..76b6c32913f --- /dev/null +++ b/storage/maria/ma_test_all.sh @@ -0,0 +1,201 @@ +#!/bin/sh +# +# Execute some simple basic test on MyISAM libary to check if things +# works at all. + +# If you want to run this in Valgrind, you should use --trace-children=yes, +# so that it detects problems in ma_test* and not in the shell script +valgrind="valgrind --alignment=8 --leak-check=yes" +silent="-s" +suffix="" +#set -x -v -e +if [ -z "$maria_path" ] +then + maria_path="." +fi + +run_tests() +{ + row_type=$1 + # + # First some simple tests + # + $maria_path/ma_test1$suffix $silent $row_type + $maria_path/maria_chk$suffix -se test1 + $maria_path/ma_test1$suffix $silent -N $row_type + $maria_path/maria_chk$suffix -se test1 + $maria_path/ma_test1$suffix $silent -P --checksum $row_type + $maria_path/maria_chk$suffix -se test1 + $maria_path/ma_test1$suffix $silent -P -N $row_type + $maria_path/maria_chk$suffix -se test1 + $maria_path/ma_test1$suffix $silent -B -N -R2 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -k 480 --unique $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -N -R1 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -p $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -p -N --unique $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -p -N --key_length=127 --checksum $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -p -N --key_length=128 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -p --key_length=480 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -B $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -B --key_length=64 --unique $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -B -k 480 --checksum $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -B -k 480 -N --unique --checksum $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -m $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -m -P --unique --checksum $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -m -P --key_length=480 --key_cache $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -m -p $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -w --unique $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -w --key_length=64 --checksum $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -w -N --key_length=480 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -w --key_length=480 --checksum $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -b -N $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -a -b --key_length=480 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent -p -B --key_length=480 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent --checksum --unique $row_type + $maria_path/maria_chk$suffix -se test1 + $maria_path/ma_test1$suffix $silent --unique $row_type + $maria_path/maria_chk$suffix -se test1 + + $maria_path/ma_test1$suffix $silent --key_multiple -N -S $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent --key_multiple -a -p --key_length=480 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent --key_multiple -a -B --key_length=480 $row_type + $maria_path/maria_chk$suffix -sm test1 + $maria_path/ma_test1$suffix $silent --key_multiple -P -S $row_type + $maria_path/maria_chk$suffix -sm test1 + + $maria_path/maria_pack$suffix --force -s test1 + $maria_path/maria_chk$suffix -ess test1 + + $maria_path/ma_test2$suffix $silent -L -K -W -P $row_type + $maria_path/maria_chk$suffix -sm test2 + $maria_path/ma_test2$suffix $silent -L -K -W -P -A $row_type + $maria_path/maria_chk$suffix -sm test2 + $maria_path/ma_test2$suffix $silent -L -K -P -R3 -m50 -b1000000 $row_type + $maria_path/maria_chk$suffix -sm test2 + $maria_path/ma_test2$suffix $silent -L -B $row_type + $maria_path/maria_chk$suffix -sm test2 + $maria_path/ma_test2$suffix $silent -D -B -c $row_type + $maria_path/maria_chk$suffix -sm test2 + $maria_path/ma_test2$suffix $silent -m10000 -e4096 -K $row_type + $maria_path/maria_chk$suffix -sm test2 + $maria_path/ma_test2$suffix $silent -m10000 -e8192 -K $row_type + $maria_path/maria_chk$suffix -sm test2 + $maria_path/ma_test2$suffix $silent -m10000 -e16384 -E16384 -K -L $row_type + $maria_path/maria_chk$suffix -sm test2 +} + +run_repair_tests() +{ + row_type=$1 + $maria_path/ma_test1$suffix $silent --checksum $row_type + $maria_path/maria_chk$suffix -se test1 + $maria_path/maria_chk$suffix -rs test1 + $maria_path/maria_chk$suffix -se test1 + $maria_path/maria_chk$suffix -rqs test1 + $maria_path/maria_chk$suffix -se test1 + $maria_path/maria_chk$suffix -rs --correct-checksum test1 + $maria_path/maria_chk$suffix -se test1 + $maria_path/maria_chk$suffix -rqs --correct-checksum test1 + $maria_path/maria_chk$suffix -se test1 + $maria_path/maria_chk$suffix -ros --correct-checksum test1 + $maria_path/maria_chk$suffix -se test1 + $maria_path/maria_chk$suffix -rqos --correct-checksum test1 + $maria_path/maria_chk$suffix -se test1 +} + +run_pack_tests() +{ + row_type=$1 + # check of maria_pack / maria_chk + $maria_path/ma_test1$suffix $silent --checksum $row_type + $maria_path/maria_pack$suffix --force -s test1 + $maria_path/maria_chk$suffix -ess test1 + $maria_path/maria_chk$suffix -rqs test1 + $maria_path/maria_chk$suffix -es test1 + $maria_path/maria_chk$suffix -rs test1 + $maria_path/maria_chk$suffix -es test1 + $maria_path/maria_chk$suffix -rus test1 + $maria_path/maria_chk$suffix -es test1 + + $maria_path/ma_test1$suffix $silent --checksum -S $row_type + $maria_path/maria_chk$suffix -se test1 + $maria_path/maria_chk$suffix -ros test1 + $maria_path/maria_chk$suffix -rqs test1 + $maria_path/maria_chk$suffix -se test1 + + $maria_path/maria_pack$suffix --force -s test1 + $maria_path/maria_chk$suffix -rqs test1 + $maria_path/maria_chk$suffix -es test1 + $maria_path/maria_chk$suffix -rus test1 + $maria_path/maria_chk$suffix -es test1 +} + +echo "Running tests with dynamic row format" +run_tests "" +run_repair_tests "" +run_pack_tests "" + +echo "Running tests with static row format" +run_tests -S +run_repair_tests -S +run_pack_tests -S + +echo "Running tests with block row format" +run_tests -M + +echo "Running tests with block row format and transactions" +run_tests "-M -T" + +# +# Tests that gives warnings +# + +$maria_path/ma_test2$suffix $silent -L -K -W -P -S -R1 -m500 +$maria_path/maria_chk$suffix -sm test2 +echo "ma_test2$suffix $silent -L -K -R1 -m2000 ; Should give error 135" +$maria_path/ma_test2$suffix $silent -L -K -R1 -m2000 +echo "$maria_path/maria_chk$suffix -sm test2 will warn that 'Datafile is almost full'" +$maria_path/maria_chk$suffix -sm test2 +$maria_path/maria_chk$suffix -ssm test2 + +# +# Some timing tests +# +time $maria_path/ma_test2$suffix $silent +time $maria_path/ma_test2$suffix $silent -S +time $maria_path/ma_test2$suffix $silent -M +time $maria_path/ma_test2$suffix $silent -B +time $maria_path/ma_test2$suffix $silent -L +time $maria_path/ma_test2$suffix $silent -K +time $maria_path/ma_test2$suffix $silent -K -B +time $maria_path/ma_test2$suffix $silent -L -B +time $maria_path/ma_test2$suffix $silent -L -K -B +time $maria_path/ma_test2$suffix $silent -L -K -W -B +time $maria_path/ma_test2$suffix $silent -L -K -W -B -S +time $maria_path/ma_test2$suffix $silent -L -K -W -B -M +time $maria_path/ma_test2$suffix $silent -D -K -W -B -S diff --git a/storage/maria/ma_unique.c b/storage/maria/ma_unique.c new file mode 100644 index 00000000000..06d29b9b037 --- /dev/null +++ b/storage/maria/ma_unique.c @@ -0,0 +1,235 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Functions to check if a row is unique */ + +#include "maria_def.h" +#include + +my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, byte *record, + ha_checksum unique_hash, my_off_t disk_pos) +{ + my_off_t lastpos=info->cur_row.lastpos; + MARIA_KEYDEF *key= &info->s->keyinfo[def->key]; + byte *key_buff= info->lastkey2; + DBUG_ENTER("_ma_check_unique"); + DBUG_PRINT("enter",("unique_hash: %lu", (ulong) unique_hash)); + + maria_unique_store(record+key->seg->start, unique_hash); + _ma_make_key(info,def->key,key_buff,record,0); + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (_ma_search(info,info->s->keyinfo+def->key,key_buff, + MARIA_UNIQUE_HASH_LENGTH, + SEARCH_FIND,info->s->state.key_root[def->key])) + { + info->page_changed=1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; + DBUG_RETURN(0); /* No matching rows */ + } + + for (;;) + { + if (info->cur_row.lastpos != disk_pos && + !(*info->s->compare_unique)(info,def,record,info->cur_row.lastpos)) + { + my_errno=HA_ERR_FOUND_DUPP_UNIQUE; + info->errkey= (int) def->key; + info->dup_key_pos= info->cur_row.lastpos; + info->page_changed= 1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; + DBUG_PRINT("info",("Found duplicate")); + DBUG_RETURN(1); /* Found identical */ + } + if (_ma_search_next(info,info->s->keyinfo+def->key, info->lastkey, + MARIA_UNIQUE_HASH_LENGTH, SEARCH_BIGGER, + info->s->state.key_root[def->key]) || + bcmp((char*) info->lastkey, (char*) key_buff, + MARIA_UNIQUE_HASH_LENGTH)) + { + info->page_changed= 1; /* Can't optimize read next */ + info->cur_row.lastpos= lastpos; + DBUG_RETURN(0); /* end of tree */ + } + } +} + + +/* + Calculate a hash for a row + + TODO + Add support for bit fields +*/ + +ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *record) +{ + const byte *pos, *end; + ha_checksum crc= 0; + ulong seed1=0, seed2= 4; + HA_KEYSEG *keyseg; + + for (keyseg=def->seg ; keyseg < def->end ; keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint length=keyseg->length; + + if (keyseg->null_bit) + { + if (record[keyseg->null_pos] & keyseg->null_bit) + { + /* + Change crc in a way different from an empty string or 0. + (This is an optimisation; The code will work even if this isn't + done) + */ + crc=((crc << 8) + 511+ + (crc >> (8*sizeof(ha_checksum)-8))); + continue; + } + } + pos= record+keyseg->start; + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= keyseg->bit_start; + uint tmp_length= (pack_length == 1 ? (uint) *(uchar*) pos : + uint2korr(pos)); + pos+= pack_length; /* Skip VARCHAR length */ + set_if_smaller(length,tmp_length); + } + else if (keyseg->flag & HA_BLOB_PART) + { + uint tmp_length= _ma_calc_blob_length(keyseg->bit_start,pos); + memcpy_fixed((byte*) &pos,pos+keyseg->bit_start,sizeof(char*)); + if (!length || length > tmp_length) + length=tmp_length; /* The whole blob */ + } + end= pos+length; + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) + { + keyseg->charset->coll->hash_sort(keyseg->charset, + (const uchar*) pos, length, &seed1, + &seed2); + crc^= seed1; + } + else + while (pos != end) + crc=((crc << 8) + + (((uchar) *(uchar*) pos++))) + + (crc >> (8*sizeof(ha_checksum)-8)); + } + return crc; +} + + +/* + compare unique key for two rows + + TODO + Add support for bit fields + + RETURN + 0 if both rows have equal unique value + 1 Rows are different +*/ + +my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal) +{ + const byte *pos_a, *pos_b, *end; + HA_KEYSEG *keyseg; + + for (keyseg=def->seg ; keyseg < def->end ; keyseg++) + { + enum ha_base_keytype type=(enum ha_base_keytype) keyseg->type; + uint a_length, b_length; + a_length= b_length= keyseg->length; + + /* If part is NULL it's regarded as different */ + if (keyseg->null_bit) + { + uint tmp; + if ((tmp=(a[keyseg->null_pos] & keyseg->null_bit)) != + (uint) (b[keyseg->null_pos] & keyseg->null_bit)) + return 1; + if (tmp) + { + if (!null_are_equal) + return 1; + continue; + } + } + pos_a= a+keyseg->start; + pos_b= b+keyseg->start; + if (keyseg->flag & HA_VAR_LENGTH_PART) + { + uint pack_length= keyseg->bit_start; + if (pack_length == 1) + { + a_length= (uint) *(uchar*) pos_a++; + b_length= (uint) *(uchar*) pos_b++; + } + else + { + a_length= uint2korr(pos_a); + b_length= uint2korr(pos_b); + pos_a+= 2; /* Skip VARCHAR length */ + pos_b+= 2; + } + set_if_smaller(a_length, keyseg->length); /* Safety */ + set_if_smaller(b_length, keyseg->length); /* safety */ + } + else if (keyseg->flag & HA_BLOB_PART) + { + /* Only compare 'length' characters if length != 0 */ + a_length= _ma_calc_blob_length(keyseg->bit_start,pos_a); + b_length= _ma_calc_blob_length(keyseg->bit_start,pos_b); + /* Check that a and b are of equal length */ + if (keyseg->length) + { + /* + This is used in some cases when we are not interested in comparing + the whole length of the blob. + */ + set_if_smaller(a_length, keyseg->length); + set_if_smaller(b_length, keyseg->length); + } + memcpy_fixed((byte*) &pos_a,pos_a+keyseg->bit_start,sizeof(char*)); + memcpy_fixed((byte*) &pos_b,pos_b+keyseg->bit_start,sizeof(char*)); + } + if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || + type == HA_KEYTYPE_VARTEXT2) + { + if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length, + (uchar *) pos_b, b_length, 0, 1)) + return 1; + } + else + { + if (a_length != b_length) + return 1; + end= pos_a+a_length; + while (pos_a != end) + { + if (*pos_a++ != *pos_b++) + return 1; + } + } + } + return 0; +} diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c new file mode 100644 index 00000000000..737c7c909b4 --- /dev/null +++ b/storage/maria/ma_update.c @@ -0,0 +1,251 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Update an old row in a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +int maria_update(register MARIA_HA *info, const byte *oldrec, byte *newrec) +{ + int flag,key_changed,save_errno; + reg3 my_off_t pos; + uint i; + byte old_key[HA_MAX_KEY_BUFF],*new_key; + bool auto_key_changed=0; + ulonglong changed; + MARIA_SHARE *share=info->s; + ha_checksum old_checksum; + DBUG_ENTER("maria_update"); + LINT_INIT(new_key); + LINT_INIT(changed); + LINT_INIT(old_checksum); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + if (!(info->update & HA_STATE_AKTIV)) + { + DBUG_RETURN(my_errno=HA_ERR_KEY_NOT_FOUND); + } + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (info->state->key_file_length >= share->base.margin_key_file_length) + { + DBUG_RETURN(my_errno=HA_ERR_INDEX_FILE_FULL); + } + pos= info->cur_row.lastpos; + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + + if ((*share->compare_record)(info,oldrec)) + { + save_errno= my_errno; + DBUG_PRINT("warning", ("Got error from compare record")); + goto err_end; /* Record has changed */ + } + + if (share->calc_checksum) + { + /* + We can't use the row based checksum as this doesn't have enough + precision. + */ + if (info->s->calc_checksum) + old_checksum= (*info->s->calc_checksum)(info, oldrec); + } + + /* Calculate and check all unique constraints */ + key_changed=0; + for (i=0 ; i < share->state.header.uniques ; i++) + { + MARIA_UNIQUEDEF *def=share->uniqueinfo+i; + if (_ma_unique_comp(def, newrec, oldrec,1) && + _ma_check_unique(info, def, newrec, _ma_unique_hash(def, newrec), + pos)) + { + save_errno=my_errno; + goto err_end; + } + } + if (_ma_mark_file_changed(info)) + { + save_errno=my_errno; + goto err_end; + } + + /* Check which keys changed from the original row */ + + new_key= info->lastkey2; + changed=0; + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(share->state.key_map, i)) + { + if (share->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_cmp(info,i,oldrec, newrec)) + { + if ((int) i == info->lastinx) + { + /* + We are changeing the index we are reading on. Mark that + the index data has changed and we need to do a full search + when doing read-next + */ + key_changed|=HA_STATE_WRITTEN; + } + changed|=((ulonglong) 1 << i); + if (_ma_ft_update(info,i,(char*) old_key,oldrec,newrec,pos)) + goto err; + } + } + else + { + uint new_length= _ma_make_key(info,i,new_key,newrec,pos); + uint old_length= _ma_make_key(info,i,old_key,oldrec,pos); + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (new_length != old_length || + memcmp(old_key, new_key, new_length)) + { + if ((int) i == info->lastinx) + key_changed|=HA_STATE_WRITTEN; /* Mark that keyfile changed */ + changed|=((ulonglong) 1 << i); + share->keyinfo[i].version++; + if (share->keyinfo[i].ck_delete(info,i,old_key,old_length)) goto err; + if (share->keyinfo[i].ck_insert(info,i,new_key,new_length)) goto err; + if (share->base.auto_key == i+1) + auto_key_changed=1; + } + } + } + } + /* + If we are running with external locking, we must update the index file + that something has changed. + */ + if (changed || !my_disable_locking) + key_changed|= HA_STATE_CHANGED; + + if (share->calc_checksum) + { + info->cur_row.checksum= (*share->calc_checksum)(info,newrec); + /* Store new checksum in index file header */ + key_changed|= HA_STATE_CHANGED; + } + { + /* + Don't update index file if data file is not extended and no status + information changed + */ + MARIA_STATUS_INFO state; + ha_rows org_split; + my_off_t org_delete_link; + + memcpy((char*) &state, (char*) info->state, sizeof(state)); + org_split= share->state.split; + org_delete_link= share->state.dellink; + if ((*share->update_record)(info, pos, oldrec, newrec)) + goto err; + if (!key_changed && + (memcmp((char*) &state, (char*) info->state, sizeof(state)) || + org_split != share->state.split || + org_delete_link != share->state.dellink)) + key_changed|= HA_STATE_CHANGED; /* Must update index file */ + } + if (auto_key_changed) + set_if_bigger(info->s->state.auto_increment, + ma_retrieve_auto_increment(info, newrec)); + if (share->calc_checksum) + info->state->checksum+= (info->cur_row.checksum - old_checksum); + + /* + We can't yet have HA_STATE_AKTIV here, as block_record dosn't support + it + */ + info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED | key_changed); + + /* + Every Maria function that updates Maria table must end with + call to _ma_writeinfo(). If operation (second param of + _ma_writeinfo()) is not 0 it sets share->changed to 1, that is + flags that data has changed. If operation is 0, this function + equals to no-op in this case. + + ma_update() must always pass !0 value as operation, since even if + there is no index change there could be data change. + */ + VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (update)", info->s->open_file_name)); + (*info->invalidator)(info->s->open_file_name); + info->invalidator=0; + } + DBUG_RETURN(0); + +err: + DBUG_PRINT("error",("key: %d errno: %d",i,my_errno)); + save_errno=my_errno; + if (my_errno == HA_ERR_FOUND_DUPP_KEY || my_errno == HA_ERR_OUT_OF_MEM || + my_errno == HA_ERR_RECORD_FILE_FULL) + { + info->errkey= (int) i; + flag=0; + do + { + if (((ulonglong) 1 << i) & changed) + { + if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if ((flag++ && _ma_ft_del(info,i,(char*) new_key,newrec,pos)) || + _ma_ft_add(info,i,(char*) old_key,oldrec,pos)) + break; + } + else + { + uint new_length= _ma_make_key(info,i,new_key,newrec,pos); + uint old_length= _ma_make_key(info,i,old_key,oldrec,pos); + if ((flag++ && _ma_ck_delete(info,i,new_key,new_length)) || + _ma_ck_write(info,i,old_key,old_length)) + break; + } + } + } while (i-- != 0); + } + else + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED | + key_changed); + + err_end: + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + if (save_errno == HA_ERR_KEY_NOT_FOUND) + { + maria_print_error(info->s, HA_ERR_CRASHED); + save_errno=HA_ERR_CRASHED; + } + DBUG_RETURN(my_errno=save_errno); +} /* maria_update */ diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c new file mode 100644 index 00000000000..a87e2d76fc7 --- /dev/null +++ b/storage/maria/ma_write.c @@ -0,0 +1,1084 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Write a row to a MARIA table */ + +#include "ma_fulltext.h" +#include "ma_rt_index.h" + +#define MAX_POINTER_LENGTH 8 + + /* Functions declared in this file */ + +static int w_search(MARIA_HA *info,MARIA_KEYDEF *keyinfo, + uint comp_flag, byte *key, + uint key_length, my_off_t pos, byte *father_buff, + byte *father_keypos, my_off_t father_page, + my_bool insert_last); +static int _ma_balance_page(MARIA_HA *info,MARIA_KEYDEF *keyinfo,byte *key, + byte *curr_buff,byte *father_buff, + byte *father_keypos,my_off_t father_page); +static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key); +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr,byte *key, + uint key_length); +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr,byte *key, + uint key_length); + + +MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, + const byte *record + __attribute__((unused))) +{ + return ((info->s->state.dellink != HA_OFFSET_ERROR && + !info->append_insert_at_end) ? + info->s->state.dellink : + info->state->data_file_length); +} + +my_bool _ma_write_abort_default(MARIA_HA *info __attribute__((unused))) +{ + return 0; +} + + +/* Write new record to a table */ + +int maria_write(MARIA_HA *info, byte *record) +{ + MARIA_SHARE *share=info->s; + uint i; + int save_errno; + MARIA_RECORD_POS filepos; + byte *buff; + my_bool lock_tree= share->concurrent_insert; + my_bool fatal_error; + DBUG_ENTER("maria_write"); + DBUG_PRINT("enter",("index_file: %d data_file: %d", + info->s->kfile.file, info->dfile.file)); + + DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", + maria_print_error(info->s, HA_ERR_CRASHED); + DBUG_RETURN(my_errno= HA_ERR_CRASHED);); + if (share->options & HA_OPTION_READ_ONLY_DATA) + { + DBUG_RETURN(my_errno=EACCES); + } + if (_ma_readinfo(info,F_WRLCK,1)) + DBUG_RETURN(my_errno); + dont_break(); /* Dont allow SIGHUP or SIGINT */ + + if (share->base.reloc == (ha_rows) 1 && + share->base.records == (ha_rows) 1 && + info->state->records == (ha_rows) 1) + { /* System file */ + my_errno=HA_ERR_RECORD_FILE_FULL; + goto err2; + } + if (info->state->key_file_length >= share->base.margin_key_file_length) + { + my_errno=HA_ERR_INDEX_FILE_FULL; + goto err2; + } + if (_ma_mark_file_changed(info)) + goto err2; + + /* Calculate and check all unique constraints */ + for (i=0 ; i < share->state.header.uniques ; i++) + { + if (_ma_check_unique(info,share->uniqueinfo+i,record, + _ma_unique_hash(share->uniqueinfo+i,record), + HA_OFFSET_ERROR)) + goto err2; + } + + if ((info->opt_flag & OPT_NO_ROWS)) + filepos= HA_OFFSET_ERROR; + else + { + /* + This may either calculate a record or, or write the record and return + the record id + */ + if ((filepos= (*share->write_record_init)(info, record)) == + HA_OFFSET_ERROR) + goto err2; + } + + /* Write all keys to indextree */ + buff= info->lastkey2; + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(share->state.key_map, i)) + { + bool local_lock_tree= (lock_tree && + !(info->bulk_insert && + is_tree_inited(&info->bulk_insert[i]))); + if (local_lock_tree) + { + rw_wrlock(&share->key_root_lock[i]); + share->keyinfo[i].version++; + } + if (share->keyinfo[i].flag & HA_FULLTEXT ) + { + if (_ma_ft_add(info,i,(char*) buff,record,filepos)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + DBUG_PRINT("error",("Got error: %d on write",my_errno)); + goto err; + } + } + else + { + if (share->keyinfo[i].ck_insert(info,i,buff, + _ma_make_key(info,i,buff,record, + filepos))) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + DBUG_PRINT("error",("Got error: %d on write",my_errno)); + goto err; + } + } + + /* The above changed info->lastkey2. Inform maria_rnext_same(). */ + info->update&= ~HA_STATE_RNEXT_SAME; + + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + } + } + if (share->calc_write_checksum) + info->cur_row.checksum= (*share->calc_write_checksum)(info,record); + if (filepos != HA_OFFSET_ERROR) + { + if ((*share->write_record)(info,record)) + goto err; + info->state->checksum+= info->cur_row.checksum; + } + if (share->base.auto_key) + set_if_bigger(info->s->state.auto_increment, + ma_retrieve_auto_increment(info, record)); + info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_WRITTEN | + HA_STATE_ROW_CHANGED); + info->state->records++; + info->cur_row.lastpos= filepos; + VOID(_ma_writeinfo(info, WRITEINFO_UPDATE_KEYFILE)); + if (info->invalidator != 0) + { + DBUG_PRINT("info", ("invalidator... '%s' (update)", info->s->open_file_name)); + (*info->invalidator)(info->s->open_file_name); + info->invalidator=0; + } + + /* + Update status of the table. We need to do so after each row write + for the log tables, as we want the new row to become visible to + other threads as soon as possible. We don't lock mutex here + (as it is required by pthread memory visibility rules) as (1) it's + not critical to use outdated share->is_log_table value (2) locking + mutex here for every write is too expensive. + */ + if (share->is_log_table) + _ma_update_status((void*) info); + + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(0); + +err: + save_errno= my_errno; + fatal_error= 0; + if (my_errno == HA_ERR_FOUND_DUPP_KEY || + my_errno == HA_ERR_RECORD_FILE_FULL || + my_errno == HA_ERR_NULL_IN_SPATIAL || + my_errno == HA_ERR_OUT_OF_MEM) + { + if (info->bulk_insert) + { + uint j; + for (j=0 ; j < share->base.keys ; j++) + maria_flush_bulk_insert(info, j); + } + info->errkey= (int) i; + while ( i-- > 0) + { + if (maria_is_key_active(share->state.key_map, i)) + { + bool local_lock_tree= (lock_tree && + !(info->bulk_insert && + is_tree_inited(&info->bulk_insert[i]))); + if (local_lock_tree) + rw_wrlock(&share->key_root_lock[i]); + if (share->keyinfo[i].flag & HA_FULLTEXT) + { + if (_ma_ft_del(info,i,(char*) buff,record,filepos)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + break; + } + } + else + { + uint key_length= _ma_make_key(info,i,buff,record,filepos); + if (_ma_ck_delete(info,i,buff,key_length)) + { + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + break; + } + } + if (local_lock_tree) + rw_unlock(&share->key_root_lock[i]); + } + } + } + else + fatal_error= 1; + + if ((*share->write_record_abort)(info)) + fatal_error= 1; + if (fatal_error) + { + maria_print_error(info->s, HA_ERR_CRASHED); + maria_mark_crashed(info); + } + + info->update= (HA_STATE_CHANGED | HA_STATE_WRITTEN | HA_STATE_ROW_CHANGED); + my_errno=save_errno; +err2: + save_errno=my_errno; + DBUG_PRINT("error", ("got error: %d", save_errno)); + VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); + allow_break(); /* Allow SIGHUP & SIGINT */ + DBUG_RETURN(my_errno=save_errno); +} /* maria_write */ + + + /* Write one key to btree */ + +int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key, uint key_length) +{ + DBUG_ENTER("_ma_ck_write"); + + if (info->bulk_insert && is_tree_inited(&info->bulk_insert[keynr])) + { + DBUG_RETURN(_ma_ck_write_tree(info, keynr, key, key_length)); + } + else + { + DBUG_RETURN(_ma_ck_write_btree(info, keynr, key, key_length)); + } +} /* _ma_ck_write */ + + +/********************************************************************** + * Normal insert code * + **********************************************************************/ + +int _ma_ck_write_btree(register MARIA_HA *info, uint keynr, byte *key, + uint key_length) +{ + int error; + uint comp_flag; + MARIA_KEYDEF *keyinfo=info->s->keyinfo+keynr; + my_off_t *root=&info->s->state.key_root[keynr]; + DBUG_ENTER("_ma_ck_write_btree"); + + if (keyinfo->flag & HA_SORT_ALLOWS_SAME) + comp_flag=SEARCH_BIGGER; /* Put after same key */ + else if (keyinfo->flag & (HA_NOSAME|HA_FULLTEXT)) + { + comp_flag=SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */ + if (keyinfo->flag & HA_NULL_ARE_EQUAL) + comp_flag|= SEARCH_NULL_ARE_EQUAL; + } + else + comp_flag=SEARCH_SAME; /* Keys in rec-pos order */ + + error= _ma_ck_real_write_btree(info, keyinfo, key, key_length, + root, comp_flag); + if (info->ft1_to_ft2) + { + if (!error) + error= _ma_ft_convert_to_ft2(info, keynr, key); + delete_dynamic(info->ft1_to_ft2); + my_free((gptr)info->ft1_to_ft2, MYF(0)); + info->ft1_to_ft2=0; + } + DBUG_RETURN(error); +} /* _ma_ck_write_btree */ + + +int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, my_off_t *root, + uint comp_flag) +{ + int error; + DBUG_ENTER("_ma_ck_real_write_btree"); + /* key_length parameter is used only if comp_flag is SEARCH_FIND */ + if (*root == HA_OFFSET_ERROR || + (error=w_search(info, keyinfo, comp_flag, key, key_length, + *root, (byte*) 0, (byte*) 0, + (my_off_t) 0, 1)) > 0) + error= _ma_enlarge_root(info,keyinfo,key,root); + DBUG_RETURN(error); +} /* _ma_ck_real_write_btree */ + + + /* Make a new root with key as only pointer */ + +int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + my_off_t *root) +{ + uint t_length,nod_flag; + MARIA_KEY_PARAM s_temp; + MARIA_SHARE *share=info->s; + DBUG_ENTER("_ma_enlarge_root"); + + nod_flag= (*root != HA_OFFSET_ERROR) ? share->base.key_reflength : 0; + _ma_kpointer(info,info->buff+2,*root); /* if nod */ + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte*) 0, + (byte*) 0, (byte*) 0, key,&s_temp); + maria_putint(info->buff,t_length+2+nod_flag,nod_flag); + (*keyinfo->store_key)(keyinfo,info->buff+2+nod_flag,&s_temp); + info->keyread_buff_used=info->page_changed=1; /* info->buff is used */ + if ((*root= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR || + _ma_write_keypage(info,keyinfo,*root,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + DBUG_RETURN(0); +} /* _ma_enlarge_root */ + + + /* + Search after a position for a key and store it there + Returns -1 = error + 0 = ok + 1 = key should be stored in higher tree + */ + +static int w_search(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + uint comp_flag, byte *key, uint key_length, my_off_t page, + byte *father_buff, byte *father_keypos, + my_off_t father_page, my_bool insert_last) +{ + int error,flag; + uint nod_flag, search_key_length; + byte *temp_buff,*keypos; + byte keybuff[HA_MAX_KEY_BUFF]; + my_bool was_last_key; + my_off_t next_page, dup_key_pos; + DBUG_ENTER("w_search"); + DBUG_PRINT("enter",("page: %ld", (long) page)); + + search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; + if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length+ + HA_MAX_KEY_BUFF*2))) + DBUG_RETURN(-1); + if (!_ma_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) + goto err; + + flag=(*keyinfo->bin_search)(info,keyinfo,temp_buff,key,search_key_length, + comp_flag, &keypos, keybuff, &was_last_key); + nod_flag= _ma_test_if_nod(temp_buff); + if (flag == 0) + { + uint tmp_key_length; + /* get position to record with duplicated key */ + tmp_key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,keybuff); + if (tmp_key_length) + dup_key_pos= _ma_dpos(info,0,keybuff+tmp_key_length); + else + dup_key_pos= HA_OFFSET_ERROR; + + if (keyinfo->flag & HA_FULLTEXT) + { + uint off; + int subkeys; + + get_key_full_length_rdonly(off, keybuff); + subkeys=ft_sintXkorr(keybuff+off); + comp_flag=SEARCH_SAME; + if (subkeys >= 0) + { + /* normal word, one-level tree structure */ + flag=(*keyinfo->bin_search)(info, keyinfo, temp_buff, key, + USE_WHOLE_KEY, comp_flag, + &keypos, keybuff, &was_last_key); + } + else + { + /* popular word. two-level tree. going down */ + my_off_t root=dup_key_pos; + keyinfo=&info->s->ft2_keyinfo; + get_key_full_length_rdonly(off, key); + key+=off; + keypos-=keyinfo->keylength+nod_flag; /* we'll modify key entry 'in vivo' */ + error= _ma_ck_real_write_btree(info, keyinfo, key, 0, + &root, comp_flag); + _ma_dpointer(info, keypos+HA_FT_WLEN, root); + subkeys--; /* should there be underflow protection ? */ + DBUG_ASSERT(subkeys < 0); + ft_intXstore(keypos, subkeys); + if (!error) + error= _ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff); + my_afree((byte*) temp_buff); + DBUG_RETURN(error); + } + } + else /* not HA_FULLTEXT, normal HA_NOSAME key */ + { + info->dup_key_pos= dup_key_pos; + my_afree((byte*) temp_buff); + my_errno=HA_ERR_FOUND_DUPP_KEY; + DBUG_RETURN(-1); + } + } + if (flag == MARIA_FOUND_WRONG_KEY) + DBUG_RETURN(-1); + if (!was_last_key) + insert_last=0; + next_page= _ma_kpos(nod_flag,keypos); + if (next_page == HA_OFFSET_ERROR || + (error=w_search(info, keyinfo, comp_flag, key, key_length, next_page, + temp_buff, keypos, page, insert_last)) >0) + { + error= _ma_insert(info,keyinfo,key,temp_buff,keypos,keybuff,father_buff, + father_keypos,father_page, insert_last); + if (_ma_write_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff)) + goto err; + } + my_afree((byte*) temp_buff); + DBUG_RETURN(error); +err: + my_afree((byte*) temp_buff); + DBUG_PRINT("exit",("Error: %d",my_errno)); + DBUG_RETURN (-1); +} /* w_search */ + + +/* + Insert new key. + + SYNOPSIS + _ma_insert() + info Open table information. + keyinfo Key definition information. + key New key. + anc_buff Key page (beginning). + key_pos Position in key page where to insert. + key_buff Copy of previous key. + father_buff parent key page for balancing. + father_key_pos position in parent key page for balancing. + father_page position of parent key page in file. + insert_last If to append at end of page. + + DESCRIPTION + Insert new key at right of key_pos. + + RETURN + 2 if key contains key to upper level. + 0 OK. + < 0 Error. +*/ + +int _ma_insert(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, byte *anc_buff, byte *key_pos, byte *key_buff, + byte *father_buff, byte *father_key_pos, my_off_t father_page, + my_bool insert_last) +{ + uint a_length,nod_flag; + int t_length; + byte *endpos, *prev_key; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("_ma_insert"); + DBUG_PRINT("enter",("key_pos: 0x%lx", (ulong) key_pos)); + DBUG_EXECUTE("key", _ma_print_key(DBUG_FILE,keyinfo->seg,key, + USE_WHOLE_KEY);); + + nod_flag=_ma_test_if_nod(anc_buff); + a_length= maria_data_on_page(anc_buff); + endpos= anc_buff+ a_length; + prev_key=(key_pos == anc_buff+2+nod_flag ? (byte*) 0 : key_buff); + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, + (key_pos == endpos ? (byte*) 0 : key_pos), + prev_key, prev_key, + key,&s_temp); +#ifndef DBUG_OFF + if (key_pos != anc_buff+2+nod_flag && (keyinfo->flag & + (HA_BINARY_PACK_KEY | HA_PACK_KEY))) + { + DBUG_DUMP("prev_key",(byte*) key_buff, _ma_keylength(keyinfo,key_buff)); + } + if (keyinfo->flag & HA_PACK_KEY) + { + DBUG_PRINT("test",("t_length: %d ref_len: %d", + t_length,s_temp.ref_length)); + DBUG_PRINT("test",("n_ref_len: %d n_length: %d key_pos: 0x%lx", + s_temp.n_ref_length, s_temp.n_length, (long) s_temp.key)); + } +#endif + if (t_length > 0) + { + if (t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + bmove_upp((byte*) endpos+t_length,(byte*) endpos,(uint) (endpos-key_pos)); + } + else + { + if (-t_length >= keyinfo->maxlength*2+MAX_POINTER_LENGTH) + { + maria_print_error(info->s, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(-1); + } + bmove(key_pos,key_pos-t_length,(uint) (endpos-key_pos)+t_length); + } + (*keyinfo->store_key)(keyinfo,key_pos,&s_temp); + a_length+=t_length; + maria_putint(anc_buff,a_length,nod_flag); + if (a_length <= keyinfo->block_length) + { + if (keyinfo->block_length - a_length < 32 && + keyinfo->flag & HA_FULLTEXT && key_pos == endpos && + info->s->base.key_reflength <= info->s->base.rec_reflength && + info->s->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) + { + /* + Normal word. One-level tree. Page is almost full. + Let's consider converting. + We'll compare 'key' and the first key at anc_buff + */ + byte *a=key, *b=anc_buff+2+nod_flag; + uint alen, blen, ft2len=info->s->ft2_keyinfo.keylength; + /* the very first key on the page is always unpacked */ + DBUG_ASSERT((*b & 128) == 0); +#if HA_FT_MAXLEN >= 127 + blen= mi_uint2korr(b); b+=2; +#else + blen= *(uchar*) b++; +#endif + get_key_length(alen,a); + DBUG_ASSERT(info->ft1_to_ft2==0); + if (alen == blen && + ha_compare_text(keyinfo->seg->charset, (uchar*) a, alen, + (uchar*) b, blen, 0, 0) == 0) + { + /* yup. converting */ + info->ft1_to_ft2=(DYNAMIC_ARRAY *) + my_malloc(sizeof(DYNAMIC_ARRAY), MYF(MY_WME)); + my_init_dynamic_array(info->ft1_to_ft2, ft2len, 300, 50); + + /* + now, adding all keys from the page to dynarray + if the page is a leaf (if not keys will be deleted later) + */ + if (!nod_flag) + { + /* let's leave the first key on the page, though, because + we cannot easily dispatch an empty page here */ + b+=blen+ft2len+2; + for (a=anc_buff+a_length ; b < a ; b+=ft2len+2) + insert_dynamic(info->ft1_to_ft2, (char*) b); + + /* fixing the page's length - it contains only one key now */ + maria_putint(anc_buff,2+blen+ft2len+2,0); + } + /* the rest will be done when we're back from recursion */ + } + } + DBUG_RETURN(0); /* There is room on page */ + } + /* Page is full */ + if (nod_flag) + insert_last=0; + if (!(keyinfo->flag & (HA_VAR_LENGTH_KEY | HA_BINARY_PACK_KEY)) && + father_buff && !insert_last) + DBUG_RETURN(_ma_balance_page(info,keyinfo,key,anc_buff,father_buff, + father_key_pos,father_page)); + DBUG_RETURN(_ma_split_page(info,keyinfo,key,anc_buff,key_buff, insert_last)); +} /* _ma_insert */ + + + /* split a full page in two and assign emerging item to key */ + +int _ma_split_page(register MARIA_HA *info, register MARIA_KEYDEF *keyinfo, + byte *key, byte *buff, byte *key_buff, + my_bool insert_last_key) +{ + uint length,a_length,key_ref_length,t_length,nod_flag,key_length; + byte *key_pos,*pos, *after_key; + my_off_t new_pos; + MARIA_KEY_PARAM s_temp; + DBUG_ENTER("maria_split_page"); + LINT_INIT(after_key); + DBUG_DUMP("buff",(byte*) buff,maria_data_on_page(buff)); + + if (info->s->keyinfo+info->lastinx == keyinfo) + info->page_changed=1; /* Info->buff is used */ + info->keyread_buff_used=1; + nod_flag=_ma_test_if_nod(buff); + key_ref_length=2+nod_flag; + if (insert_last_key) + key_pos= _ma_find_last_pos(keyinfo,buff,key_buff, &key_length, &after_key); + else + key_pos= _ma_find_half_pos(nod_flag,keyinfo,buff,key_buff, &key_length, + &after_key); + if (!key_pos) + DBUG_RETURN(-1); + + length=(uint) (key_pos-buff); + a_length= maria_data_on_page(buff); + maria_putint(buff,length,nod_flag); + + key_pos=after_key; + if (nod_flag) + { + DBUG_PRINT("test",("Splitting nod")); + pos=key_pos-nod_flag; + memcpy((byte*) info->buff+2,(byte*) pos,(size_t) nod_flag); + } + + /* Move middle item to key and pointer to new page */ + if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + DBUG_RETURN(-1); + _ma_kpointer(info, _ma_move_key(keyinfo,key,key_buff),new_pos); + + /* Store new page */ + if (!(*keyinfo->get_key)(keyinfo,nod_flag,&key_pos,key_buff)) + DBUG_RETURN(-1); + + t_length=(*keyinfo->pack_key)(keyinfo,nod_flag,(byte *) 0, + (byte*) 0, (byte*) 0, + key_buff, &s_temp); + length=(uint) ((buff+a_length)-key_pos); + memcpy((byte*) info->buff+key_ref_length+t_length,(byte*) key_pos, + (size_t) length); + (*keyinfo->store_key)(keyinfo,info->buff+key_ref_length,&s_temp); + maria_putint(info->buff,length+t_length+key_ref_length,nod_flag); + + if (_ma_write_keypage(info,keyinfo,new_pos,DFLT_INIT_HITS,info->buff)) + DBUG_RETURN(-1); + DBUG_DUMP("key",(byte*) key, _ma_keylength(keyinfo,key)); + DBUG_RETURN(2); /* Middle key up */ +} /* _ma_split_page */ + + + /* + Calculate how to much to move to split a page in two + Returns pointer to start of key. + key will contain the key. + return_key_length will contain the length of key + after_key will contain the position to where the next key starts + */ + +byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key) +{ + uint keys,length,key_ref_length; + byte *end,*lastpos; + DBUG_ENTER("_ma_find_half_pos"); + + key_ref_length=2+nod_flag; + length= maria_data_on_page(page)-key_ref_length; + page+=key_ref_length; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + key_ref_length=keyinfo->keylength+nod_flag; + keys=length/(key_ref_length*2); + *return_key_length=keyinfo->keylength; + end=page+keys*key_ref_length; + *after_key=end+key_ref_length; + memcpy(key,end,key_ref_length); + DBUG_RETURN(end); + } + + end=page+length/2-key_ref_length; /* This is aprox. half */ + *key='\0'; + do + { + lastpos=page; + if (!(length=(*keyinfo->get_key)(keyinfo,nod_flag,&page,key))) + DBUG_RETURN(0); + } while (page < end); + *return_key_length=length; + *after_key=page; + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx half: 0x%lx", + (long) lastpos, (long) page, (long) end)); + DBUG_RETURN(lastpos); +} /* _ma_find_half_pos */ + + +/* + Split buffer at last key + Returns pointer to the start of the key before the last key + key will contain the last key +*/ + +static byte *_ma_find_last_pos(MARIA_KEYDEF *keyinfo, byte *page, + byte *key, uint *return_key_length, + byte **after_key) +{ + uint keys,length,last_length,key_ref_length; + byte *end,*lastpos,*prevpos; + byte key_buff[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_find_last_pos"); + + key_ref_length=2; + length= maria_data_on_page(page)-key_ref_length; + page+=key_ref_length; + if (!(keyinfo->flag & + (HA_PACK_KEY | HA_SPACE_PACK_USED | HA_VAR_LENGTH_KEY | + HA_BINARY_PACK_KEY))) + { + keys=length/keyinfo->keylength-2; + *return_key_length=length=keyinfo->keylength; + end=page+keys*length; + *after_key=end+length; + memcpy(key,end,length); + DBUG_RETURN(end); + } + + LINT_INIT(prevpos); + LINT_INIT(last_length); + end=page+length-key_ref_length; + *key='\0'; + length=0; + lastpos=page; + while (page < end) + { + prevpos=lastpos; lastpos=page; + last_length=length; + memcpy(key, key_buff, length); /* previous key */ + if (!(length=(*keyinfo->get_key)(keyinfo,0,&page,key_buff))) + { + maria_print_error(keyinfo->share, HA_ERR_CRASHED); + my_errno=HA_ERR_CRASHED; + DBUG_RETURN(0); + } + } + *return_key_length=last_length; + *after_key=lastpos; + DBUG_PRINT("exit",("returns: 0x%lx page: 0x%lx end: 0x%lx", + (long) prevpos,(long) page,(long) end)); + DBUG_RETURN(prevpos); +} /* _ma_find_last_pos */ + + + /* Balance page with not packed keys with page on right/left */ + /* returns 0 if balance was done */ + +static int _ma_balance_page(register MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, byte *curr_buff, byte *father_buff, + byte *father_key_pos, my_off_t father_page) +{ + my_bool right; + uint k_length,father_length,father_keylength,nod_flag,curr_keylength, + right_length,left_length,new_right_length,new_left_length,extra_length, + length,keys; + byte *pos,*buff,*extra_buff; + my_off_t next_page,new_pos; + byte tmp_part_key[HA_MAX_KEY_BUFF]; + DBUG_ENTER("_ma_balance_page"); + + k_length=keyinfo->keylength; + father_length= maria_data_on_page(father_buff); + father_keylength=k_length+info->s->base.key_reflength; + nod_flag=_ma_test_if_nod(curr_buff); + curr_keylength=k_length+nod_flag; + info->page_changed=1; + + if ((father_key_pos != father_buff+father_length && + (info->state->records & 1)) || + father_key_pos == father_buff+2+info->s->base.key_reflength) + { + right=1; + next_page= _ma_kpos(info->s->base.key_reflength, + father_key_pos+father_keylength); + buff=info->buff; + DBUG_PRINT("test",("use right page: %lu", (ulong) next_page)); + } + else + { + right=0; + father_key_pos-=father_keylength; + next_page= _ma_kpos(info->s->base.key_reflength,father_key_pos); + /* Fix that curr_buff is to left */ + buff=curr_buff; curr_buff=info->buff; + DBUG_PRINT("test",("use left page: %lu", (ulong) next_page)); + } /* father_key_pos ptr to parting key */ + + if (!_ma_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff,0)) + goto err; + DBUG_DUMP("next",(byte*) info->buff,maria_data_on_page(info->buff)); + + /* Test if there is room to share keys */ + + left_length= maria_data_on_page(curr_buff); + right_length= maria_data_on_page(buff); + keys=(left_length+right_length-4-nod_flag*2)/curr_keylength; + + if ((right ? right_length : left_length) + curr_keylength <= + keyinfo->block_length) + { /* Merge buffs */ + new_left_length=2+nod_flag+(keys/2)*curr_keylength; + new_right_length=2+nod_flag+((keys+1)/2)*curr_keylength; + maria_putint(curr_buff,new_left_length,nod_flag); + maria_putint(buff,new_right_length,nod_flag); + + if (left_length < new_left_length) + { /* Move keys buff -> leaf */ + pos=curr_buff+left_length; + memcpy((byte*) pos,(byte*) father_key_pos, (size_t) k_length); + memcpy((byte*) pos+k_length, (byte*) buff+2, + (size_t) (length=new_left_length - left_length - k_length)); + pos=buff+2+length; + memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length); + bmove((byte*) buff+2,(byte*) pos+k_length,new_right_length); + } + else + { /* Move keys -> buff */ + + bmove_upp((byte*) buff+new_right_length,(byte*) buff+right_length, + right_length-2); + length=new_right_length-right_length-k_length; + memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length); + pos=curr_buff+new_left_length; + memcpy((byte*) father_key_pos,(byte*) pos,(size_t) k_length); + memcpy((byte*) buff+2,(byte*) pos+k_length,(size_t) length); + } + + if (_ma_write_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,info->buff) || + _ma_write_keypage(info,keyinfo,father_page,DFLT_INIT_HITS,father_buff)) + goto err; + DBUG_RETURN(0); + } + + /* curr_buff[] and buff[] are full, lets split and make new nod */ + + extra_buff=info->buff+info->s->base.max_key_block_length; + new_left_length=new_right_length=2+nod_flag+(keys+1)/3*curr_keylength; + if (keys == 5) /* Too few keys to balance */ + new_left_length-=curr_keylength; + extra_length=nod_flag+left_length+right_length- + new_left_length-new_right_length-curr_keylength; + DBUG_PRINT("info",("left_length: %d right_length: %d new_left_length: %d new_right_length: %d extra_length: %d", + left_length, right_length, + new_left_length, new_right_length, + extra_length)); + maria_putint(curr_buff,new_left_length,nod_flag); + maria_putint(buff,new_right_length,nod_flag); + maria_putint(extra_buff,extra_length+2,nod_flag); + + /* move first largest keys to new page */ + pos=buff+right_length-extra_length; + memcpy((byte*) extra_buff+2,pos,(size_t) extra_length); + /* Save new parting key */ + memcpy(tmp_part_key, pos-k_length,k_length); + /* Make place for new keys */ + bmove_upp((byte*) buff+new_right_length,(byte*) pos-k_length, + right_length-extra_length-k_length-2); + /* Copy keys from left page */ + pos= curr_buff+new_left_length; + memcpy((byte*) buff+2,(byte*) pos+k_length, + (size_t) (length=left_length-new_left_length-k_length)); + /* Copy old parting key */ + memcpy((byte*) buff+2+length,father_key_pos,(size_t) k_length); + + /* Move new parting keys up to caller */ + memcpy((byte*) (right ? key : father_key_pos),pos,(size_t) k_length); + memcpy((byte*) (right ? father_key_pos : key),tmp_part_key, k_length); + + if ((new_pos= _ma_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) + goto err; + _ma_kpointer(info,key+k_length,new_pos); + if (_ma_write_keypage(info,keyinfo,(right ? new_pos : next_page), + DFLT_INIT_HITS,info->buff) || + _ma_write_keypage(info,keyinfo,(right ? next_page : new_pos), + DFLT_INIT_HITS,extra_buff)) + goto err; + + DBUG_RETURN(1); /* Middle key up */ + +err: + DBUG_RETURN(-1); +} /* _ma_balance_page */ + +/********************************************************************** + * Bulk insert code * + **********************************************************************/ + +typedef struct { + MARIA_HA *info; + uint keynr; +} bulk_insert_param; + + +int _ma_ck_write_tree(register MARIA_HA *info, uint keynr, byte *key, + uint key_length) +{ + int error; + DBUG_ENTER("_ma_ck_write_tree"); + + error= tree_insert(&info->bulk_insert[keynr], key, + key_length + info->s->rec_reflength, + info->bulk_insert[keynr].custom_arg) ? 0 : HA_ERR_OUT_OF_MEM ; + + DBUG_RETURN(error); +} /* _ma_ck_write_tree */ + + +/* typeof(_ma_keys_compare)=qsort_cmp2 */ + +static int keys_compare(bulk_insert_param *param, byte *key1, byte *key2) +{ + uint not_used[2]; + return ha_key_cmp(param->info->s->keyinfo[param->keynr].seg, + (uchar*) key1, (uchar*) key2, USE_WHOLE_KEY, SEARCH_SAME, + not_used); +} + + +static int keys_free(byte *key, TREE_FREE mode, bulk_insert_param *param) +{ + /* + Probably I can use info->lastkey here, but I'm not sure, + and to be safe I'd better use local lastkey. + */ + byte lastkey[HA_MAX_KEY_BUFF]; + uint keylen; + MARIA_KEYDEF *keyinfo; + + switch (mode) { + case free_init: + if (param->info->s->concurrent_insert) + { + rw_wrlock(¶m->info->s->key_root_lock[param->keynr]); + param->info->s->keyinfo[param->keynr].version++; + } + return 0; + case free_free: + keyinfo=param->info->s->keyinfo+param->keynr; + keylen= _ma_keylength(keyinfo, key); + memcpy(lastkey, key, keylen); + return _ma_ck_write_btree(param->info,param->keynr,lastkey, + keylen - param->info->s->rec_reflength); + case free_end: + if (param->info->s->concurrent_insert) + rw_unlock(¶m->info->s->key_root_lock[param->keynr]); + return 0; + } + return -1; +} + + +int maria_init_bulk_insert(MARIA_HA *info, ulong cache_size, ha_rows rows) +{ + MARIA_SHARE *share=info->s; + MARIA_KEYDEF *key=share->keyinfo; + bulk_insert_param *params; + uint i, num_keys, total_keylength; + ulonglong key_map; + DBUG_ENTER("_ma_init_bulk_insert"); + DBUG_PRINT("enter",("cache_size: %lu", cache_size)); + + DBUG_ASSERT(!info->bulk_insert && + (!rows || rows >= MARIA_MIN_ROWS_TO_USE_BULK_INSERT)); + + maria_clear_all_keys_active(key_map); + for (i=total_keylength=num_keys=0 ; i < share->base.keys ; i++) + { + if (! (key[i].flag & HA_NOSAME) && (share->base.auto_key != i + 1) && + maria_is_key_active(share->state.key_map, i)) + { + num_keys++; + maria_set_key_active(key_map, i); + total_keylength+=key[i].maxlength+TREE_ELEMENT_EXTRA_SIZE; + } + } + + if (num_keys==0 || + num_keys * MARIA_MIN_SIZE_BULK_INSERT_TREE > cache_size) + DBUG_RETURN(0); + + if (rows && rows*total_keylength < cache_size) + cache_size=rows; + else + cache_size/=total_keylength*16; + + info->bulk_insert=(TREE *) + my_malloc((sizeof(TREE)*share->base.keys+ + sizeof(bulk_insert_param)*num_keys),MYF(0)); + + if (!info->bulk_insert) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + + params=(bulk_insert_param *)(info->bulk_insert+share->base.keys); + for (i=0 ; i < share->base.keys ; i++) + { + if (maria_is_key_active(key_map, i)) + { + params->info=info; + params->keynr=i; + /* Only allocate a 16'th of the buffer at a time */ + init_tree(&info->bulk_insert[i], + cache_size * key[i].maxlength, + cache_size * key[i].maxlength, 0, + (qsort_cmp2)keys_compare, 0, + (tree_element_free) keys_free, (void *)params++); + } + else + info->bulk_insert[i].root=0; + } + + DBUG_RETURN(0); +} + +void maria_flush_bulk_insert(MARIA_HA *info, uint inx) +{ + if (info->bulk_insert) + { + if (is_tree_inited(&info->bulk_insert[inx])) + reset_tree(&info->bulk_insert[inx]); + } +} + +void maria_end_bulk_insert(MARIA_HA *info) +{ + DBUG_ENTER("maria_end_bulk_insert"); + if (info->bulk_insert) + { + uint i; + for (i=0 ; i < info->s->base.keys ; i++) + { + if (is_tree_inited(& info->bulk_insert[i])) + { + delete_tree(& info->bulk_insert[i]); + } + } + my_free((void *)info->bulk_insert, MYF(0)); + info->bulk_insert=0; + } + DBUG_VOID_RETURN; +} diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c new file mode 100644 index 00000000000..0b82a71f736 --- /dev/null +++ b/storage/maria/maria_chk.c @@ -0,0 +1,1815 @@ +/* Copyright (C) 2006-2003 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Describe, check and repair of MARIA tables */ + +#include "ma_fulltext.h" +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_VADVICE_H +#include +#endif +#ifdef HAVE_SYS_MMAN_H +#include +#endif +SET_STACK_SIZE(9000) /* Minimum stack size for program */ + +#ifndef USE_RAID +#define my_raid_create(A,B,C,D,E,F,G) my_create(A,B,C,G) +#define my_raid_delete(A,B,C) my_delete(A,B) +#endif + +static uint decode_bits; +static char **default_argv; +static const char *load_default_groups[]= { "maria_chk", 0 }; +static const char *set_collation_name, *opt_tmpdir; +static CHARSET_INFO *set_collation; +static const char *my_progname_short; +static int stopwords_inited= 0; +static MY_TMPDIR maria_chk_tmpdir; + +static const char *type_names[]= +{ + "impossible","char","binary", "short", "long", "float", + "double","number","unsigned short", + "unsigned long","longlong","ulonglong","int24", + "uint24","int8","varchar", "varbin", "varchar2", "varbin2", "bit", + "?","?" +}; + +static const char *prefix_packed_txt="packed ", + *bin_packed_txt="prefix ", + *diff_txt="stripped ", + *null_txt="NULL", + *blob_txt="BLOB "; + +static const char *field_pack[]= +{ + "","no endspace", "no prespace", + "no zeros", "blob", "constant", "table-lockup", + "always zero","varchar","unique-hash","?","?" +}; + +static const char *record_formats[]= +{ + "Fixed length", "Packed", "Compressed", "Block", "?" +}; + +static const char *maria_stats_method_str="nulls_unequal"; + +static void get_options(int *argc,char * * *argv); +static void print_version(void); +static void usage(void); +static int maria_chk(HA_CHECK *param, char *filename); +static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name); +static int maria_sort_records(HA_CHECK *param, register MARIA_HA *info, + my_string name, uint sort_key, + my_bool write_info, my_bool update_index); +static int sort_record_index(MARIA_SORT_PARAM *sort_param, MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff,uint sortkey, + File new_file, my_bool update_index); + +HA_CHECK check_param; + + /* Main program */ + +int main(int argc, char **argv) +{ + int error; + MY_INIT(argv[0]); + my_progname_short= my_progname+dirname_length(my_progname); + + maria_chk_init(&check_param); + check_param.opt_lock_memory= 1; /* Lock memory if possible */ + check_param.using_global_keycache = 0; + get_options(&argc,(char***) &argv); + maria_quick_table_bits=decode_bits; + error=0; + maria_init(); + + while (--argc >= 0) + { + int new_error=maria_chk(&check_param, *(argv++)); + if ((check_param.testflag & T_REP_ANY) != T_REP) + check_param.testflag&= ~T_REP; + VOID(fflush(stdout)); + VOID(fflush(stderr)); + if ((check_param.error_printed | check_param.warning_printed) && + (check_param.testflag & T_FORCE_CREATE) && + (!(check_param.testflag & (T_REP | T_REP_BY_SORT | T_SORT_RECORDS | + T_SORT_INDEX)))) + { + uint old_testflag=check_param.testflag; + if (!(check_param.testflag & T_REP)) + check_param.testflag|= T_REP_BY_SORT; + check_param.testflag&= ~T_EXTEND; /* Don't needed */ + error|=maria_chk(&check_param, argv[-1]); + check_param.testflag= old_testflag; + VOID(fflush(stdout)); + VOID(fflush(stderr)); + } + else + error|=new_error; + if (argc && (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO)) + { + puts("\n---------\n"); + VOID(fflush(stdout)); + } + } + if (check_param.total_files > 1) + { /* Only if descript */ + char buff[22],buff2[22]; + if (!(check_param.testflag & T_SILENT) || check_param.testflag & T_INFO) + puts("\n---------\n"); + printf("\nTotal of all %d MARIA-files:\nData records: %9s Deleted blocks: %9s\n",check_param.total_files,llstr(check_param.total_records,buff), + llstr(check_param.total_deleted,buff2)); + } + free_defaults(default_argv); + free_tmpdir(&maria_chk_tmpdir); + maria_end(); + my_end(check_param.testflag & T_INFO ? + MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); + exit(error); +#ifndef _lint + return 0; /* No compiler warning */ +#endif +} /* main */ + +enum options_mc { + OPT_CHARSETS_DIR=256, OPT_SET_COLLATION,OPT_START_CHECK_POS, + OPT_CORRECT_CHECKSUM, OPT_KEY_BUFFER_SIZE, + OPT_KEY_CACHE_BLOCK_SIZE, OPT_MARIA_BLOCK_SIZE, + OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE, + OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN, + OPT_FT_MAX_WORD_LEN, OPT_FT_STOPWORD_FILE, + OPT_MAX_RECORD_LENGTH, OPT_AUTO_CLOSE, OPT_STATS_METHOD +}; + +static struct my_option my_long_options[] = +{ + {"analyze", 'a', + "Analyze distribution of keys. Will make some joins in MySQL faster. You can check the calculated distribution.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef __NETWARE__ + {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"block-search", 'b', + "No help available.", + 0, 0, 0, GET_ULONG, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"backup", 'B', + "Make a backup of the .MYD file as 'filename-time.BAK'.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"character-sets-dir", OPT_CHARSETS_DIR, + "Directory where character sets are.", + (gptr*) &charsets_dir, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"check", 'c', + "Check table for errors.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"check-only-changed", 'C', + "Check only tables that have changed since last check. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"correct-checksum", OPT_CORRECT_CHECKSUM, + "Correct checksum information for table.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', + "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"description", 'd', + "Prints some information about table.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"data-file-length", 'D', + "Max length of data file (when recreating data-file when it's full).", + (gptr*) &check_param.max_data_file_length, + (gptr*) &check_param.max_data_file_length, + 0, GET_LL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"extend-check", 'e', + "If used when checking a table, ensure that the table is 100 percent consistent, which will take a long time. If used when repairing a table, try to recover every possible row from the data file. Normally this will also find a lot of garbage rows; Don't use this option with repair if you are not totally desperate.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"fast", 'F', + "Check only tables that haven't been closed properly. It also applies to other requested actions (e.g. --analyze will be ignored if the table is already analyzed).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"force", 'f', + "Restart with -r if there are any errors in the table. States will be updated as with --update-state.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"HELP", 'H', + "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', + "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"information", 'i', + "Print statistics information about table that is checked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"keys-used", 'k', + "Tell MARIA to update only some specific keys. # is a bit mask of which keys to use. This can be used to get faster inserts.", + (gptr*) &check_param.keys_in_use, + (gptr*) &check_param.keys_in_use, + 0, GET_ULL, REQUIRED_ARG, -1, 0, 0, 0, 0, 0}, + {"max-record-length", OPT_MAX_RECORD_LENGTH, + "Skip rows bigger than this if maria_chk can't allocate memory to hold it", + (gptr*) &check_param.max_record_length, + (gptr*) &check_param.max_record_length, + 0, GET_ULL, REQUIRED_ARG, LONGLONG_MAX, 0, LONGLONG_MAX, 0, 0, 0}, + {"medium-check", 'm', + "Faster than extend-check, but only finds 99.99% of all errors. Should be good enough for most cases.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"quick", 'q', "Faster repair by not modifying the data file.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"read-only", 'T', + "Don't mark table as checked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"recover", 'r', + "Can fix almost anything except unique keys that aren't unique.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"parallel-recover", 'p', + "Same as '-r' but creates all the keys in parallel.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"safe-recover", 'o', + "Uses old recovery method; Slower than '-r' but can handle a couple of cases where '-r' reports that it can't fix the data file.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-recover", 'n', + "Force recovering with sorting even if the temporary file was very big.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifdef DEBUG + {"start-check-pos", OPT_START_CHECK_POS, + "No help available.", + 0, 0, 0, GET_ULL, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"set-auto-increment", 'A', + "Force auto_increment to start at this or higher value. If no value is given, then sets the next auto_increment value to the highest used value for the auto key + 1.", + (gptr*) &check_param.auto_increment_value, + (gptr*) &check_param.auto_increment_value, + 0, GET_ULL, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"set-collation", OPT_SET_COLLATION, + "Change the collation used by the index", + (gptr*) &set_collation_name, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"set-variable", 'O', + "Change the value of a variable. Please note that this option is deprecated; you can set variables directly with --variable-name=value.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', + "Only print errors. One can use two -s to make maria_chk very silent.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-index", 'S', + "Sort index blocks. This speeds up 'read-next' in applications.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"sort-records", 'R', + "Sort records according to an index. This makes your data much more localized and may speed up things. (It may be VERY slow to do a sort the first time!)", + (gptr*) &check_param.opt_sort_key, + (gptr*) &check_param.opt_sort_key, + 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"tmpdir", 't', + "Path for temporary files.", + (gptr*) &opt_tmpdir, + 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"update-state", 'U', + "Mark tables as crashed if any errors were found.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"unpack", 'u', + "Unpack file packed with mariapack.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', + "Print more information. This can be used with --description and --check. Use many -v for more verbosity!", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', + "Print version and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', + "Wait if table is locked.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { "key_buffer_size", OPT_KEY_BUFFER_SIZE, "", + (gptr*) &check_param.use_buffers, (gptr*) &check_param.use_buffers, 0, + GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, + { "read_buffer_size", OPT_READ_BUFFER_SIZE, "", + (gptr*) &check_param.read_buffer_length, + (gptr*) &check_param.read_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "write_buffer_size", OPT_WRITE_BUFFER_SIZE, "", + (gptr*) &check_param.write_buffer_length, + (gptr*) &check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD, + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "", + (gptr*) &check_param.sort_buffer_length, + (gptr*) &check_param.sort_buffer_length, 0, GET_ULONG, REQUIRED_ARG, + (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD), + (long) ~0L, (long) MALLOC_OVERHEAD, (long) 1L, 0}, + { "sort_key_blocks", OPT_SORT_KEY_BLOCKS, "", + (gptr*) &check_param.sort_key_blocks, + (gptr*) &check_param.sort_key_blocks, 0, GET_ULONG, REQUIRED_ARG, + BUFFERS_WHEN_SORTING, 4L, 100L, 0L, 1L, 0}, + { "decode_bits", OPT_DECODE_BITS, "", (gptr*) &decode_bits, + (gptr*) &decode_bits, 0, GET_UINT, REQUIRED_ARG, 9L, 4L, 17L, 0L, 1L, 0}, + { "ft_min_word_len", OPT_FT_MIN_WORD_LEN, "", (gptr*) &ft_min_word_len, + (gptr*) &ft_min_word_len, 0, GET_ULONG, REQUIRED_ARG, 4, 1, HA_FT_MAXCHARLEN, + 0, 1, 0}, + { "ft_max_word_len", OPT_FT_MAX_WORD_LEN, "", (gptr*) &ft_max_word_len, + (gptr*) &ft_max_word_len, 0, GET_ULONG, REQUIRED_ARG, HA_FT_MAXCHARLEN, 10, + HA_FT_MAXCHARLEN, 0, 1, 0}, + { "maria_ft_stopword_file", OPT_FT_STOPWORD_FILE, + "Use stopwords from this file instead of built-in list.", + (gptr*) &ft_stopword_file, (gptr*) &ft_stopword_file, 0, GET_STR, + REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"stats_method", OPT_STATS_METHOD, + "Specifies how index statistics collection code should threat NULLs. " + "Possible values of name are \"nulls_unequal\" (default behavior for 4.1/5.0), " + "\"nulls_equal\" (emulate 4.0 behavior), and \"nulls_ignored\".", + (gptr*) &maria_stats_method_str, (gptr*) &maria_stats_method_str, 0, + GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +#include + +static void print_version(void) +{ + printf("%s Ver 1.0 for %s at %s\n", my_progname, SYSTEM_TYPE, + MACHINE_TYPE); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("By Monty, for your professional use"); + puts("This software comes with NO WARRANTY: see the PUBLIC for details.\n"); + puts("Description, check and repair of MARIA tables."); + puts("Used without options all tables on the command will be checked for errors"); + printf("Usage: %s [OPTIONS] tables[.MYI]\n", my_progname_short); + printf("\nGlobal options:\n"); +#ifndef DBUG_OFF + printf("\ + -#, --debug=... Output debug log. Often this is 'd:t:o,filename'.\n"); +#endif + printf("\ + -?, --help Display this help and exit.\n\ + -O, --set-variable var=option.\n\ + Change the value of a variable. Please note that\n\ + this option is deprecated; you can set variables\n\ + directly with '--variable-name=value'.\n\ + -t, --tmpdir=path Path for temporary files. Multiple paths can be\n\ + specified, separated by "); +#if defined( __WIN__) || defined(__NETWARE__) + printf("semicolon (;)"); +#else + printf("colon (:)"); +#endif + printf(", they will be used\n\ + in a round-robin fashion.\n\ + -s, --silent Only print errors. One can use two -s to make\n\ + maria_chk very silent.\n\ + -v, --verbose Print more information. This can be used with\n\ + --description and --check. Use many -v for more verbosity.\n\ + -V, --version Print version and exit.\n\ + -w, --wait Wait if table is locked.\n\n"); +#ifdef DEBUG + puts(" --start-check-pos=# Start reading file at given offset.\n"); +#endif + + puts("Check options (check is the default action for maria_chk):\n\ + -c, --check Check table for errors.\n\ + -e, --extend-check Check the table VERY throughly. Only use this in\n\ + extreme cases as maria_chk should normally be able to\n\ + find out if the table is ok even without this switch.\n\ + -F, --fast Check only tables that haven't been closed properly.\n\ + -C, --check-only-changed\n\ + Check only tables that have changed since last check.\n\ + -f, --force Restart with '-r' if there are any errors in the table.\n\ + States will be updated as with '--update-state'.\n\ + -i, --information Print statistics information about table that is checked.\n\ + -m, --medium-check Faster than extend-check, but only finds 99.99% of\n\ + all errors. Should be good enough for most cases.\n\ + -U --update-state Mark tables as crashed if you find any errors.\n\ + -T, --read-only Don't mark table as checked.\n"); + + puts("Repair options (When using '-r' or '-o'):\n\ + -B, --backup Make a backup of the .MYD file as 'filename-time.BAK'.\n\ + --correct-checksum Correct checksum information for table.\n\ + -D, --data-file-length=# Max length of data file (when recreating data\n\ + file when it's full).\n\ + -e, --extend-check Try to recover every possible row from the data file\n\ + Normally this will also find a lot of garbage rows;\n\ + Don't use this option if you are not totally desperate.\n\ + -f, --force Overwrite old temporary files.\n\ + -k, --keys-used=# Tell MARIA to update only some specific keys. # is a\n\ + bit mask of which keys to use. This can be used to\n\ + get faster inserts.\n\ + --max-record-length=#\n\ + Skip rows bigger than this if maria_chk can't allocate\n\ + memory to hold it.\n\ + -r, --recover Can fix almost anything except unique keys that aren't\n\ + unique.\n\ + -n, --sort-recover Forces recovering with sorting even if the temporary\n\ + file would be very big.\n\ + -p, --parallel-recover\n\ + Uses the same technique as '-r' and '-n', but creates\n\ + all the keys in parallel, in different threads.\n\ + -o, --safe-recover Uses old recovery method; Slower than '-r' but can\n\ + handle a couple of cases where '-r' reports that it\n\ + can't fix the data file.\n\ + --character-sets-dir=...\n\ + Directory where character sets are.\n\ + --set-collation=name\n\ + Change the collation used by the index.\n\ + -q, --quick Faster repair by not modifying the data file.\n\ + One can give a second '-q' to force maria_chk to\n\ + modify the original datafile in case of duplicate keys.\n\ + NOTE: Tables where the data file is currupted can't be\n\ + fixed with this option.\n\ + -u, --unpack Unpack file packed with mariapack.\n\ +"); + + puts("Other actions:\n\ + -a, --analyze Analyze distribution of keys. Will make some joins in\n\ + MySQL faster. You can check the calculated distribution\n\ + by using '--description --verbose table_name'.\n\ + --stats_method=name Specifies how index statistics collection code should\n\ + threat NULLs. Possible values of name are \"nulls_unequal\"\n\ + (default for 4.1/5.0), \"nulls_equal\" (emulate 4.0), and \n\ + \"nulls_ignored\".\n\ + -d, --description Prints some information about table.\n\ + -A, --set-auto-increment[=value]\n\ + Force auto_increment to start at this or higher value\n\ + If no value is given, then sets the next auto_increment\n\ + value to the highest used value for the auto key + 1.\n\ + -S, --sort-index Sort index blocks. This speeds up 'read-next' in\n\ + applications.\n\ + -R, --sort-records=#\n\ + Sort records according to an index. This makes your\n\ + data much more localized and may speed up things\n\ + (It may be VERY slow to do a sort the first time!).\n\ + -b, --block-search=#\n\ + Find a record, a block at given offset belongs to."); + + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + +#include + +const char *maria_stats_method_names[] = {"nulls_unequal", "nulls_equal", + "nulls_ignored", NullS}; +TYPELIB maria_stats_method_typelib= { + array_elements(maria_stats_method_names) - 1, "", + maria_stats_method_names, NULL}; + + /* Read options */ + +static my_bool +get_one_option(int optid, + const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch (optid) { +#ifdef __NETWARE__ + case OPT_AUTO_CLOSE: + setscreenmode(SCR_AUTOCLOSE_ON_EXIT); + break; +#endif + case 'a': + if (argument == disabled_my_option) + check_param.testflag&= ~T_STATISTICS; + else + check_param.testflag|= T_STATISTICS; + break; + case 'A': + if (argument) + check_param.auto_increment_value= strtoull(argument, NULL, 0); + else + check_param.auto_increment_value= 0; /* Set to max used value */ + check_param.testflag|= T_AUTO_INC; + break; + case 'b': + check_param.search_after_block= strtoul(argument, NULL, 10); + break; + case 'B': + if (argument == disabled_my_option) + check_param.testflag&= ~T_BACKUP_DATA; + else + check_param.testflag|= T_BACKUP_DATA; + break; + case 'c': + if (argument == disabled_my_option) + check_param.testflag&= ~T_CHECK; + else + check_param.testflag|= T_CHECK; + break; + case 'C': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_CHECK | T_CHECK_ONLY_CHANGED); + else + check_param.testflag|= T_CHECK | T_CHECK_ONLY_CHANGED; + break; + case 'D': + check_param.max_data_file_length=strtoll(argument, NULL, 10); + break; + case 's': /* silent */ + if (argument == disabled_my_option) + check_param.testflag&= ~(T_SILENT | T_VERY_SILENT); + else + { + if (check_param.testflag & T_SILENT) + check_param.testflag|= T_VERY_SILENT; + check_param.testflag|= T_SILENT; + check_param.testflag&= ~T_WRITE_LOOP; + } + break; + case 'w': + if (argument == disabled_my_option) + check_param.testflag&= ~T_WAIT_FOREVER; + else + check_param.testflag|= T_WAIT_FOREVER; + break; + case 'd': /* description if isam-file */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_DESCRIPT; + else + check_param.testflag|= T_DESCRIPT; + break; + case 'e': /* extend check */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_EXTEND; + else + check_param.testflag|= T_EXTEND; + break; + case 'i': + if (argument == disabled_my_option) + check_param.testflag&= ~T_INFO; + else + check_param.testflag|= T_INFO; + break; + case 'f': + if (argument == disabled_my_option) + { + check_param.tmpfile_createflag= O_RDWR | O_TRUNC | O_EXCL; + check_param.testflag&= ~(T_FORCE_CREATE | T_UPDATE_STATE); + } + else + { + check_param.tmpfile_createflag= O_RDWR | O_TRUNC; + check_param.testflag|= T_FORCE_CREATE | T_UPDATE_STATE; + } + break; + case 'F': + if (argument == disabled_my_option) + check_param.testflag&= ~T_FAST; + else + check_param.testflag|= T_FAST; + break; + case 'k': + check_param.keys_in_use= (ulonglong) strtoll(argument, NULL, 10); + break; + case 'm': + if (argument == disabled_my_option) + check_param.testflag&= ~T_MEDIUM; + else + check_param.testflag|= T_MEDIUM; /* Medium check */ + break; + case 'r': /* Repair table */ + check_param.testflag&= ~T_REP_ANY; + if (argument != disabled_my_option) + check_param.testflag|= T_REP_BY_SORT; + break; + case 'p': + check_param.testflag&= ~T_REP_ANY; + if (argument != disabled_my_option) + check_param.testflag|= T_REP_PARALLEL; + break; + case 'o': + check_param.testflag&= ~T_REP_ANY; + check_param.force_sort= 0; + if (argument != disabled_my_option) + { + check_param.testflag|= T_REP; + my_disable_async_io= 1; /* More safety */ + } + break; + case 'n': + check_param.testflag&= ~T_REP_ANY; + if (argument == disabled_my_option) + check_param.force_sort= 0; + else + { + check_param.testflag|= T_REP_BY_SORT; + check_param.force_sort= 1; + } + break; + case 'q': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_QUICK | T_FORCE_UNIQUENESS); + else + check_param.testflag|= + (check_param.testflag & T_QUICK) ? T_FORCE_UNIQUENESS : T_QUICK; + break; + case 'u': + if (argument == disabled_my_option) + check_param.testflag&= ~(T_UNPACK | T_REP_BY_SORT); + else + check_param.testflag|= T_UNPACK | T_REP_BY_SORT; + break; + case 'v': /* Verbose */ + if (argument == disabled_my_option) + { + check_param.testflag&= ~T_VERBOSE; + check_param.verbose=0; + } + else + { + check_param.testflag|= T_VERBOSE; + check_param.verbose++; + } + break; + case 'R': /* Sort records */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_SORT_RECORDS; + else + { + check_param.testflag|= T_SORT_RECORDS; + check_param.opt_sort_key= (uint) atoi(argument) - 1; + if (check_param.opt_sort_key >= MARIA_MAX_KEY) + { + fprintf(stderr, + "The value of the sort key is bigger than max key: %d.\n", + MARIA_MAX_KEY); + exit(1); + } + } + break; + case 'S': /* Sort index */ + if (argument == disabled_my_option) + check_param.testflag&= ~T_SORT_INDEX; + else + check_param.testflag|= T_SORT_INDEX; + break; + case 'T': + if (argument == disabled_my_option) + check_param.testflag&= ~T_READONLY; + else + check_param.testflag|= T_READONLY; + break; + case 'U': + if (argument == disabled_my_option) + check_param.testflag&= ~T_UPDATE_STATE; + else + check_param.testflag|= T_UPDATE_STATE; + break; + case '#': + if (argument == disabled_my_option) + { + DBUG_POP(); + } + else + { + DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_chk.trace"); + } + break; + case 'V': + print_version(); + exit(0); + case OPT_CORRECT_CHECKSUM: + if (argument == disabled_my_option) + check_param.testflag&= ~T_CALC_CHECKSUM; + else + check_param.testflag|= T_CALC_CHECKSUM; + break; + case OPT_STATS_METHOD: + { + int method; + enum_handler_stats_method method_conv; + LINT_INIT(method_conv); + maria_stats_method_str= argument; + if ((method=find_type(argument, &maria_stats_method_typelib, 2)) <= 0) + { + fprintf(stderr, "Invalid value of stats_method: %s.\n", argument); + exit(1); + } + switch (method-1) { + case 0: + method_conv= MI_STATS_METHOD_NULLS_EQUAL; + break; + case 1: + method_conv= MI_STATS_METHOD_NULLS_NOT_EQUAL; + break; + case 2: + method_conv= MI_STATS_METHOD_IGNORE_NULLS; + break; + default: assert(0); /* Impossible */ + } + check_param.stats_method= method_conv; + break; + } +#ifdef DEBUG /* Only useful if debugging */ + case OPT_START_CHECK_POS: + check_param.start_check_pos= strtoull(argument, NULL, 0); + break; +#endif + case 'H': + my_print_help(my_long_options); + exit(0); + case '?': + usage(); + exit(0); + } + return 0; +} + + +static void get_options(register int *argc,register char ***argv) +{ + int ho_error; + + load_defaults("my", load_default_groups, argc, argv); + default_argv= *argv; + if (isatty(fileno(stdout))) + check_param.testflag|=T_WRITE_LOOP; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + /* If using repair, then update checksum if one uses --update-state */ + if ((check_param.testflag & T_UPDATE_STATE) && + (check_param.testflag & T_REP_ANY)) + check_param.testflag|= T_CALC_CHECKSUM; + + if (*argc == 0) + { + usage(); + exit(-1); + } + + if ((check_param.testflag & T_UNPACK) && + (check_param.testflag & (T_QUICK | T_SORT_RECORDS))) + { + VOID(fprintf(stderr, + "%s: --unpack can't be used with --quick or --sort-records\n", + my_progname_short)); + exit(1); + } + if ((check_param.testflag & T_READONLY) && + (check_param.testflag & + (T_REP_ANY | T_STATISTICS | T_AUTO_INC | + T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE))) + { + VOID(fprintf(stderr, + "%s: Can't use --readonly when repairing or sorting\n", + my_progname_short)); + exit(1); + } + + if (init_tmpdir(&maria_chk_tmpdir, opt_tmpdir)) + exit(1); + + check_param.tmpdir=&maria_chk_tmpdir; + + if (set_collation_name) + if (!(set_collation= get_charset_by_name(set_collation_name, + MYF(MY_WME)))) + exit(1); + + return; +} /* get options */ + + + /* Check table */ + +static int maria_chk(HA_CHECK *param, my_string filename) +{ + int error,lock_type,recreate; + int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS); + MARIA_HA *info; + File datafile; + char llbuff[22],llbuff2[22]; + my_bool state_updated=0; + MARIA_SHARE *share; + DBUG_ENTER("maria_chk"); + + param->out_flag=error=param->warning_printed=param->error_printed= + recreate=0; + datafile=0; + param->isam_file_name=filename; /* For error messages */ + if (!(info=maria_open(filename, + (param->testflag & (T_DESCRIPT | T_READONLY)) ? + O_RDONLY : O_RDWR, + HA_OPEN_FOR_REPAIR | + ((param->testflag & T_WAIT_FOREVER) ? + HA_OPEN_WAIT_IF_LOCKED : + (param->testflag & T_DESCRIPT) ? + HA_OPEN_IGNORE_IF_LOCKED : HA_OPEN_ABORT_IF_LOCKED)))) + { + /* Avoid twice printing of isam file name */ + param->error_printed=1; + switch (my_errno) { + case HA_ERR_CRASHED: + _ma_check_print_error(param,"'%s' doesn't have a correct index definition. You need to recreate it before you can do a repair",filename); + break; + case HA_ERR_NOT_A_TABLE: + _ma_check_print_error(param,"'%s' is not a MARIA-table",filename); + break; + case HA_ERR_CRASHED_ON_USAGE: + _ma_check_print_error(param,"'%s' is marked as crashed",filename); + break; + case HA_ERR_CRASHED_ON_REPAIR: + _ma_check_print_error(param,"'%s' is marked as crashed after last repair",filename); + break; + case HA_ERR_OLD_FILE: + _ma_check_print_error(param,"'%s' is a old type of MARIA-table", filename); + break; + case HA_ERR_END_OF_FILE: + _ma_check_print_error(param,"Couldn't read complete header from '%s'", filename); + break; + case EAGAIN: + _ma_check_print_error(param,"'%s' is locked. Use -w to wait until unlocked",filename); + break; + case ENOENT: + _ma_check_print_error(param,"File '%s' doesn't exist",filename); + break; + case EACCES: + _ma_check_print_error(param,"You don't have permission to use '%s'", + filename); + break; + default: + _ma_check_print_error(param,"%d when opening MARIA-table '%s'", + my_errno,filename); + break; + } + DBUG_RETURN(1); + } + share=info->s; + share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */ + share->tot_locks-= share->r_locks; + share->r_locks=0; + maria_block_size= share->base.block_size; + + if (share->data_file_type == BLOCK_RECORD && + (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_FAST | T_STATISTICS | + T_CHECK | T_CHECK_ONLY_CHANGED))) + { + _ma_check_print_error(param, + "Record format used by '%s' is is not yet supported with repair/check", + filename); + param->error_printed= 0; + error= 1; + goto end2; + } + + /* + Skip the checking of the file if: + We are using --fast and the table is closed properly + We are using --check-only-changed-tables and the table hasn't changed + */ + if (param->testflag & (T_FAST | T_CHECK_ONLY_CHANGED)) + { + my_bool need_to_check= (maria_is_crashed(info) || + share->state.open_count != 0); + + if ((param->testflag & (T_REP_ANY | T_SORT_RECORDS)) && + ((share->state.changed & (STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR) || + !(param->testflag & T_CHECK_ONLY_CHANGED)))) + need_to_check=1; + + if (info->s->base.keys && info->state->records) + { + if ((param->testflag & T_STATISTICS) && + (share->state.changed & STATE_NOT_ANALYZED)) + need_to_check=1; + if ((param->testflag & T_SORT_INDEX) && + (share->state.changed & STATE_NOT_SORTED_PAGES)) + need_to_check=1; + if ((param->testflag & T_REP_BY_SORT) && + (share->state.changed & STATE_NOT_OPTIMIZED_KEYS)) + need_to_check=1; + } + if ((param->testflag & T_CHECK_ONLY_CHANGED) && + (share->state.changed & (STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR))) + need_to_check=1; + if (!need_to_check) + { + if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) + printf("MARIA file: %s is already checked\n",filename); + if (maria_close(info)) + { + _ma_check_print_error(param,"%d when closing MARIA-table '%s'", + my_errno,filename); + DBUG_RETURN(1); + } + DBUG_RETURN(0); + } + } + if ((param->testflag & (T_REP_ANY | T_STATISTICS | + T_SORT_RECORDS | T_SORT_INDEX)) && + (((param->testflag & T_UNPACK) && + share->data_file_type == COMPRESSED_RECORD) || + mi_uint2korr(share->state.header.state_info_length) != + MARIA_STATE_INFO_SIZE || + mi_uint2korr(share->state.header.base_info_length) != + MARIA_BASE_INFO_SIZE || + maria_is_any_intersect_keys_active(param->keys_in_use, share->base.keys, + ~share->state.key_map) || + maria_test_if_almost_full(info) || + info->s->state.header.file_version[3] != maria_file_magic[3] || + (set_collation && + set_collation->number != share->state.header.language))) + { + if (set_collation) + param->language= set_collation->number; + if (maria_recreate_table(param, &info,filename)) + { + VOID(fprintf(stderr, + "MARIA-table '%s' is not fixed because of errors\n", + filename)); + return(-1); + } + recreate=1; + if (!(param->testflag & T_REP_ANY)) + { + param->testflag|=T_REP_BY_SORT; /* if only STATISTICS */ + if (!(param->testflag & T_SILENT)) + printf("- '%s' has old table-format. Recreating index\n",filename); + rep_quick|=T_QUICK; + } + share=info->s; + share->tot_locks-= share->r_locks; + share->r_locks=0; + } + + if (param->testflag & T_DESCRIPT) + { + param->total_files++; + param->total_records+=info->state->records; + param->total_deleted+=info->state->del; + descript(param, info, filename); + maria_close(info); /* Should always succeed */ + return(0); + } + + if (!stopwords_inited++) + ft_init_stopwords(); + + if (!(param->testflag & T_READONLY)) + lock_type = F_WRLCK; /* table is changed */ + else + lock_type= F_RDLCK; + if (info->lock_type == F_RDLCK) + info->lock_type=F_UNLCK; /* Read only table */ + if (_ma_readinfo(info,lock_type,0)) + { + _ma_check_print_error(param,"Can't lock indexfile of '%s', error: %d", + filename,my_errno); + param->error_printed=0; + error= 1; + goto end2; + } + /* + _ma_readinfo() has locked the table. + We mark the table as locked (without doing file locks) to be able to + use functions that only works on locked tables (like row caching). + */ + maria_lock_database(info, F_EXTRA_LCK); + datafile= info->dfile.file; + if (init_pagecache(maria_pagecache, param->use_buffers, 0, 0, + maria_block_size) == 0) + { + _ma_check_print_error(param, "Can't initialize page cache with %lu memory", + (ulong) param->use_buffers); + error= 1; + goto end2; + } + + if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) + { + if (param->testflag & T_REP_ANY) + { + ulonglong tmp=share->state.key_map; + maria_copy_keys_active(share->state.key_map, share->base.keys, + param->keys_in_use); + if (tmp != share->state.key_map) + info->update|=HA_STATE_CHANGED; + } + if (rep_quick && + maria_chk_del(param, info, param->testflag & ~T_VERBOSE)) + { + if (param->testflag & T_FORCE_CREATE) + { + rep_quick=0; + _ma_check_print_info(param,"Creating new data file\n"); + } + else + { + error=1; + _ma_check_print_error(param, + "Quick-recover aborted; Run recovery without switch 'q'"); + } + } + if (!error) + { + if ((param->testflag & (T_REP_BY_SORT | T_REP_PARALLEL)) && + (maria_is_any_key_active(share->state.key_map) || + (rep_quick && !param->keys_in_use && !recreate)) && + maria_test_if_sort_rep(info, info->state->records, + info->s->state.key_map, + param->force_sort)) + { + if (param->testflag & T_REP_BY_SORT) + error=maria_repair_by_sort(param,info,filename,rep_quick); + else + error=maria_repair_parallel(param,info,filename,rep_quick); + state_updated=1; + } + else if (param->testflag & T_REP_ANY) + error=maria_repair(param, info,filename,rep_quick); + } + if (!error && param->testflag & T_SORT_RECORDS) + { + /* + The data file is nowadays reopened in the repair code so we should + soon remove the following reopen-code + */ +#ifndef TO_BE_REMOVED + if (param->out_flag & O_NEW_DATA) + { /* Change temp file to org file */ + VOID(my_close(info->dfile.file, MYF(MY_WME))); /* Close new file */ + error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, + MYF(0)); + if (_ma_open_datafile(info,info->s, -1)) + error=1; + param->out_flag&= ~O_NEW_DATA; /* We are using new datafile */ + param->read_cache.file= info->dfile.file; + } +#endif + if (! error) + { + uint key; + /* + We can't update the index in maria_sort_records if we have a + prefix compressed or fulltext index + */ + my_bool update_index=1; + for (key=0 ; key < share->base.keys; key++) + if (share->keyinfo[key].flag & (HA_BINARY_PACK_KEY|HA_FULLTEXT)) + update_index=0; + + error=maria_sort_records(param,info,filename,param->opt_sort_key, + /* what is the following parameter for ? */ + (my_bool) !(param->testflag & T_REP), + update_index); + datafile= info->dfile.file; /* This is now locked */ + if (!error && !update_index) + { + if (param->verbose) + puts("Table had a compressed index; We must now recreate the index"); + error=maria_repair_by_sort(param,info,filename,1); + } + } + } + if (!error && param->testflag & T_SORT_INDEX) + error=maria_sort_index(param,info,filename); + if (!error) + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + else + maria_mark_crashed(info); + } + else if ((param->testflag & T_CHECK) || !(param->testflag & T_AUTO_INC)) + { + if (!(param->testflag & T_SILENT) || param->testflag & T_INFO) + printf("Checking MARIA file: %s\n",filename); + if (!(param->testflag & T_SILENT)) + printf("Data records: %7s Deleted blocks: %7s\n", + llstr(info->state->records,llbuff), + llstr(info->state->del,llbuff2)); + error =maria_chk_status(param,info); + maria_intersect_keys_active(share->state.key_map, param->keys_in_use); + error =maria_chk_size(param,info); + if (!error || !(param->testflag & (T_FAST | T_FORCE_CREATE))) + error|=maria_chk_del(param, info,param->testflag); + if ((!error || (!(param->testflag & (T_FAST | T_FORCE_CREATE)) && + !param->start_check_pos))) + { + error|=maria_chk_key(param, info); + if (!error && (param->testflag & (T_STATISTICS | T_AUTO_INC))) + error=maria_update_state_info(param, info, + ((param->testflag & T_STATISTICS) ? + UPDATE_STAT : 0) | + ((param->testflag & T_AUTO_INC) ? + UPDATE_AUTO_INC : 0)); + } + if ((!rep_quick && !error) || + !(param->testflag & (T_FAST | T_FORCE_CREATE))) + { + VOID(init_io_cache(¶m->read_cache,datafile, + (uint) param->read_buffer_length, + READ_CACHE, + (param->start_check_pos ? + param->start_check_pos : + share->pack.header_length), + 1, + MYF(MY_WME))); + maria_lock_memory(param); + if ((info->s->data_file_type != STATIC_RECORD) || + (param->testflag & (T_EXTEND | T_MEDIUM))) + error|=maria_chk_data_link(param, info, param->testflag & T_EXTEND); + error|=_ma_flush_blocks(param, share->pagecache, &share->kfile); + VOID(end_io_cache(¶m->read_cache)); + } + if (!error) + { + if ((share->state.changed & STATE_CHANGED) && + (param->testflag & T_UPDATE_STATE)) + info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + share->state.changed&= ~(STATE_CHANGED | STATE_CRASHED | + STATE_CRASHED_ON_REPAIR); + } + else if (!maria_is_crashed(info) && + (param->testflag & T_UPDATE_STATE)) + { /* Mark crashed */ + maria_mark_crashed(info); + info->update|=HA_STATE_CHANGED | HA_STATE_ROW_CHANGED; + } + } + + if ((param->testflag & T_AUTO_INC) || + ((param->testflag & T_REP_ANY) && info->s->base.auto_key)) + _ma_update_auto_increment_key(param, info, + (my_bool) !test(param->testflag & T_AUTO_INC)); + + if (info->update & HA_STATE_CHANGED && ! (param->testflag & T_READONLY)) + error|=maria_update_state_info(param, info, + UPDATE_OPEN_COUNT | + (((param->testflag & T_REP_ANY) ? + UPDATE_TIME : 0) | + (state_updated ? UPDATE_STAT : 0) | + ((param->testflag & T_SORT_RECORDS) ? + UPDATE_SORT : 0))); + info->update&= ~HA_STATE_CHANGED; + maria_lock_database(info, F_UNLCK); + +end2: + end_pagecache(maria_pagecache, 1); + if (maria_close(info)) + { + _ma_check_print_error(param,"%d when closing MARIA-table '%s'", + my_errno,filename); + DBUG_RETURN(1); + } + if (error == 0) + { + if (param->out_flag & O_NEW_DATA) + error|=maria_change_to_newfile(filename,MARIA_NAME_DEXT,DATA_TMP_EXT, + ((param->testflag & T_BACKUP_DATA) ? + MYF(MY_REDEL_MAKE_BACKUP) : MYF(0))); + if (param->out_flag & O_NEW_INDEX) + error|=maria_change_to_newfile(filename,MARIA_NAME_IEXT,INDEX_TMP_EXT, + MYF(0)); + } + VOID(fflush(stdout)); VOID(fflush(stderr)); + if (param->error_printed) + { + if (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX)) + { + VOID(fprintf(stderr, + "MARIA-table '%s' is not fixed because of errors\n", + filename)); + if (param->testflag & T_REP_ANY) + VOID(fprintf(stderr, + "Try fixing it by using the --safe-recover (-o), the --force (-f) option or by not using the --quick (-q) flag\n")); + } + else if (!(param->error_printed & 2) && + !(param->testflag & T_FORCE_CREATE)) + VOID(fprintf(stderr, + "MARIA-table '%s' is corrupted\nFix it using switch \"-r\" or \"-o\"\n", + filename)); + } + else if (param->warning_printed && + ! (param->testflag & (T_REP_ANY | T_SORT_RECORDS | T_SORT_INDEX | + T_FORCE_CREATE))) + VOID(fprintf(stderr, "MARIA-table '%s' is usable but should be fixed\n", + filename)); + VOID(fflush(stderr)); + DBUG_RETURN(error); +} /* maria_chk */ + + +/* Write info about table */ + +static void descript(HA_CHECK *param, register MARIA_HA *info, my_string name) +{ + uint key,keyseg_nr,field; + reg3 MARIA_KEYDEF *keyinfo; + reg2 HA_KEYSEG *keyseg; + reg4 const char *text; + char buff[160],length[10],*pos,*end; + enum en_fieldtype type; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + DBUG_ENTER("describe"); + + printf("\nMARIA file: %s\n",name); + printf("Record format: %s\n", record_formats[share->data_file_type]); + printf("Character set: %s (%d)\n", + get_charset_name(share->state.header.language), + share->state.header.language); + + if (param->testflag & T_VERBOSE) + { + printf("File-version: %d\n", + (int) share->state.header.file_version[3]); + if (share->state.create_time) + { + get_date(buff,1,share->state.create_time); + printf("Creation time: %s\n",buff); + } + if (share->state.check_time) + { + get_date(buff,1,share->state.check_time); + printf("Recover time: %s\n",buff); + } + pos=buff; + if (share->state.changed & STATE_CRASHED) + strmov(buff,"crashed"); + else + { + if (share->state.open_count) + pos=strmov(pos,"open,"); + if (share->state.changed & STATE_CHANGED) + pos=strmov(pos,"changed,"); + else + pos=strmov(pos,"checked,"); + if (!(share->state.changed & STATE_NOT_ANALYZED)) + pos=strmov(pos,"analyzed,"); + if (!(share->state.changed & STATE_NOT_OPTIMIZED_KEYS)) + pos=strmov(pos,"optimized keys,"); + if (!(share->state.changed & STATE_NOT_SORTED_PAGES)) + pos=strmov(pos,"sorted index pages,"); + pos[-1]=0; /* Remove extra ',' */ + } + printf("Status: %s\n",buff); + if (share->base.auto_key) + { + printf("Auto increment key: %16d Last value: %18s\n", + share->base.auto_key, + llstr(share->state.auto_increment,llbuff)); + } + if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) + printf("Checksum: %26s\n",llstr(info->state->checksum,llbuff)); +; + if (share->options & HA_OPTION_DELAY_KEY_WRITE) + printf("Keys are only flushed at close\n"); + + } + printf("Data records: %16s Deleted blocks: %18s\n", + llstr(info->state->records,llbuff),llstr(info->state->del,llbuff2)); + if (param->testflag & T_SILENT) + DBUG_VOID_RETURN; /* This is enough */ + + if (param->testflag & T_VERBOSE) + { +#ifdef USE_RELOC + printf("Init-relocation: %16s\n",llstr(share->base.reloc,llbuff)); +#endif + printf("Datafile parts: %16s Deleted data: %18s\n", + llstr(share->state.split,llbuff), + llstr(info->state->empty,llbuff2)); + printf("Datafile pointer (bytes): %11d Keyfile pointer (bytes): %13d\n", + share->rec_reflength,share->base.key_reflength); + printf("Datafile length: %16s Keyfile length: %18s\n", + llstr(info->state->data_file_length,llbuff), + llstr(info->state->key_file_length,llbuff2)); + + if (info->s->base.reloc == 1L && info->s->base.records == 1L) + puts("This is a one-record table"); + else + { + if (share->base.max_data_file_length != HA_OFFSET_ERROR || + share->base.max_key_file_length != HA_OFFSET_ERROR) + printf("Max datafile length: %16s Max keyfile length: %18s\n", + llstr(share->base.max_data_file_length-1,llbuff), + llstr(share->base.max_key_file_length-1,llbuff2)); + } + } + printf("Block_size: %16d\n",(int) share->block_size); + printf("Recordlength: %16d\n",(int) share->base.pack_reclength); + if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) + { + longlong2str(share->state.key_map,buff,2); + printf("Using only keys '%s' of %d possibly keys\n", + buff, share->base.keys); + } + puts("\ntable description:"); + printf("Key Start Len Index Type"); + if (param->testflag & T_VERBOSE) + printf(" Rec/key Root Blocksize"); + VOID(putchar('\n')); + + for (key=keyseg_nr=0, keyinfo= &share->keyinfo[0] ; + key < share->base.keys; + key++,keyinfo++) + { + keyseg=keyinfo->seg; + if (keyinfo->flag & HA_NOSAME) text="unique "; + else if (keyinfo->flag & HA_FULLTEXT) text="fulltext "; + else text="multip."; + + pos=buff; + if (keyseg->flag & HA_REVERSE_SORT) + *pos++ = '-'; + pos=strmov(pos,type_names[keyseg->type]); + *pos++ = ' '; + *pos=0; + if (keyinfo->flag & HA_PACK_KEY) + pos=strmov(pos,prefix_packed_txt); + if (keyinfo->flag & HA_BINARY_PACK_KEY) + pos=strmov(pos,bin_packed_txt); + if (keyseg->flag & HA_SPACE_PACK) + pos=strmov(pos,diff_txt); + if (keyseg->flag & HA_BLOB_PART) + pos=strmov(pos,blob_txt); + if (keyseg->flag & HA_NULL_PART) + pos=strmov(pos,null_txt); + *pos=0; + + printf("%-4d%-6ld%-3d %-8s%-21s", + key+1,(long) keyseg->start+1,keyseg->length,text,buff); + if (share->state.key_root[key] != HA_OFFSET_ERROR) + llstr(share->state.key_root[key],buff); + else + buff[0]=0; + if (param->testflag & T_VERBOSE) + printf("%11lu %12s %10d", + share->state.rec_per_key_part[keyseg_nr++], + buff,keyinfo->block_length); + VOID(putchar('\n')); + while ((++keyseg)->type != HA_KEYTYPE_END) + { + pos=buff; + if (keyseg->flag & HA_REVERSE_SORT) + *pos++ = '-'; + pos=strmov(pos,type_names[keyseg->type]); + *pos++= ' '; + if (keyseg->flag & HA_SPACE_PACK) + pos=strmov(pos,diff_txt); + if (keyseg->flag & HA_BLOB_PART) + pos=strmov(pos,blob_txt); + if (keyseg->flag & HA_NULL_PART) + pos=strmov(pos,null_txt); + *pos=0; + printf(" %-6ld%-3d %-21s", + (long) keyseg->start+1,keyseg->length,buff); + if (param->testflag & T_VERBOSE) + printf("%11lu", share->state.rec_per_key_part[keyseg_nr++]); + VOID(putchar('\n')); + } + keyseg++; + } + if (share->state.header.uniques) + { + MARIA_UNIQUEDEF *uniqueinfo; + puts("\nUnique Key Start Len Nullpos Nullbit Type"); + for (key=0,uniqueinfo= &share->uniqueinfo[0] ; + key < share->state.header.uniques; key++, uniqueinfo++) + { + my_bool new_row=0; + char null_bit[8],null_pos[8]; + printf("%-8d%-5d",key+1,uniqueinfo->key+1); + for (keyseg=uniqueinfo->seg ; keyseg->type != HA_KEYTYPE_END ; keyseg++) + { + if (new_row) + fputs(" ",stdout); + null_bit[0]=null_pos[0]=0; + if (keyseg->null_bit) + { + sprintf(null_bit,"%d",keyseg->null_bit); + sprintf(null_pos,"%ld",(long) keyseg->null_pos+1); + } + printf("%-7ld%-5d%-9s%-10s%-30s\n", + (long) keyseg->start+1,keyseg->length, + null_pos,null_bit, + type_names[keyseg->type]); + new_row=1; + } + } + } + if (param->verbose > 1) + { + char null_bit[8],null_pos[8]; + printf("\nField Start Length Nullpos Nullbit Type"); + if (share->options & HA_OPTION_COMPRESS_RECORD) + printf(" Huff tree Bits"); + VOID(putchar('\n')); + + for (field=0 ; field < share->base.fields ; field++) + { + if (share->options & HA_OPTION_COMPRESS_RECORD) + type=share->columndef[field].base_type; + else + type=(enum en_fieldtype) share->columndef[field].type; + end=strmov(buff,field_pack[type]); + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + if (share->columndef[field].pack_type & PACK_TYPE_SELECTED) + end=strmov(end,", not_always"); + if (share->columndef[field].pack_type & PACK_TYPE_SPACE_FIELDS) + end=strmov(end,", no empty"); + if (share->columndef[field].pack_type & PACK_TYPE_ZERO_FILL) + { + sprintf(end,", zerofill(%d)",share->columndef[field].space_length_bits); + end=strend(end); + } + } + if (buff[0] == ',') + strmov(buff,buff+2); + int10_to_str((long) share->columndef[field].length,length,10); + null_bit[0]=null_pos[0]=0; + if (share->columndef[field].null_bit) + { + sprintf(null_bit,"%d",share->columndef[field].null_bit); + sprintf(null_pos,"%d",share->columndef[field].null_pos+1); + } + printf("%-6d%-6u%-7s%-8s%-8s%-35s",field+1, + (uint) share->columndef[field].offset+1, + length, null_pos, null_bit, buff); + if (share->options & HA_OPTION_COMPRESS_RECORD) + { + if (share->columndef[field].huff_tree) + printf("%3d %2d", + (uint) (share->columndef[field].huff_tree-share->decode_trees)+1, + share->columndef[field].huff_tree->quick_table_bits); + } + VOID(putchar('\n')); + } + } + DBUG_VOID_RETURN; +} /* describe */ + + + /* Sort records according to one key */ + +static int maria_sort_records(HA_CHECK *param, + register MARIA_HA *info, my_string name, + uint sort_key, + my_bool write_info, + my_bool update_index) +{ + int got_error; + uint key; + MARIA_KEYDEF *keyinfo; + File new_file; + byte *temp_buff; + ha_rows old_record_count; + MARIA_SHARE *share=info->s; + char llbuff[22],llbuff2[22]; + MARIA_SORT_INFO sort_info; + MARIA_SORT_PARAM sort_param; + DBUG_ENTER("sort_records"); + + bzero((char*)&sort_info,sizeof(sort_info)); + bzero((char*)&sort_param,sizeof(sort_param)); + sort_param.sort_info=&sort_info; + sort_info.param=param; + keyinfo= &share->keyinfo[sort_key]; + got_error=1; + temp_buff=0; + new_file= -1; + + if (! maria_is_key_active(share->state.key_map, sort_key)) + { + _ma_check_print_warning(param, + "Can't sort table '%s' on key %d; No such key", + name,sort_key+1); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (keyinfo->flag & HA_FULLTEXT) + { + _ma_check_print_warning(param,"Can't sort table '%s' on FULLTEXT key %d", + name,sort_key+1); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (share->data_file_type == COMPRESSED_RECORD) + { + _ma_check_print_warning(param,"Can't sort read-only table '%s'", name); + param->error_printed=0; + DBUG_RETURN(0); /* Nothing to do */ + } + if (!(param->testflag & T_SILENT)) + { + printf("- Sorting records for MARIA-table '%s'\n",name); + if (write_info) + printf("Data records: %9s Deleted: %9s\n", + llstr(info->state->records,llbuff), + llstr(info->state->del,llbuff2)); + } + if (share->state.key_root[sort_key] == HA_OFFSET_ERROR) + DBUG_RETURN(0); /* Nothing to do */ + + if (init_io_cache(&info->rec_cache,-1,(uint) param->write_buffer_length, + WRITE_CACHE,share->pack.header_length,1, + MYF(MY_WME | MY_WAIT_IF_FULL))) + goto err; + info->opt_flag|=WRITE_CACHE_USED; + + if (!(temp_buff=(byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not enough memory for key block"); + goto err; + } + if (!(sort_param.record=(byte*) my_malloc((uint) share->base.pack_reclength, + MYF(0)))) + { + _ma_check_print_error(param,"Not enough memory for record"); + goto err; + } + fn_format(param->temp_filename,name,"", MARIA_NAME_DEXT,2+4+32); + new_file= my_create(fn_format(param->temp_filename, + param->temp_filename,"", + DATA_TMP_EXT, + MY_REPLACE_EXT | MY_UNPACK_FILENAME), + 0, param->tmpfile_createflag, + MYF(0)); + if (new_file < 0) + { + _ma_check_print_error(param,"Can't create new tempfile: '%s'", + param->temp_filename); + goto err; + } + if (share->pack.header_length) + if (maria_filecopy(param, new_file, info->dfile.file, 0L, + share->pack.header_length, + "datafile-header")) + goto err; + info->rec_cache.file=new_file; /* Use this file for cacheing*/ + + maria_lock_memory(param); + for (key=0 ; key < share->base.keys ; key++) + share->keyinfo[key].flag|= HA_SORT_ALLOWS_SAME; + + if (my_pread(share->kfile.file, temp_buff, + (uint) keyinfo->block_length, + share->state.key_root[sort_key], + MYF(MY_NABP+MY_WME))) + { + _ma_check_print_error(param,"Can't read indexpage from filepos: %s", + (ulong) share->state.key_root[sort_key]); + goto err; + } + + /* Setup param for _ma_sort_write_record */ + sort_info.info=info; + sort_info.new_data_file_type=share->data_file_type; + sort_param.fix_datafile=1; + sort_param.master=1; + sort_param.filepos=share->pack.header_length; + old_record_count=info->state->records; + info->state->records=0; + if (sort_info.new_data_file_type != COMPRESSED_RECORD) + info->state->checksum=0; + + if (sort_record_index(&sort_param,info,keyinfo, + share->state.key_root[sort_key], + temp_buff, sort_key,new_file,update_index) || + maria_write_data_suffix(&sort_info,1) || + flush_io_cache(&info->rec_cache)) + goto err; + + if (info->state->records != old_record_count) + { + _ma_check_print_error(param,"found %s of %s records", + llstr(info->state->records,llbuff), + llstr(old_record_count,llbuff2)); + goto err; + } + + VOID(my_close(info->dfile.file, MYF(MY_WME))); + param->out_flag|=O_NEW_DATA; /* Data in new file */ + info->dfile.file= new_file; /* Use new datafile */ + info->state->del=0; + info->state->empty=0; + share->state.dellink= HA_OFFSET_ERROR; + info->state->data_file_length=sort_param.filepos; + share->state.split=info->state->records; /* Only hole records */ + share->state.version=(ulong) time((time_t*) 0); + + info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); + + if (param->testflag & T_WRITE_LOOP) + { + VOID(fputs(" \r",stdout)); VOID(fflush(stdout)); + } + got_error=0; + +err: + if (got_error && new_file >= 0) + { + VOID(end_io_cache(&info->rec_cache)); + (void) my_close(new_file,MYF(MY_WME)); + (void) my_delete(param->temp_filename, MYF(MY_WME)); + } + if (temp_buff) + { + my_afree((gptr) temp_buff); + } + my_free(sort_param.record,MYF(MY_ALLOW_ZERO_PTR)); + info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED); + VOID(end_io_cache(&info->rec_cache)); + my_free(sort_info.buff,MYF(MY_ALLOW_ZERO_PTR)); + sort_info.buff=0; + share->state.sortkey=sort_key; + DBUG_RETURN(_ma_flush_blocks(param, share->pagecache, &share->kfile) | + got_error); +} /* sort_records */ + + +/* Sort records recursive using one index */ + +static int sort_record_index(MARIA_SORT_PARAM *sort_param,MARIA_HA *info, + MARIA_KEYDEF *keyinfo, + my_off_t page, byte *buff, uint sort_key, + File new_file,my_bool update_index) +{ + uint nod_flag,used_length,key_length; + byte *temp_buff,*keypos,*endpos; + my_off_t next_page,rec_pos; + byte lastkey[HA_MAX_KEY_BUFF]; + char llbuff[22]; + MARIA_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; + DBUG_ENTER("sort_record_index"); + + nod_flag=_ma_test_if_nod(buff); + temp_buff=0; + + if (nod_flag) + { + if (!(temp_buff= (byte*) my_alloca((uint) keyinfo->block_length))) + { + _ma_check_print_error(param,"Not Enough memory"); + DBUG_RETURN(-1); + } + } + used_length= maria_data_on_page(buff); + keypos=buff+2+nod_flag; + endpos=buff+used_length; + for ( ;; ) + { + _sanity(__FILE__,__LINE__); + if (nod_flag) + { + next_page= _ma_kpos(nod_flag, keypos); + if (my_pread(info->s->kfile.file, (byte*)temp_buff, + (uint) keyinfo->block_length, next_page, + MYF(MY_NABP+MY_WME))) + { + _ma_check_print_error(param,"Can't read keys from filepos: %s", + llstr(next_page,llbuff)); + goto err; + } + if (sort_record_index(sort_param, info,keyinfo,next_page,temp_buff, + sort_key, + new_file, update_index)) + goto err; + } + _sanity(__FILE__,__LINE__); + if (keypos >= endpos || + (key_length=(*keyinfo->get_key)(keyinfo,nod_flag,&keypos,lastkey)) + == 0) + break; + rec_pos= _ma_dpos(info,0,lastkey+key_length); + + if ((*info->s->read_record)(info,sort_param->record,rec_pos)) + { + _ma_check_print_error(param,"%d when reading datafile",my_errno); + goto err; + } + if (rec_pos != sort_param->filepos && update_index) + { + _ma_dpointer(info,keypos-nod_flag-info->s->rec_reflength, + sort_param->filepos); + if (maria_movepoint(info,sort_param->record,rec_pos,sort_param->filepos, + sort_key)) + { + _ma_check_print_error(param,"%d when updating key-pointers",my_errno); + goto err; + } + } + if (_ma_sort_write_record(sort_param)) + goto err; + } + /* Clear end of block to get better compression if the table is backuped */ + bzero((byte*) buff+used_length,keyinfo->block_length-used_length); + if (my_pwrite(info->s->kfile.file, (byte*)buff, (uint)keyinfo->block_length, + page,param->myf_rw)) + { + _ma_check_print_error(param,"%d when updating keyblock",my_errno); + goto err; + } + if (temp_buff) + my_afree((gptr) temp_buff); + DBUG_RETURN(0); +err: + if (temp_buff) + my_afree((gptr) temp_buff); + DBUG_RETURN(1); +} /* sort_record_index */ + + + +/* + Check if maria_chk was killed by a signal + This is overloaded by other programs that want to be able to abort + sorting +*/ + +static int not_killed= 0; + +volatile int *_ma_killed_ptr(HA_CHECK *param __attribute__((unused))) +{ + return ¬_killed; /* always NULL */ +} + + /* print warnings and errors */ + /* VARARGS */ + +void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), + const char *fmt,...) +{ + va_list args; + + va_start(args,fmt); + VOID(vfprintf(stdout, fmt, args)); + VOID(fputc('\n',stdout)); + va_end(args); +} + +/* VARARGS */ + +void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_warning"); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short, + param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->warning_printed=1; + va_start(args,fmt); + fprintf(stderr,"%s: warning: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} + +/* VARARGS */ + +void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_error"); + DBUG_PRINT("enter",("format: %s",fmt)); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->error_printed|=1; + va_start(args,fmt); + fprintf(stderr,"%s: error: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h new file mode 100644 index 00000000000..740808c7bbe --- /dev/null +++ b/storage/maria/maria_def.h @@ -0,0 +1,916 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* This file is included by all internal maria files */ + +#include "maria.h" /* Structs & some defines */ +#include "myisampack.h" /* packing of keys */ +#include +#ifdef THREAD +#include +#include +#else +#include +#endif + +#include "ma_loghandler.h" +#include "ma_control_file.h" + +#define MAX_NONMAPPED_INSERTS 1000 +#define MARIA_MAX_TREE_LEVELS 32 + +struct st_transaction; + +/* undef map from my_nosys; We need test-if-disk full */ +#undef my_write + +typedef struct st_maria_status_info +{ + ha_rows records; /* Rows in table */ + ha_rows del; /* Removed rows */ + my_off_t empty; /* lost space in datafile */ + my_off_t key_empty; /* lost space in indexfile */ + my_off_t key_file_length; + my_off_t data_file_length; + ha_checksum checksum; +} MARIA_STATUS_INFO; + +typedef struct st_maria_state_info +{ + struct + { /* Fileheader */ + uchar file_version[4]; + uchar options[2]; + uchar header_length[2]; + uchar state_info_length[2]; + uchar base_info_length[2]; + uchar base_pos[2]; + uchar key_parts[2]; /* Key parts */ + uchar unique_key_parts[2]; /* Key parts + unique parts */ + uchar keys; /* number of keys in file */ + uchar uniques; /* number of UNIQUE definitions */ + uchar language; /* Language for indexes */ + uchar fulltext_keys; + uchar data_file_type; + /* Used by mariapack to store the original data_file_type */ + uchar org_data_file_type; + } header; + + MARIA_STATUS_INFO state; + ha_rows split; /* number of split blocks */ + my_off_t dellink; /* Link to next removed block */ + ulonglong first_bitmap_with_space; + ulonglong auto_increment; + ulong process; /* process that updated table last */ + ulong unique; /* Unique number for this process */ + ulong update_count; /* Updated for each write lock */ + ulong status; + ulong *rec_per_key_part; + ha_checksum checksum; /* Table checksum */ + my_off_t *key_root; /* Start of key trees */ + my_off_t key_del; /* delete links for index pages */ + my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ + + ulong sec_index_changed; /* Updated when new sec_index */ + ulong sec_index_used; /* which extra index are in use */ + ulonglong key_map; /* Which keys are in use */ + ulong version; /* timestamp of create */ + time_t create_time; /* Time when created database */ + time_t recover_time; /* Time for last recover */ + time_t check_time; /* Time for last check */ + uint sortkey; /* sorted by this key (not used) */ + uint open_count; + uint8 changed; /* Changed since mariachk */ + LSN create_rename_lsn; /**< LSN when table was last created/renamed */ + + /* the following isn't saved on disk */ + uint state_diff_length; /* Should be 0 */ + uint state_length; /* Length of state header in file */ + ulong *key_info; +} MARIA_STATE_INFO; + + +#define MARIA_STATE_INFO_SIZE \ + (24 + LSN_STORE_SIZE + 4 + 11*8 + 4*4 + 8 + 3*4 + 5*8) +#define MARIA_STATE_KEY_SIZE 8 +#define MARIA_STATE_KEYBLOCK_SIZE 8 +#define MARIA_STATE_KEYSEG_SIZE 4 +#define MARIA_STATE_EXTRA_SIZE (MARIA_MAX_KEY*MARIA_STATE_KEY_SIZE + MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE) +#define MARIA_KEYDEF_SIZE (2+ 5*2) +#define MARIA_UNIQUEDEF_SIZE (2+1+1) +#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) +#define MARIA_COLUMNDEF_SIZE (6+2+2+2+2+2+1+1) +#define MARIA_BASE_INFO_SIZE (5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16) +#define MARIA_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ +/* Internal management bytes needed to store 2 keys on an index page */ +#define MARIA_INDEX_MIN_OVERHEAD_SIZE (4 + (TRANSID_SIZE+1) * 2) + +/* + Basic information of the Maria table. This is stored on disk + and not changed (unless we do DLL changes). +*/ + +typedef struct st_ma_base_info +{ + my_off_t keystart; /* Start of keys */ + my_off_t max_data_file_length; + my_off_t max_key_file_length; + my_off_t margin_key_file_length; + ha_rows records, reloc; /* Create information */ + ulong mean_row_length; /* Create information */ + ulong reclength; /* length of unpacked record */ + ulong pack_reclength; /* Length of full packed rec */ + ulong min_pack_length; + ulong max_pack_length; /* Max possibly length of packed rec */ + ulong min_block_length; + uint fields; /* fields in table */ + uint fixed_not_null_fields; + uint fixed_not_null_fields_length; + uint max_field_lengths; + uint pack_fields; /* packed fields in table */ + uint varlength_fields; /* char/varchar/blobs */ + /* Number of bytes in the index used to refer to a row (2-8) */ + uint rec_reflength; + /* Number of bytes in the index used to refer to another index page (2-8) */ + uint key_reflength; /* = 2-8 */ + uint keys; /* same as in state.header */ + uint auto_key; /* Which key-1 is a auto key */ + uint blobs; /* Number of blobs */ + /* Length of packed bits (when table was created first time) */ + uint pack_bytes; + /* Length of null bits (when table was created first time) */ + uint original_null_bytes; + uint null_bytes; /* Null bytes in record */ + uint field_offsets; /* Number of field offsets */ + uint max_key_block_length; /* Max block length */ + uint max_key_length; /* Max key length */ + /* Extra allocation when using dynamic record format */ + uint extra_alloc_bytes; + uint extra_alloc_procent; + uint is_nulls_extended; /* 1 if new null bytes */ + uint min_row_length; /* Min possible length of a row */ + uint default_row_flag; /* 0 or ROW_FLAG_NULLS_EXTENDED */ + uint block_size; + /* Size of initial record buffer */ + uint default_rec_buff_size; + /* Extra number of bytes the row format require in the record buffer */ + uint extra_rec_buff_size; + + /* The following are from the header */ + uint key_parts, all_key_parts; + /* If false, we disable logging, versioning, transaction etc */ + my_bool transactional; +} MARIA_BASE_INFO; + + +/* Structs used intern in database */ + +typedef struct st_maria_blob /* Info of record */ +{ + ulong offset; /* Offset to blob in record */ + uint pack_length; /* Type of packed length */ + ulong length; /* Calc:ed for each record */ +} MARIA_BLOB; + + +typedef struct st_maria_pack +{ + ulong header_length; + uint ref_length; + uchar version; +} MARIA_PACK; + +typedef struct st_maria_file_bitmap +{ + uchar *map; + ulonglong page; /* Page number for current bitmap */ + uint used_size; /* Size of bitmap head that is not 0 */ + my_bool changed; /* 1 if page needs to be flushed */ + PAGECACHE_FILE file; /* datafile where bitmap is stored */ + +#ifdef THREAD + pthread_mutex_t bitmap_lock; +#endif + /* Constants, allocated when initiating bitmaps */ + uint sizes[8]; /* Size per bit combination */ + uint total_size; /* Total usable size of bitmap page */ + uint block_size; /* Block size of file */ + ulong pages_covered; /* Pages covered by bitmap + 1 */ +} MARIA_FILE_BITMAP; + + +typedef struct st_maria_share +{ /* Shared between opens */ + MARIA_STATE_INFO state; + MARIA_BASE_INFO base; + MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key + definition */ + MARIA_KEYDEF *keyinfo; /* Key definitions */ + MARIA_UNIQUEDEF *uniqueinfo; /* unique definitions */ + HA_KEYSEG *keyparts; /* key part info */ + MARIA_COLUMNDEF *columndef; /* Pointer to column information */ + MARIA_PACK pack; /* Data about packed records */ + MARIA_BLOB *blobs; /* Pointer to blobs */ + char *unique_file_name; /* realpath() of index file */ + char *data_file_name; /* Resolved path names from symlinks */ + char *index_file_name; + char *open_file_name; /* parameter to open filename */ + byte *file_map; /* mem-map of file if possible */ + PAGECACHE *pagecache; /* ref to the current key cache */ + MARIA_DECODE_TREE *decode_trees; + uint16 *decode_tables; + uint16 id; /**< 2-byte id by which log records refer to the table */ + /* Called the first time the table instance is opened */ + my_bool (*once_init)(struct st_maria_share *, File); + /* Called when the last instance of the table is closed */ + my_bool (*once_end)(struct st_maria_share *); + /* Is called for every open of the table */ + my_bool (*init)(struct st_maria_info *); + /* Is called for every close of the table */ + void (*end)(struct st_maria_info *); + /* Called when we want to read a record from a specific position */ + int (*read_record)(struct st_maria_info *, byte *, MARIA_RECORD_POS); + /* Initialize a scan */ + my_bool (*scan_init)(struct st_maria_info *); + /* Read next record while scanning */ + int (*scan)(struct st_maria_info *, byte *, MARIA_RECORD_POS, my_bool); + /* End scan */ + void (*scan_end)(struct st_maria_info *); + /* Pre-write of row (some handlers may do the actual write here) */ + MARIA_RECORD_POS (*write_record_init)(struct st_maria_info *, const byte *); + /* Write record (or accept write_record_init) */ + my_bool (*write_record)(struct st_maria_info *, const byte *); + /* Called when write failed */ + my_bool (*write_record_abort)(struct st_maria_info *); + my_bool (*update_record)(struct st_maria_info *, MARIA_RECORD_POS, + const byte *, const byte *); + my_bool (*delete_record)(struct st_maria_info *, const byte *record); + my_bool (*compare_record)(struct st_maria_info *, const byte *); + /* calculate checksum for a row */ + ha_checksum(*calc_checksum)(struct st_maria_info *, const byte *); + /* + Calculate checksum for a row during write. May be 0 if we calculate + the checksum in write_record_init() + */ + ha_checksum(*calc_write_checksum) (struct st_maria_info *, const byte *); + /* Compare a row in memory with a row on disk */ + my_bool (*compare_unique)(struct st_maria_info *, MARIA_UNIQUEDEF *, + const byte *record, MARIA_RECORD_POS pos); + /* Mapings to read/write the data file */ + uint (*file_read)(MARIA_HA *, byte *, uint, my_off_t, myf); + uint (*file_write)(MARIA_HA *, byte *, uint, my_off_t, myf); + invalidator_by_filename invalidator; /* query cache invalidator */ + ulong this_process; /* processid */ + ulong last_process; /* For table-change-check */ + ulong last_version; /* Version on start */ + ulong options; /* Options used */ + ulong min_pack_length; /* These are used by packed data */ + ulong max_pack_length; + ulong state_diff_length; + uint rec_reflength; /* rec_reflength in use now */ + uint unique_name_length; + uint32 ftparsers; /* Number of distinct ftparsers + + 1 */ + PAGECACHE_FILE kfile; /* Shared keyfile */ + File data_file; /* Shared data file */ + int mode; /* mode of file on open */ + uint reopen; /* How many times reopened */ + uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ + uint block_size; /* block_size of keyfile & data file*/ + /* Fixed length part of a packed row in BLOCK_RECORD format */ + uint base_length; + myf write_flag; + enum data_file_type data_file_type; + enum pagecache_page_type page_type; /* value depending transactional */ + my_bool temporary; + /* Below flag is needed to make log tables work with concurrent insert */ + my_bool is_log_table; + + my_bool changed, /* If changed since lock */ + global_changed, /* If changed since open */ + not_flushed, concurrent_insert; + my_bool delay_key_write; + my_bool have_rtree; +#ifdef THREAD + THR_LOCK lock; + pthread_mutex_t intern_lock; /* Locking for use with _locking */ + rw_lock_t *key_root_lock; +#endif + my_off_t mmaped_length; + uint nonmmaped_inserts; /* counter of writing in + non-mmaped area */ + MARIA_FILE_BITMAP bitmap; + rw_lock_t mmap_lock; +} MARIA_SHARE; + + +typedef byte MARIA_BITMAP_BUFFER; + +typedef struct st_maria_bitmap_block +{ + ulonglong page; /* Page number */ + /* Number of continuous pages. TAIL_BIT is set if this is a tail page */ + uint page_count; + uint empty_space; /* Set for head and tail pages */ + /* + Number of BLOCKS for block-region (holds all non-blob-fields or one blob) + */ + uint sub_blocks; + /* set to <> 0 in write_record() if this block was actually used */ + uint8 used; + uint8 org_bitmap_value; +} MARIA_BITMAP_BLOCK; + + +typedef struct st_maria_bitmap_blocks +{ + MARIA_BITMAP_BLOCK *block; + uint count; + my_bool tail_page_skipped; /* If some tail pages was not used */ + my_bool page_skipped; /* If some full pages was not used */ +} MARIA_BITMAP_BLOCKS; + + +/* Data about the currently read row */ +typedef struct st_maria_row +{ + MARIA_BITMAP_BLOCKS insert_blocks; + MARIA_BITMAP_BUFFER *extents; + MARIA_RECORD_POS lastpos, nextpos; + MARIA_RECORD_POS *tail_positions; + ha_checksum checksum; + byte *empty_bits, *field_lengths; + uint *null_field_lengths; /* All null field lengths */ + ulong *blob_lengths; /* Length for each blob */ + ulong base_length, normal_length, char_length, varchar_length, blob_length; + ulong head_length, total_length; + my_size_t extents_buffer_length; /* Size of 'extents' buffer */ + uint field_lengths_length; /* Length of data in field_lengths */ + uint extents_count; /* number of extents in 'extents' */ + uint full_page_count, tail_count; /* For maria_chk */ +} MARIA_ROW; + +/* Data to scan row in blocked format */ +typedef struct st_maria_block_scan +{ + byte *bitmap_buff, *bitmap_pos, *bitmap_end, *page_buff; + byte *dir, *dir_end; + ulong bitmap_page; + ulonglong bits; + uint number_of_rows, bit_pos; + MARIA_RECORD_POS row_base_page; +} MARIA_BLOCK_SCAN; + + +struct st_maria_info +{ + MARIA_SHARE *s; /* Shared between open:s */ + struct st_transaction *trn; /* Pointer to active transaction */ + MARIA_STATUS_INFO *state, save_state; + MARIA_ROW cur_row; /* The active row that we just read */ + MARIA_ROW new_row; /* Storage for a row during update */ + MARIA_BLOCK_SCAN scan; + MARIA_BLOB *blobs; /* Pointer to blobs */ + MARIA_BIT_BUFF bit_buff; + DYNAMIC_ARRAY bitmap_blocks; + DYNAMIC_ARRAY pinned_pages; + /* accumulate indexfile changes between write's */ + TREE *bulk_insert; + LEX_STRING *log_row_parts; /* For logging */ + DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ + MEM_ROOT ft_memroot; /* used by the parser */ + MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ + byte *buff; /* page buffer */ + byte *keyread_buff; /* Buffer for last key read */ + byte *lastkey, *lastkey2; /* Last used search key */ + byte *first_mbr_key; /* Searhed spatial key */ + byte *rec_buff; /* Temp buffer for recordpack */ + byte *int_keypos, /* Save position for next/previous */ + *int_maxpos; /* -""- */ + byte *update_field_data; /* Used by update in rows-in-block */ + uint int_nod_flag; /* -""- */ + uint32 int_keytree_version; /* -""- */ + int (*read_record) (struct st_maria_info *, byte*, MARIA_RECORD_POS); + invalidator_by_filename invalidator; /* query cache invalidator */ + ulong this_unique; /* uniq filenumber or thread */ + ulong last_unique; /* last unique number */ + ulong this_loop; /* counter for this open */ + ulong last_loop; /* last used counter */ + MARIA_RECORD_POS save_lastpos; + MARIA_RECORD_POS dup_key_pos; + my_off_t pos; /* Intern variable */ + my_off_t last_keypage; /* Last key page read */ + my_off_t last_search_keypage; /* Last keypage when searching */ + + /* + QQ: the folloing two xxx_length fields should be removed, + as they are not compatible with parallel repair + */ + ulong packed_length, blob_length; /* Length of found, packed record */ + my_size_t rec_buff_size; + PAGECACHE_FILE dfile; /* The datafile */ + uint opt_flag; /* Optim. for space/speed */ + uint update; /* If file changed since open */ + int lastinx; /* Last used index */ + uint lastkey_length; /* Length of key in lastkey */ + uint last_rkey_length; /* Last length in maria_rkey() */ + enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */ + uint save_lastkey_length; + uint pack_key_length; /* For MARIAMRG */ + int errkey; /* Got last error on this key */ + int lock_type; /* How database was locked */ + int tmp_lock_type; /* When locked by readinfo */ + uint data_changed; /* Somebody has changed data */ + uint save_update; /* When using KEY_READ */ + int save_lastinx; + LIST open_list; + IO_CACHE rec_cache; /* When cacheing records */ + uint preload_buff_size; /* When preloading indexes */ + myf lock_wait; /* is 0 or MY_DONT_WAIT */ + my_bool was_locked; /* Was locked in panic */ + my_bool append_insert_at_end; /* Set if concurrent insert */ + my_bool quick_mode; + /* If info->keyread_buff can't be used for rnext */ + my_bool page_changed; + /* If info->keyread_buff has to be re-read for rnext */ + my_bool keyread_buff_used; + my_bool once_flags; /* For MARIA_MRG */ +#ifdef __WIN__ + my_bool owned_by_merge; /* This Maria table is part of a merge union */ +#endif +#ifdef THREAD + THR_LOCK_DATA lock; +#endif + uchar *maria_rtree_recursion_state; /* For RTREE */ + int maria_rtree_recursion_depth; +}; + +/* Some defines used by maria-functions */ + +#define USE_WHOLE_KEY 65535 /* Use whole key in _search() */ +#define F_EXTRA_LCK -1 + +/* bits in opt_flag */ +#define MEMMAP_USED 32 +#define REMEMBER_OLD_POS 64 + +#define WRITEINFO_UPDATE_KEYFILE 1 +#define WRITEINFO_NO_UNLOCK 2 + +/* once_flags */ +#define USE_PACKED_KEYS 1 +#define RRND_PRESERVE_LASTINX 2 + +/* bits in state.changed */ + +#define STATE_CHANGED 1 +#define STATE_CRASHED 2 +#define STATE_CRASHED_ON_REPAIR 4 +#define STATE_NOT_ANALYZED 8 +#define STATE_NOT_OPTIMIZED_KEYS 16 +#define STATE_NOT_SORTED_PAGES 32 +#define STATE_NOT_OPTIMIZED_ROWS 64 + +/* options to maria_read_cache */ + +#define READING_NEXT 1 +#define READING_HEADER 2 + +#define maria_data_on_page(x) ((uint) mi_uint2korr(x) & 32767) +#define maria_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ + mi_int2store(x,boh); } +#define _ma_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) +#define maria_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \ + DBUG_PRINT("error", ("Marked table crashed")); \ + }while(0) +#define maria_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \ + STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \ + (x)->update|= HA_STATE_CHANGED; \ + DBUG_PRINT("error", \ + ("Marked table crashed")); \ + }while(0) +#define maria_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) +#define maria_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) +#define maria_print_error(SHARE, ERRNO) \ + _ma_report_error((ERRNO), (SHARE)->index_file_name) + +/* Functions to store length of space packed keys, VARCHAR or BLOB keys */ + +#define store_key_length(key,length) \ +{ if ((length) < 255) \ + { *(key)=(length); } \ + else \ + { *(key)=255; mi_int2store((key)+1,(length)); } \ +} + +#define get_key_full_length(length,key) \ + { if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key)++))+1; \ + else \ + { length=mi_uint2korr((key)+1)+3; (key)+=3; } \ +} + +#define get_key_full_length_rdonly(length,key) \ +{ if (*(uchar*) (key) != 255) \ + length= ((uint) *(uchar*) ((key)))+1; \ + else \ + { length=mi_uint2korr((key)+1)+3; } \ +} + +#define maria_max_key_length() ((maria_block_size - MARIA_INDEX_MIN_OVERHEAD_SIZE)/2) +#define get_pack_length(length) ((length) >= 255 ? 3 : 1) + +#define MARIA_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ +/* Don't use to small record-blocks */ +#define MARIA_EXTEND_BLOCK_LENGTH 20 +#define MARIA_SPLIT_LENGTH ((MARIA_EXTEND_BLOCK_LENGTH+4)*2) + /* Max prefix of record-block */ +#define MARIA_MAX_DYN_BLOCK_HEADER 20 +#define MARIA_BLOCK_INFO_HEADER_LENGTH 20 +#define MARIA_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ +#define MARIA_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) +#define MARIA_DYN_MAX_ROW_LENGTH (MARIA_DYN_MAX_BLOCK_LENGTH - MARIA_SPLIT_LENGTH) +#define MARIA_DYN_ALIGN_SIZE 4 /* Align blocks on this */ +#define MARIA_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ +#define MARIA_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MARIA_DYN_ALIGN_SIZE-1))) +#define MARIA_REC_BUFF_OFFSET ALIGN_SIZE(MARIA_DYN_DELETE_BLOCK_HEADER+sizeof(uint32)) + +#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ + +#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ +#define PACK_TYPE_SPACE_FIELDS 2 +#define PACK_TYPE_ZERO_FILL 4 +#define MARIA_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ + +#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size)) +#define MARIA_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ +#define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ + +#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ +#define MARIA_MIN_ROWS_TO_USE_BULK_INSERT 100 +#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100 +#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10 + +/* The UNIQUE check is done with a hashed long key */ + +#define MARIA_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT +#define maria_unique_store(A,B) mi_int4store((A),(B)) + +#ifdef THREAD +extern pthread_mutex_t THR_LOCK_maria; +#endif +#if !defined(THREAD) || defined(DONT_USE_RW_LOCKS) +#define rw_wrlock(A) {} +#define rw_rdlock(A) {} +#define rw_unlock(A) {} +#endif + + +/* Some extern variables */ +extern LIST *maria_open_list; +extern uchar NEAR maria_file_magic[], NEAR maria_pack_file_magic[]; +extern uint NEAR maria_read_vec[], NEAR maria_readnext_vec[]; +extern uint maria_quick_table_bits; +extern const char *maria_data_root; +extern byte maria_zero_string[]; +extern my_bool maria_inited; + + +/* This is used by _ma_calc_xxx_key_length och _ma_store_key */ +typedef struct st_maria_s_param +{ + uint ref_length, key_length, n_ref_length; + uint n_length, totlength, part_of_prev_key, prev_length, pack_marker; + const byte *key; + byte *prev_key, *next_key_pos; + bool store_not_null; +} MARIA_KEY_PARAM; + + +/* Used to store reference to pinned page */ +typedef struct st_pinned_page +{ + PAGECACHE_PAGE_LINK link; + enum pagecache_page_lock unlock; +} MARIA_PINNED_PAGE; + + +/* Prototypes for intern functions */ +extern int _ma_read_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS); +extern int _ma_read_rnd_dynamic_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); +extern my_bool _ma_write_dynamic_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_dynamic_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *, const byte *); +extern my_bool _ma_delete_dynamic_record(MARIA_HA *info, const byte *record); +extern my_bool _ma_cmp_dynamic_record(MARIA_HA *info, const byte *record); +extern my_bool _ma_write_blob_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_blob_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *, const byte *); +extern int _ma_read_static_record(MARIA_HA *info, byte *, MARIA_RECORD_POS); +extern int _ma_read_rnd_static_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); +extern my_bool _ma_write_static_record(MARIA_HA *, const byte *); +extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS, + const byte *, const byte *); +extern my_bool _ma_delete_static_record(MARIA_HA *info, const byte *record); +extern my_bool _ma_cmp_static_record(MARIA_HA *info, const byte *record); +extern int _ma_ck_write(MARIA_HA *info, uint keynr, byte *key, + uint length); +extern int _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, + MARIA_RECORD_POS *root, uint comp_flag); +extern int _ma_enlarge_root(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, MARIA_RECORD_POS *root); +extern int _ma_insert(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + byte *anc_buff, byte *key_pos, byte *key_buff, + byte *father_buff, byte *father_keypos, + my_off_t father_page, my_bool insert_last); +extern int _ma_split_page(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, byte *buff, byte *key_buff, + my_bool insert_last); +extern byte *_ma_find_half_pos(uint nod_flag, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, + uint *return_key_length, + byte ** after_key); +extern int _ma_calc_static_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *key_pos, byte *org_key, + byte *key_buff, const byte *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_var_key_length(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte *key_pos, byte *org_key, + byte *key_buff, const byte *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_var_pack_key_length(MARIA_KEYDEF *keyinfo, + uint nod_flag, byte *key_pos, + byte *org_key, byte *prev_key, + const byte *key, + MARIA_KEY_PARAM *s_temp); +extern int _ma_calc_bin_pack_key_length(MARIA_KEYDEF *keyinfo, + uint nod_flag, byte *key_pos, + byte *org_key, byte *prev_key, + const byte *key, + MARIA_KEY_PARAM *s_temp); +void _ma_store_static_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); +void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); +#ifdef NOT_USED +void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); +#endif +void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, byte *key_pos, + MARIA_KEY_PARAM *s_temp); + +extern int _ma_ck_delete(MARIA_HA *info, uint keynr, byte *key, + uint key_length); +extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer); +extern int _ma_writeinfo(MARIA_HA *info, uint options); +extern int _ma_test_if_changed(MARIA_HA *info); +extern int _ma_mark_file_changed(MARIA_HA *info); +extern int _ma_decrement_open_count(MARIA_HA *info); +extern int _ma_check_index(MARIA_HA *info, int inx); +extern int _ma_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, byte *key, + uint key_len, uint nextflag, my_off_t pos); +extern int _ma_bin_search(struct st_maria_info *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, + uint comp_flag, byte **ret_pos, byte *buff, + my_bool *was_last_key); +extern int _ma_seq_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, + uint comp_flag, byte ** ret_pos, byte *buff, + my_bool *was_last_key); +extern int _ma_prefix_search(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, uint key_len, + uint comp_flag, byte ** ret_pos, byte *buff, + my_bool *was_last_key); +extern my_off_t _ma_kpos(uint nod_flag, byte *after_key); +extern void _ma_kpointer(MARIA_HA *info, byte *buff, my_off_t pos); +extern MARIA_RECORD_POS _ma_dpos(MARIA_HA *info, uint nod_flag, + const byte *after_key); +extern MARIA_RECORD_POS _ma_rec_pos(MARIA_SHARE *info, byte *ptr); +extern void _ma_dpointer(MARIA_HA *info, byte *buff, MARIA_RECORD_POS pos); +extern uint _ma_get_static_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte **page, byte *key); +extern uint _ma_get_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte **page, byte *key); +extern uint _ma_get_binary_pack_key(MARIA_KEYDEF *keyinfo, uint nod_flag, + byte ** page_pos, byte *key); +extern byte *_ma_get_last_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *keypos, byte *lastkey, + byte *endpos, uint *return_key_length); +extern byte *_ma_get_key(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *page, byte *key, byte *keypos, + uint *return_key_length); +extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, const byte *key); +extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, register const byte *key, + HA_KEYSEG *end); +extern byte *_ma_move_key(MARIA_KEYDEF *keyinfo, byte *to, const byte *from); +extern int _ma_search_next(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + byte *key, uint key_length, uint nextflag, + my_off_t pos); +extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t pos); +extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t pos); +extern byte *_ma_fetch_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, byte *buff, + int return_buffer); +extern int _ma_write_keypage(MARIA_HA *info, MARIA_KEYDEF *keyinfo, + my_off_t page, int level, byte *buff); +extern int _ma_dispose(MARIA_HA *info, MARIA_KEYDEF *keyinfo, my_off_t pos, + int level); +extern my_off_t _ma_new(MARIA_HA *info, MARIA_KEYDEF *keyinfo, int level); +extern uint _ma_make_key(MARIA_HA *info, uint keynr, byte *key, + const byte *record, MARIA_RECORD_POS filepos); +extern uint _ma_pack_key(MARIA_HA *info, uint keynr, byte *key, + const byte *old, uint key_length, + HA_KEYSEG ** last_used_keyseg); +extern int _ma_read_key_record(MARIA_HA *info, byte *buf, MARIA_RECORD_POS); +extern int _ma_read_cache(IO_CACHE *info, byte *buff, MARIA_RECORD_POS pos, + uint length, int re_read_if_possibly); +extern ulonglong ma_retrieve_auto_increment(MARIA_HA *info, const byte *record); + +extern my_bool _ma_alloc_buffer(byte **old_addr, my_size_t *old_size, + my_size_t new_size); +extern ulong _ma_rec_unpack(MARIA_HA *info, byte *to, byte *from, + ulong reclength); +extern my_bool _ma_rec_check(MARIA_HA *info, const char *record, + byte *packpos, ulong packed_length, + my_bool with_checkum); +extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos, + ulong length, my_off_t next_filepos, + byte ** record, ulong *reclength, + int *flag); +extern void _ma_print_key(FILE *stream, HA_KEYSEG *keyseg, + const byte *key, uint length); +extern my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile); +extern my_bool _ma_once_end_pack_row(MARIA_SHARE *share); +extern int _ma_read_pack_record(MARIA_HA *info, byte *buf, + MARIA_RECORD_POS filepos); +extern int _ma_read_rnd_pack_record(MARIA_HA *, byte *, MARIA_RECORD_POS, + my_bool); +extern int _ma_pack_rec_unpack(MARIA_HA *info, MARIA_BIT_BUFF *bit_buff, + byte *to, byte *from, ulong reclength); +extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b); +extern int _ma_ft_update(MARIA_HA *info, uint keynr, byte *keybuf, + const byte *oldrec, const byte *newrec, + my_off_t pos); + +/* + Parameter to _ma_get_block_info + The dynamic row header is read into this struct. For an explanation of + the fields, look at the function _ma_get_block_info(). +*/ + +typedef struct st_maria_block_info +{ + uchar header[MARIA_BLOCK_INFO_HEADER_LENGTH]; + ulong rec_len; + ulong data_len; + ulong block_len; + ulong blob_len; + MARIA_RECORD_POS filepos; + MARIA_RECORD_POS next_filepos; + MARIA_RECORD_POS prev_filepos; + uint second_read; + uint offset; +} MARIA_BLOCK_INFO; + + +/* bits in return from _ma_get_block_info */ + +#define BLOCK_FIRST 1 +#define BLOCK_LAST 2 +#define BLOCK_DELETED 4 +#define BLOCK_ERROR 8 /* Wrong data */ +#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ +#define BLOCK_FATAL_ERROR 32 /* hardware-error */ + +#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ +#define MAXERR 20 +#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE +#define INDEX_TMP_EXT ".TMM" +#define DATA_TMP_EXT ".TMD" + +#define UPDATE_TIME 1 +#define UPDATE_STAT 2 +#define UPDATE_SORT 4 +#define UPDATE_AUTO_INC 8 +#define UPDATE_OPEN_COUNT 16 + +#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) +#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) +#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) +#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) + +#define fast_ma_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _ma_writeinfo((INFO),0) +#define fast_ma_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1) + +extern uint _ma_get_block_info(MARIA_BLOCK_INFO *, File, my_off_t); +extern uint _ma_rec_pack(MARIA_HA *info, byte *to, const byte *from); +extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff, + MARIA_BLOCK_INFO *info, byte **rec_buff_p, + my_size_t *rec_buff_size, + File file, my_off_t filepos); +extern void _ma_store_blob_length(byte *pos, uint pack_length, uint length); +extern void _ma_report_error(int errcode, const char *file_name); +extern my_bool _ma_memmap_file(MARIA_HA *info); +extern void _ma_unmap_file(MARIA_HA *info); +extern uint _ma_save_pack_length(uint version, byte * block_buff, + ulong length); +extern uint _ma_calc_pack_length(uint version, ulong length); +extern ulong _ma_calc_blob_length(uint length, const byte *pos); +extern uint _ma_mmap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_mmap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_nommap_pread(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); +extern uint _ma_nommap_pwrite(MARIA_HA *info, byte *Buffer, + uint Count, my_off_t offset, myf MyFlags); + +uint _ma_state_info_write(File file, MARIA_STATE_INFO *state, uint pWrite); +byte *_ma_state_info_read(byte *ptr, MARIA_STATE_INFO *state); +uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state, + my_bool pRead); +uint _ma_base_info_write(File file, MARIA_BASE_INFO *base); +int _ma_keyseg_write(File file, const HA_KEYSEG *keyseg); +char *_ma_keyseg_read(char *ptr, HA_KEYSEG *keyseg); +uint _ma_keydef_write(File file, MARIA_KEYDEF *keydef); +char *_ma_keydef_read(char *ptr, MARIA_KEYDEF *keydef); +uint _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *keydef); +char *_ma_uniquedef_read(char *ptr, MARIA_UNIQUEDEF *keydef); +uint _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef); +char *_ma_columndef_read(char *ptr, MARIA_COLUMNDEF *columndef); +ulong _ma_calc_total_blob_length(MARIA_HA *info, const byte *record); +ha_checksum _ma_checksum(MARIA_HA *info, const byte *buf); +ha_checksum _ma_static_checksum(MARIA_HA *info, const byte *buf); +my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + byte *record, ha_checksum unique_hash, + MARIA_RECORD_POS pos); +ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const byte *buf); +my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def, + const byte *record, MARIA_RECORD_POS pos); +my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const byte *a, const byte *b, + my_bool null_are_equal); +void _ma_get_status(void *param, int concurrent_insert); +void _ma_update_status(void *param); +void _ma_restore_status(void *param); +void _ma_copy_status(void *to, void *from); +my_bool _ma_check_status(void *param); + +extern MARIA_HA *_ma_test_if_reopen(char *filename); +my_bool _ma_check_table_is_closed(const char *name, const char *where); +int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup); +int _ma_open_keyfile(MARIA_SHARE *share); +void _ma_setup_functions(register MARIA_SHARE *share); +my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size); +void _ma_remap_file(MARIA_HA *info, my_off_t size); + +MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const byte *record); +my_bool _ma_write_abort_default(MARIA_HA *info); + +/* Functions needed by _ma_check (are overrided in MySQL) */ +C_MODE_START +volatile int *_ma_killed_ptr(HA_CHECK *param); +void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void _ma_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...)); +int _ma_repair_write_log_record(const HA_CHECK *param, MARIA_HA *info); +C_MODE_END + +int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param); +int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param); +int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param); +#ifdef THREAD +pthread_handler_t _ma_thr_find_all_keys(void *arg); +#endif +int _ma_flush_blocks(HA_CHECK *param, PAGECACHE *pagecache, + PAGECACHE_FILE *file); + +int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param); +int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages, + ulong); +int _ma_sync_table_files(const MARIA_HA *info); +int _ma_initialize_data_file(File dfile, MARIA_SHARE *share); + +void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); + +extern PAGECACHE *maria_log_pagecache; + diff --git a/storage/maria/maria_ftdump.c b/storage/maria/maria_ftdump.c new file mode 100644 index 00000000000..8b0256344cb --- /dev/null +++ b/storage/maria/maria_ftdump.c @@ -0,0 +1,279 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code + added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */ + +#include "ma_ftdefs.h" +#include + +static void usage(); +static void complain(int val); +static my_bool get_one_option(int, const struct my_option *, char *); + +static int count=0, stats=0, dump=0, lstats=0; +static my_bool verbose; +static char *query=NULL; +static uint lengths[256]; + +#define MAX_LEN (HA_FT_MAXBYTELEN+10) +#define HOW_OFTEN_TO_WRITE 10000 + +static struct my_option my_long_options[] = +{ + {"help", 'h', "Display help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"help", '?', "Synonym for -h.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"count", 'c', "Calculate per-word stats (counts and global weights).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"dump", 'd', "Dump index (incl. data offsets and word weights).", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"length", 'l', "Report length distribution.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"stats", 's', "Report global stats.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Be verbose.", + (gptr*) &verbose, (gptr*) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +int main(int argc,char *argv[]) +{ + int error=0, subkeys; + uint keylen, keylen2=0, inx, doc_cnt=0; + float weight= 1.0; + double gws, min_gws=0, avg_gws=0; + MARIA_HA *info; + char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; + ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0; + struct { MARIA_HA *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ + + MY_INIT(argv[0]); + if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) + exit(error); + maria_init(); + if (count || dump) + verbose=0; + if (!count && !dump && !lstats && !query) + stats=1; + + if (verbose) + setbuf(stdout,NULL); + + if (argc < 2) + usage(); + + { + char *end; + inx= (uint) strtoll(argv[1], &end, 10); + if (*end) + usage(); + } + + init_pagecache(maria_pagecache, USE_BUFFER_INIT, 0, 0, + MARIA_KEY_BLOCK_LENGTH); + + if (!(info=maria_open(argv[0], O_RDONLY, + HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) + { + error=my_errno; + goto err; + } + + *buf2=0; + aio->info=info; + + if ((inx >= info->s->base.keys) || + !(info->s->keyinfo[inx].flag & HA_FULLTEXT)) + { + printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->s->open_file_name); + goto err; + } + + maria_lock_database(info, F_EXTRA_LCK); + + info->cur_row.lastpos= HA_OFFSET_ERROR; + info->update|= HA_STATE_PREV_FOUND; + + while (!(error=maria_rnext(info,NULL,inx))) + { + keylen=*(info->lastkey); + + subkeys=ft_sintXkorr(info->lastkey+keylen+1); + if (subkeys >= 0) + weight=*(float*)&subkeys; + +#ifdef HAVE_SNPRINTF + snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1); +#else + sprintf(buf,"%.*s",(int) keylen,info->lastkey+1); +#endif + my_casedn_str(default_charset_info,buf); + total++; + lengths[keylen]++; + + if (count || stats) + { + if (strcmp(buf, buf2)) + { + if (*buf2) + { + uniq++; + avg_gws+=gws=GWS_IN_USE; + if (count) + printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); + if (maxlen= 0 ? 1 : -subkeys); + } + if (dump) + { + if (subkeys>=0) + printf("%9lx %20.7f %s\n", (long) info->cur_row.lastpos,weight,buf); + else + printf("%9lx => %17d %s\n",(long) info->cur_row.lastpos,-subkeys,buf); + } + if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) + printf("%10ld\r",total); + } + maria_lock_database(info, F_UNLCK); + + if (count || stats) + { + if (*buf2) + { + uniq++; + avg_gws+=gws=GWS_IN_USE; + if (count) + printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); + if (maxlen= total/2) + break; + } + printf("Total rows: %lu\nTotal words: %lu\n" + "Unique words: %lu\nLongest word: %lu chars (%s)\n" + "Median length: %u\n" + "Average global weight: %f\n" + "Most common word: %lu times, weight: %f (%s)\n", + (long) info->state->records, total, uniq, maxlen, buf_maxlen, + inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); + } + if (lstats) + { + count=0; + for (inx=0; inx<256; inx++) + { + count+=lengths[inx]; + if (count && lengths[inx]) + printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx, + (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count, + 100.0*count/total); + } + } + +err: + if (error && error != HA_ERR_END_OF_FILE) + printf("got error %d\n",my_errno); + if (info) + maria_close(info); + maria_end(); + return 0; +} + + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch(optid) { + case 'd': + dump=1; + complain(count || query); + break; + case 's': + stats=1; + complain(query!=0); + break; + case 'c': + count= 1; + complain(dump || query); + break; + case 'l': + lstats=1; + complain(query!=0); + break; + case '?': + case 'h': + usage(); + } + return 0; +} + +#include + +static void usage() +{ + printf("Use: maria_ft_dump \n"); + my_print_help(my_long_options); + my_print_variables(my_long_options); + NETWARE_SET_SCREEN_MODE(1); + exit(1); +} + +#include + +static void complain(int val) /* Kinda assert :-) */ +{ + if (val) + { + printf("You cannot use these options together!\n"); + exit(1); + } +} diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c new file mode 100644 index 00000000000..f6e962191c2 --- /dev/null +++ b/storage/maria/maria_pack.c @@ -0,0 +1,3226 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Pack MARIA file */ + +#ifndef USE_MY_FUNC +#define USE_MY_FUNC /* We need at least my_malloc */ +#endif + +#include "maria_def.h" +#include +#include +#include "mysys_err.h" +#ifdef MSDOS +#include +#endif +#ifndef __GNU_LIBRARY__ +#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */ +#endif +#include +#include + +#if SIZEOF_LONG_LONG > 4 +#define BITS_SAVED 64 +#else +#define BITS_SAVED 32 +#endif + +#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */ +#define HEAD_LENGTH 32 +#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */ + +#define DATA_TMP_EXT ".TMD" +#define OLD_EXT ".OLD" +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE + +struct st_file_buffer { + File file; + uchar *buffer,*pos,*end; + my_off_t pos_in_file; + int bits; + ulonglong bitbucket; +}; + +struct st_huff_tree; +struct st_huff_element; + +typedef struct st_huff_counts { + uint field_length,max_zero_fill; + uint pack_type; + uint max_end_space,max_pre_space,length_bits,min_space; + ulong max_length; + enum en_fieldtype field_type; + struct st_huff_tree *tree; /* Tree for field */ + my_off_t counts[256]; + my_off_t end_space[8]; + my_off_t pre_space[8]; + my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed; + TREE int_tree; /* Tree for detecting distinct column values. */ + byte *tree_buff; /* Column values, 'field_length' each. */ + byte *tree_pos; /* Points to end of column values in 'tree_buff'. */ +} HUFF_COUNTS; + +typedef struct st_huff_element HUFF_ELEMENT; + +/* + WARNING: It is crucial for the optimizations in calc_packed_length() + that 'count' is the first element of 'HUFF_ELEMENT'. +*/ +struct st_huff_element { + my_off_t count; + union un_element { + struct st_nod { + HUFF_ELEMENT *left,*right; + } nod; + struct st_leaf { + HUFF_ELEMENT *null; + uint element_nr; /* Number of element */ + } leaf; + } a; +}; + + +typedef struct st_huff_tree { + HUFF_ELEMENT *root,*element_buffer; + HUFF_COUNTS *counts; + uint tree_number; + uint elements; + my_off_t bytes_packed; + uint tree_pack_length; + uint min_chr,max_chr,char_bits,offset_bits,max_offset,height; + ulonglong *code; + uchar *code_len; +} HUFF_TREE; + + +typedef struct st_isam_mrg { + MARIA_HA **file,**current,**end; + uint free_file; + uint count; + uint min_pack_length; /* Theese is used by packed data */ + uint max_pack_length; + uint ref_length; + uint max_blob_length; + my_off_t records; + /* true if at least one source file has at least one disabled index */ + my_bool src_file_has_indexes_disabled; +} PACK_MRG_INFO; + + +extern int main(int argc,char * *argv); +static void get_options(int *argc,char ***argv); +static MARIA_HA *open_isam_file(char *name,int mode); +static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count); +static int compress(PACK_MRG_INFO *file,char *join_name); +static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records); +static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, + uint trees, + HUFF_COUNTS *huff_counts, + uint fields); +static int compare_tree(void* cmp_arg __attribute__((unused)), + const uchar *s,const uchar *t); +static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts); +static void check_counts(HUFF_COUNTS *huff_counts,uint trees, + my_off_t records); +static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records, + uint max_space_length,my_off_t *space_counts, + my_off_t tot_space_count, + enum en_fieldtype field_type); +static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees); +static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts); +static int compare_huff_elements(void *not_used, byte *a,byte *b); +static int save_counts_in_queue(byte *key,element_count count, + HUFF_TREE *tree); +static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag); +static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees); +static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees); +static void make_traverse_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element,uint size, + ulonglong code); +static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees, + my_off_t tot_elements,my_off_t filelength); +static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees); +static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees); +static uint *make_offset_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element, + uint *offset); +static uint max_bit(uint value); +static int compress_isam_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts); +static char *make_new_name(char *new_name,char *old_name); +static char *make_old_name(char *new_name,char *old_name); +static void init_file_buffer(File file,pbool read_buffer); +static int flush_buffer(ulong neaded_length); +static void end_file_buffer(void); +static void write_bits(ulonglong value, uint bits); +static void flush_bits(void); +static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, + ha_checksum crc); +static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length, + ha_checksum crc); +static int mrg_close(PACK_MRG_INFO *mrg); +static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf); +static void mrg_reset(PACK_MRG_INFO *mrg); +#if !defined(DBUG_OFF) +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count); +static int fakecmp(my_off_t **count1, my_off_t **count2); +#endif + + +static int error_on_write=0,test_only=0,verbose=0,silent=0, + write_loop=0,force_pack=0, isamchk_neaded=0; +static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL; +static my_bool backup, opt_wait; +/* + tree_buff_length is somewhat arbitrary. The bigger it is the better + the chance to win in terms of compression factor. On the other hand, + this table becomes part of the compressed file header. And its length + is coded with 16 bits in the header. Hence the limit is 2**16 - 1. +*/ +static uint tree_buff_length= 65536 - MALLOC_OVERHEAD; +static char tmp_dir[FN_REFLEN]={0},*join_table; +static my_off_t intervall_length; +static ha_checksum glob_crc; +static struct st_file_buffer file_buffer; +static QUEUE queue; +static HUFF_COUNTS *global_count; +static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +static const char *load_default_groups[]= { "mariapack",0 }; + + /* The main program */ + +int main(int argc, char **argv) +{ + int error,ok; + PACK_MRG_INFO merge; + char **default_argv; + MY_INIT(argv[0]); + + load_defaults("my",load_default_groups,&argc,&argv); + default_argv= argv; + get_options(&argc,&argv); + maria_init(); + + error=ok=isamchk_neaded=0; + if (join_table) + { /* Join files into one */ + if (open_isam_files(&merge,argv,(uint) argc) || + compress(&merge,join_table)) + error=1; + } + else while (argc--) + { + MARIA_HA *isam_file; + if (!(isam_file=open_isam_file(*argv++,O_RDWR))) + error=1; + else + { + merge.file= &isam_file; + merge.current=0; + merge.free_file=0; + merge.count=1; + if (compress(&merge,0)) + error=1; + else + ok=1; + } + } + if (ok && isamchk_neaded && !silent) + puts("Remember to run maria_chk -rq on compressed tables"); + VOID(fflush(stdout)); + VOID(fflush(stderr)); + free_defaults(default_argv); + maria_end(); + my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR); + exit(error ? 2 : 0); +#ifndef _lint + return 0; /* No compiler warning */ +#endif +} + +enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE}; + +static struct my_option my_long_options[] = +{ +#ifdef __NETWARE__ + {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"backup", 'b', "Make a backup of the table as table_name.OLD.", + (gptr*) &backup, (gptr*) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"character-sets-dir", OPT_CHARSETS_DIR_MP, + "Directory where character sets are.", (gptr*) &charsets_dir, + (gptr*) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, + {"force", 'f', + "Force packing of table even if it gets bigger or if tempfile exists.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"join", 'j', + "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.", + (gptr*) &join_table, (gptr*) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, + 0, 0, 0}, + {"help", '?', "Display this help and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"silent", 's', "Be more silent.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"tmpdir", 'T', "Use temporary directory to store temporary table.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, + {"test", 't', "Don't pack table, only test packing it.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Output version information and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"wait", 'w', "Wait and retry if table is in use.", (gptr*) &opt_wait, + (gptr*) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +#include + +static void print_version(void) +{ + VOID(printf("%s Ver 1.0 for %s on %s\n", + my_progname, SYSTEM_TYPE, MACHINE_TYPE)); + NETWARE_SET_SCREEN_MODE(1); +} + + +static void usage(void) +{ + print_version(); + puts("Copyright (C) 2002 MySQL AB"); + puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,"); + puts("and you are welcome to modify and redistribute it under the GPL license\n"); + + puts("Pack a MARIA-table to take much less space."); + puts("Keys are not updated, you must run maria_chk -rq on the index (.MAI) file"); + puts("afterwards to update the keys."); + puts("You should give the .MAI file as the filename argument."); + puts("To unpack a packed table, run maria_chk -u on the table"); + + VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname)); + my_print_help(my_long_options); + print_defaults("my", load_default_groups); + my_print_variables(my_long_options); +} + +#include + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + uint length; + + switch(optid) { +#ifdef __NETWARE__ + case OPT_AUTO_CLOSE: + setscreenmode(SCR_AUTOCLOSE_ON_EXIT); + break; +#endif + case 'f': + force_pack= 1; + tmpfile_createflag= O_RDWR | O_TRUNC; + break; + case 's': + write_loop= verbose= 0; + silent= 1; + break; + case 't': + test_only= 1; + /* Avoid to reset 'verbose' if it was already set > 1. */ + if (! verbose) + verbose= 1; + break; + case 'T': + length= (uint) (strmov(tmp_dir, argument) - tmp_dir); + if (length != dirname_length(tmp_dir)) + { + tmp_dir[length]=FN_LIBCHAR; + tmp_dir[length+1]=0; + } + break; + case 'v': + verbose++; /* Allow for selecting the level of verbosity. */ + silent= 0; + break; + case '#': + DBUG_PUSH(argument ? argument : "d:t:o,/tmp/maria_pack.trace"); + break; + case 'V': + print_version(); + exit(0); + case 'I': + case '?': + usage(); + exit(0); + } + return 0; +} + + /* reads options */ + /* Initiates DEBUG - but no debugging here ! */ + +static void get_options(int *argc,char ***argv) +{ + int ho_error; + + my_progname= argv[0][0]; + if (isatty(fileno(stdout))) + write_loop=1; + + if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) + exit(ho_error); + + if (!*argc) + { + usage(); + exit(1); + } + if (join_table) + { + backup=0; /* Not needed */ + tmp_dir[0]=0; + } + return; +} + + +static MARIA_HA *open_isam_file(char *name,int mode) +{ + MARIA_HA *isam_file; + MARIA_SHARE *share; + DBUG_ENTER("open_isam_file"); + + if (!(isam_file=maria_open(name,mode, + (opt_wait ? HA_OPEN_WAIT_IF_LOCKED : + HA_OPEN_ABORT_IF_LOCKED)))) + { + VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno)); + DBUG_RETURN(0); + } + share=isam_file->s; + if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table) + { + if (!force_pack) + { + VOID(fprintf(stderr, "%s is already compressed\n", name)); + VOID(maria_close(isam_file)); + DBUG_RETURN(0); + } + if (verbose) + puts("Recompressing already compressed table"); + share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */ + } + if (! force_pack && share->state.state.records != 0 && + (share->state.state.records <= 1 || + share->state.state.data_file_length < 1024)) + { + VOID(fprintf(stderr, "%s is too small to compress\n", name)); + VOID(maria_close(isam_file)); + DBUG_RETURN(0); + } + VOID(maria_lock_database(isam_file,F_WRLCK)); + DBUG_RETURN(isam_file); +} + + +static bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count) +{ + uint i,j; + mrg->count=0; + mrg->current=0; + mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE)); + mrg->free_file=1; + mrg->src_file_has_indexes_disabled= 0; + for (i=0; i < count ; i++) + { + if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY))) + goto error; + + mrg->src_file_has_indexes_disabled|= + ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map, + mrg->file[i]->s->base.keys); + } + /* Check that files are identical */ + for (j=0 ; j < count-1 ; j++) + { + MARIA_COLUMNDEF *m1,*m2,*end; + if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength || + mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields) + goto diff_file; + m1=mrg->file[j]->s->columndef; + end=m1+mrg->file[j]->s->base.fields; + m2=mrg->file[j+1]->s->columndef; + for ( ; m1 != end ; m1++,m2++) + { + if (m1->type != m2->type || m1->length != m2->length) + goto diff_file; + } + } + mrg->count=count; + return 0; + + diff_file: + VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n", + my_progname, names[j], names[j+1])); + error: + while (i--) + maria_close(mrg->file[i]); + my_free((gptr) mrg->file,MYF(0)); + return 1; +} + + +static int compress(PACK_MRG_INFO *mrg,char *result_table) +{ + int error; + File new_file,join_isam_file; + MARIA_HA *isam_file; + MARIA_SHARE *share; + char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN]; + uint i,header_length,fields,trees,used_trees; + my_off_t old_length,new_length,tot_elements; + HUFF_COUNTS *huff_counts; + HUFF_TREE *huff_trees; + DBUG_ENTER("compress"); + + isam_file=mrg->file[0]; /* Take this as an example */ + share=isam_file->s; + new_file=join_isam_file= -1; + trees=fields=0; + huff_trees=0; + huff_counts=0; + maria_block_size= isam_file->s->block_size; + + /* Create temporary or join file */ + if (backup) + VOID(fn_format(org_name,isam_file->s->open_file_name,"",MARIA_NAME_DEXT, + 2)); + else + VOID(fn_format(org_name,isam_file->s->open_file_name,"",MARIA_NAME_DEXT, + 2+4+16)); + + if (init_pagecache(maria_pagecache, MARIA_MIN_PAGE_CACHE_SIZE, 0, 0, + maria_block_size) == 0) + { + fprintf(stderr, "Can't initialize page cache\n"); + goto err; + } + + if (!test_only && result_table) + { + /* Make a new indexfile based on first file in list */ + uint length; + char *buff; + strmov(org_name,result_table); /* Fix error messages */ + VOID(fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2)); + if ((join_isam_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) + < 0) + goto err; + length=(uint) share->base.keystart; + if (!(buff=my_malloc(length,MYF(MY_WME)))) + goto err; + if (my_pread(share->kfile.file, buff, length, 0L, MYF(MY_WME | MY_NABP)) || + my_write(join_isam_file,buff,length, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) + { + my_free(buff,MYF(0)); + goto err; + } + my_free(buff,MYF(0)); + VOID(fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2)); + } + else if (!tmp_dir[0]) + VOID(make_new_name(new_name,org_name)); + else + VOID(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4)); + if (!test_only && + (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0) + goto err; + + /* Start calculating statistics */ + + mrg->records=0; + for (i=0 ; i < mrg->count ; i++) + mrg->records+=mrg->file[i]->s->state.state.records; + + DBUG_PRINT("info", ("Compressing %s: (%lu records)", + result_table ? new_name : org_name, + (ulong) mrg->records)); + if (write_loop || verbose) + { + VOID(printf("Compressing %s: (%lu records)\n", + result_table ? new_name : org_name, (ulong) mrg->records)); + } + trees=fields=share->base.fields; + huff_counts=init_huff_count(isam_file,mrg->records); + QUICK_SAFEMALLOC; + + /* + Read the whole data file(s) for statistics. + */ + DBUG_PRINT("info", ("- Calculating statistics")); + if (write_loop || verbose) + VOID(printf("- Calculating statistics\n")); + if (get_statistic(mrg,huff_counts)) + goto err; + NORMAL_SAFEMALLOC; + old_length=0; + for (i=0; i < mrg->count ; i++) + old_length+= (mrg->file[i]->s->state.state.data_file_length - + mrg->file[i]->s->state.state.empty); + + /* + Create a global priority queue in preparation for making + temporary Huffman trees. + */ + if (init_queue(&queue,256,0,0,compare_huff_elements,0)) + goto err; + + /* + Check each column if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress. + */ + check_counts(huff_counts,fields,mrg->records); + + /* + Build a Huffman tree for each column. + */ + huff_trees=make_huff_trees(huff_counts,trees); + + /* + If the packed lengths of combined columns is less then the sum of + the non-combined columns, then create common Huffman trees for them. + We do this only for byte compressed columns, not for distinct values + compressed columns. + */ + if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0) + goto err; + + /* + Assign codes to all byte or column values. + */ + if (make_huff_decode_table(huff_trees,fields)) + goto err; + + /* Prepare a file buffer. */ + init_file_buffer(new_file,0); + + /* + Reserve space in the target file for the fixed compressed file header. + */ + file_buffer.pos_in_file=HEAD_LENGTH; + if (! test_only) + VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0))); + + /* + Write field infos: field type, pack type, length bits, tree number. + */ + write_field_info(huff_counts,fields,used_trees); + + /* + Write decode trees. + */ + if (!(tot_elements=write_huff_tree(huff_trees,trees))) + goto err; + + /* + Calculate the total length of the compression info header. + This includes the fixed compressed file header, the column compression + type descriptions, and the decode trees. + */ + header_length=(uint) file_buffer.pos_in_file+ + (uint) (file_buffer.pos-file_buffer.buffer); + + /* + Compress the source file into the target file. + */ + DBUG_PRINT("info", ("- Compressing file")); + if (write_loop || verbose) + VOID(printf("- Compressing file\n")); + error=compress_isam_file(mrg,huff_counts); + new_length=file_buffer.pos_in_file; + if (!error && !test_only) + { + char buff[MEMMAP_EXTRA_MARGIN]; /* End marginal for memmap */ + bzero(buff,sizeof(buff)); + error=my_write(file_buffer.file,buff,sizeof(buff), + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; + } + + /* + Write the fixed compressed file header. + */ + if (!error) + error=write_header(mrg,header_length,used_trees,tot_elements, + new_length); + + /* Flush the file buffer. */ + end_file_buffer(); + + /* Display statistics. */ + DBUG_PRINT("info", ("Min record length: %6d Max length: %6d " + "Mean total length: %6ld", + mrg->min_pack_length, mrg->max_pack_length, + (ulong) (mrg->records ? (new_length/mrg->records) : 0))); + if (verbose && mrg->records) + VOID(printf("Min record length: %6d Max length: %6d " + "Mean total length: %6ld\n", mrg->min_pack_length, + mrg->max_pack_length, (ulong) (new_length/mrg->records))); + + /* Close source and target file. */ + if (!test_only) + { + error|=my_close(new_file,MYF(MY_WME)); + if (!result_table) + { + error|=my_close(isam_file->dfile.file, MYF(MY_WME)); + isam_file->dfile.file= -1; /* Tell maria_close file is closed */ + isam_file->s->bitmap.file.file= -1; + } + } + + /* Cleanup. */ + free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); + if (! test_only && ! error) + { + if (result_table) + { + error=save_state_mrg(join_isam_file,mrg,new_length,glob_crc); + } + else + { + if (backup) + { + if (my_rename(org_name,make_old_name(temp_name, + isam_file->s->open_file_name), + MYF(MY_WME))) + error=1; + else + { + if (tmp_dir[0]) + error=my_copy(new_name,org_name,MYF(MY_WME)); + else + error=my_rename(new_name,org_name,MYF(MY_WME)); + if (!error) + { + VOID(my_copystat(temp_name,org_name,MYF(MY_COPYTIME))); + if (tmp_dir[0]) + VOID(my_delete(new_name,MYF(MY_WME))); + } + } + } + else + { + if (tmp_dir[0]) + { + error=my_copy(new_name,org_name, + MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME)); + if (!error) + VOID(my_delete(new_name,MYF(MY_WME))); + } + else + error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME)); + } + if (! error) + error=save_state(isam_file,mrg,new_length,glob_crc); + } + } + error|=mrg_close(mrg); + if (join_isam_file >= 0) + error|=my_close(join_isam_file,MYF(MY_WME)); + if (error) + { + VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name)); + VOID(my_delete(new_name,MYF(MY_WME))); + DBUG_RETURN(-1); + } + if (write_loop || verbose) + { + if (old_length) + VOID(printf("%.4g%% \n", + (((longlong) (old_length - new_length)) * 100.0 / + (longlong) old_length))); + else + puts("Empty file saved in compressed format"); + } + DBUG_RETURN(0); + + err: + end_pagecache(maria_pagecache, 1); + free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields); + if (new_file >= 0) + VOID(my_close(new_file,MYF(0))); + if (join_isam_file >= 0) + VOID(my_close(join_isam_file,MYF(0))); + mrg_close(mrg); + VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name)); + DBUG_RETURN(-1); +} + + /* Init a huff_count-struct for each field and init it */ + +static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records) +{ + reg2 uint i; + reg1 HUFF_COUNTS *count; + if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields* + sizeof(HUFF_COUNTS), + MYF(MY_ZEROFILL | MY_WME)))) + { + for (i=0 ; i < info->s->base.fields ; i++) + { + enum en_fieldtype type; + count[i].field_length=info->s->columndef[i].length; + type= count[i].field_type= (enum en_fieldtype) info->s->columndef[i].type; + if (type == FIELD_INTERVALL || + type == FIELD_CONSTANT || + type == FIELD_ZERO) + type = FIELD_NORMAL; + if (count[i].field_length <= 8 && + (type == FIELD_NORMAL || + type == FIELD_SKIP_ZERO)) + count[i].max_zero_fill= count[i].field_length; + /* + For every column initialize a tree, which is used to detect distinct + column values. 'int_tree' works together with 'tree_buff' and + 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'. + This is accomplished by '-1' as the element size. + */ + init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL, + NULL); + if (records && type != FIELD_BLOB && type != FIELD_VARCHAR) + count[i].tree_pos=count[i].tree_buff = + my_malloc(count[i].field_length > 1 ? tree_buff_length : 2, + MYF(MY_WME)); + } + } + return count; +} + + + /* Free memory used by counts and trees */ + +static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees, + HUFF_COUNTS *huff_counts, + uint fields) +{ + register uint i; + + if (huff_trees) + { + for (i=0 ; i < trees ; i++) + { + if (huff_trees[i].element_buffer) + my_free((gptr) huff_trees[i].element_buffer,MYF(0)); + if (huff_trees[i].code) + my_free((gptr) huff_trees[i].code,MYF(0)); + } + my_free((gptr) huff_trees,MYF(0)); + } + if (huff_counts) + { + for (i=0 ; i < fields ; i++) + { + if (huff_counts[i].tree_buff) + { + my_free((gptr) huff_counts[i].tree_buff,MYF(0)); + delete_tree(&huff_counts[i].int_tree); + } + } + my_free((gptr) huff_counts,MYF(0)); + } + delete_queue(&queue); /* This is safe to free */ + return; +} + + /* Read through old file and gather some statistics */ + +static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) +{ + int error; + uint length, null_bytes; + ulong reclength,max_blob_length; + byte *record,*pos,*next_pos,*end_pos,*start_pos; + ha_rows record_count; + HUFF_COUNTS *count,*end_count; + TREE_ELEMENT *element; + ha_checksum(*calc_checksum) (struct st_maria_info *, const byte *); + DBUG_ENTER("get_statistic"); + + reclength= mrg->file[0]->s->base.reclength; + null_bytes= mrg->file[0]->s->base.null_bytes; + record=(byte*) my_alloca(reclength); + end_count=huff_counts+mrg->file[0]->s->base.fields; + record_count=0; glob_crc=0; + max_blob_length=0; + + /* Check how to calculate checksum */ + if (mrg->file[0]->s->data_file_type == STATIC_RECORD) + calc_checksum= _ma_static_checksum; + else + calc_checksum= _ma_checksum; + + mrg_reset(mrg); + while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) + { + ulong tot_blob_length=0; + if (! error) + { + /* glob_crc is a checksum over all bytes of all records. */ + glob_crc+= (*calc_checksum)(mrg->file[0],record); + + /* Count the incidence of values separately for every column. */ + for (pos=record + null_bytes, count=huff_counts ; + count < end_count ; + count++, + pos=next_pos) + { + next_pos=end_pos=(start_pos=pos)+count->field_length; + + /* + Put the whole column value in a tree if there is room for it. + 'int_tree' is used to quickly check for duplicate values. + 'tree_buff' collects as many distinct column values as + possible. If the field length is > 1, it is tree_buff_length, + else 2 bytes. Each value is 'field_length' bytes big. If there + are more distinct column values than fit into the buffer, we + give up with this tree. BLOBs and VARCHARs do not have a + tree_buff as it can only be used with fixed length columns. + For the special case of field length == 1, we handle only the + case that there is only one distinct value in the table(s). + Otherwise, we can have a maximum of 256 distinct values. This + is then handled by the normal Huffman tree build. + + Another limit for collecting distinct column values is the + number of values itself. Since we would need to build a + Huffman tree for the values, we are limited by the 'IS_OFFSET' + constant. This constant expresses a bit which is used to + determine if a tree element holds a final value or an offset + to a child element. Hence, all values and offsets need to be + smaller than 'IS_OFFSET'. A tree element is implemented with + two integer values, one for the left branch and one for the + right branch. For the extreme case that the first element + points to the last element, the number of integers in the tree + must be less or equal to IS_OFFSET. So the number of elements + must be less or equal to IS_OFFSET / 2. + + WARNING: At first, we insert a pointer into the record buffer + as the key for the tree. If we got a new distinct value, which + is really inserted into the tree, instead of being counted + only, we will copy the column value from the record buffer to + 'tree_buff' and adjust the key pointer of the tree accordingly. + */ + if (count->tree_buff) + { + global_count=count; + if (!(element=tree_insert(&count->int_tree,pos, 0, + count->int_tree.custom_arg)) || + (element->count == 1 && + (count->tree_buff + tree_buff_length < + count->tree_pos + count->field_length)) || + (count->int_tree.elements_in_tree > IS_OFFSET / 2) || + (count->field_length == 1 && + count->int_tree.elements_in_tree > 1)) + { + delete_tree(&count->int_tree); + my_free(count->tree_buff,MYF(0)); + count->tree_buff=0; + } + else + { + /* + If tree_insert() succeeds, it either creates a new element + or increments the counter of an existing element. + */ + if (element->count == 1) + { + /* Copy the new column value into 'tree_buff'. */ + memcpy(count->tree_pos,pos,(size_t) count->field_length); + /* Adjust the key pointer in the tree. */ + tree_set_pointer(element,count->tree_pos); + /* Point behind the last column value so far. */ + count->tree_pos+=count->field_length; + } + } + } + + /* Save character counters and space-counts and zero-field-counts */ + if (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_ENDSPACE) + { + /* Ignore trailing space. */ + for ( ; end_pos > pos ; end_pos--) + if (end_pos[-1] != ' ') + break; + /* Empty fields are just counted. Go to the next record. */ + if (end_pos == pos) + { + count->empty_fields++; + count->max_zero_fill=0; + continue; + } + /* + Count the total of all trailing spaces and the number of + short trailing spaces. Remember the longest trailing space. + */ + length= (uint) (next_pos-end_pos); + count->tot_end_space+=length; + if (length < 8) + count->end_space[length]++; + if (count->max_end_space < length) + count->max_end_space = length; + } + + if (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_PRESPACE) + { + /* Ignore leading space. */ + for (pos=start_pos; pos < end_pos ; pos++) + if (pos[0] != ' ') + break; + /* Empty fields are just counted. Go to the next record. */ + if (end_pos == pos) + { + count->empty_fields++; + count->max_zero_fill=0; + continue; + } + /* + Count the total of all leading spaces and the number of + short leading spaces. Remember the longest leading space. + */ + length= (uint) (pos-start_pos); + count->tot_pre_space+=length; + if (length < 8) + count->pre_space[length]++; + if (count->max_pre_space < length) + count->max_pre_space = length; + } + + /* Calculate pos, end_pos, and max_length for variable length fields. */ + if (count->field_type == FIELD_BLOB) + { + uint field_length=count->field_length -portable_sizeof_char_ptr; + ulong blob_length= _ma_calc_blob_length(field_length, start_pos); + memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*)); + end_pos=pos+blob_length; + tot_blob_length+=blob_length; + set_if_bigger(count->max_length,blob_length); + } + else if (count->field_type == FIELD_VARCHAR) + { + uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + length= (pack_length == 1 ? (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + pos= start_pos+pack_length; + end_pos= pos+length; + set_if_bigger(count->max_length,length); + } + + /* Evaluate 'max_zero_fill' for short fields. */ + if (count->field_length <= 8 && + (count->field_type == FIELD_NORMAL || + count->field_type == FIELD_SKIP_ZERO)) + { + uint i; + /* Zero fields are just counted. Go to the next record. */ + if (!memcmp((byte*) start_pos,zero_string,count->field_length)) + { + count->zero_fields++; + continue; + } + /* + max_zero_fill starts with field_length. It is decreased every + time a shorter "zero trailer" is found. It is set to zero when + an empty field is found (see above). This suggests that the + variable should be called 'min_zero_fill'. + */ + for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ; + i++) ; + if (i < count->max_zero_fill) + count->max_zero_fill=i; + } + + /* Ignore zero fields and check fields. */ + if (count->field_type == FIELD_ZERO || + count->field_type == FIELD_CHECK) + continue; + + /* + Count the incidence of every byte value in the + significant field value. + */ + for ( ; pos < end_pos ; pos++) + count->counts[(uchar) *pos]++; + + /* Step to next field. */ + } + + if (tot_blob_length > max_blob_length) + max_blob_length=tot_blob_length; + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) + { + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); + } + } + else if (error != HA_ERR_RECORD_DELETED) + { + VOID(fprintf(stderr, "Got error %d while reading rows", error)); + break; + } + + /* Step to next record. */ + } + if (write_loop) + { + VOID(printf(" \r")); + VOID(fflush(stdout)); + } + + /* + If --debug=d,fakebigcodes is set, fake the counts to get big Huffman + codes. + */ + DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count);); + + DBUG_PRINT("info", ("Found the following number of incidents " + "of the byte codes:")); + if (verbose >= 2) + VOID(printf("Found the following number of incidents " + "of the byte codes:\n")); + for (count= huff_counts ; count < end_count; count++) + { + uint idx; + my_off_t total_count; + char llbuf[32]; + + DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1))); + if (verbose >= 2) + VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1))); + if (count->tree_buff) + { + DBUG_PRINT("info", ("number of distinct values: %u", + (uint) ((count->tree_pos - count->tree_buff) / + count->field_length))); + if (verbose >= 2) + VOID(printf("number of distinct values: %u\n", + (uint) ((count->tree_pos - count->tree_buff) / + count->field_length))); + } + total_count= 0; + for (idx= 0; idx < 256; idx++) + { + if (count->counts[idx]) + { + total_count+= count->counts[idx]; + DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx, + llstr((longlong) count->counts[idx], llbuf))); + if (verbose >= 2) + VOID(printf("counts[0x%02x]: %12s\n", idx, + llstr((longlong) count->counts[idx], llbuf))); + } + } + DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count, + llbuf))); + if ((verbose >= 2) && total_count) + { + VOID(printf("total: %12s\n", + llstr((longlong) total_count, llbuf))); + } + } + + mrg->records=record_count; + mrg->max_blob_length=max_blob_length; + my_afree((gptr) record); + DBUG_RETURN(error != HA_ERR_END_OF_FILE); +} + +static int compare_huff_elements(void *not_used __attribute__((unused)), + byte *a, byte *b) +{ + return *((my_off_t*) a) < *((my_off_t*) b) ? -1 : + (*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1); +} + + /* Check each tree if we should use pre-space-compress, end-space- + compress, empty-field-compress or zero-field-compress */ + +static void check_counts(HUFF_COUNTS *huff_counts, uint trees, + my_off_t records) +{ + uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count]; + my_off_t old_length,new_length,length; + DBUG_ENTER("check_counts"); + + bzero((gptr) field_count,sizeof(field_count)); + space_fields=fill_zero_fields=0; + + for (; trees-- ; huff_counts++) + { + if (huff_counts->field_type == FIELD_BLOB) + { + huff_counts->length_bits=max_bit(huff_counts->max_length); + goto found_pack; + } + else if (huff_counts->field_type == FIELD_VARCHAR) + { + huff_counts->length_bits=max_bit(huff_counts->max_length); + goto found_pack; + } + else if (huff_counts->field_type == FIELD_CHECK) + { + huff_counts->bytes_packed=0; + huff_counts->counts[0]=0; + goto found_pack; + } + + huff_counts->field_type=FIELD_NORMAL; + huff_counts->pack_type=0; + + /* Check for zero-filled records (in this column), or zero records. */ + if (huff_counts->zero_fields || ! records) + { + my_off_t old_space_count; + /* + If there are only zero filled records (in this column), + or no records at all, we are done. + */ + if (huff_counts->zero_fields == records) + { + huff_counts->field_type= FIELD_ZERO; + huff_counts->bytes_packed=0; + huff_counts->counts[0]=0; + goto found_pack; + } + /* Remeber the number of significant spaces. */ + old_space_count=huff_counts->counts[' ']; + /* Add all leading and trailing spaces. */ + huff_counts->counts[' ']+= (huff_counts->tot_end_space + + huff_counts->tot_pre_space + + huff_counts->empty_fields * + huff_counts->field_length); + /* Check, what the compressed length of this would be. */ + old_length=calc_packed_length(huff_counts,0)+records/8; + /* Get the number of zero bytes. */ + length=huff_counts->zero_fields*huff_counts->field_length; + /* Add it to the counts. */ + huff_counts->counts[0]+=length; + /* Check, what the compressed length of this would be. */ + new_length=calc_packed_length(huff_counts,0); + /* If the compression without the zeroes would be shorter, we are done. */ + if (old_length < new_length && huff_counts->field_length > 1) + { + huff_counts->field_type=FIELD_SKIP_ZERO; + huff_counts->counts[0]-=length; + huff_counts->bytes_packed=old_length- records/8; + goto found_pack; + } + /* Remove the insignificant spaces, but keep the zeroes. */ + huff_counts->counts[' ']=old_space_count; + } + /* Check, what the compressed length of this column would be. */ + huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + + /* + If there are enough empty records (in this column), + treating them specially may pay off. + */ + if (huff_counts->empty_fields) + { + if (huff_counts->field_length > 2 && + huff_counts->empty_fields + (records - huff_counts->empty_fields)* + (1+max_bit(max(huff_counts->max_pre_space, + huff_counts->max_end_space))) < + records * max_bit(huff_counts->field_length)) + { + huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS; + } + else + { + length=huff_counts->empty_fields*huff_counts->field_length; + if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space) + { + huff_counts->tot_end_space+=length; + huff_counts->max_end_space=huff_counts->field_length; + if (huff_counts->field_length < 8) + huff_counts->end_space[huff_counts->field_length]+= + huff_counts->empty_fields; + } + if (huff_counts->tot_pre_space) + { + huff_counts->tot_pre_space+=length; + huff_counts->max_pre_space=huff_counts->field_length; + if (huff_counts->field_length < 8) + huff_counts->pre_space[huff_counts->field_length]+= + huff_counts->empty_fields; + } + } + } + + /* + If there are enough trailing spaces (in this column), + treating them specially may pay off. + */ + if (huff_counts->tot_end_space) + { + huff_counts->counts[' ']+=huff_counts->tot_pre_space; + if (test_space_compress(huff_counts,records,huff_counts->max_end_space, + huff_counts->end_space, + huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE)) + goto found_pack; + huff_counts->counts[' ']-=huff_counts->tot_pre_space; + } + + /* + If there are enough leading spaces (in this column), + treating them specially may pay off. + */ + if (huff_counts->tot_pre_space) + { + if (test_space_compress(huff_counts,records,huff_counts->max_pre_space, + huff_counts->pre_space, + huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE)) + goto found_pack; + } + + found_pack: /* Found field-packing */ + + /* Test if we can use zero-fill */ + + if (huff_counts->max_zero_fill && + (huff_counts->field_type == FIELD_NORMAL || + huff_counts->field_type == FIELD_SKIP_ZERO)) + { + huff_counts->counts[0]-=huff_counts->max_zero_fill* + (huff_counts->field_type == FIELD_SKIP_ZERO ? + records - huff_counts->zero_fields : records); + huff_counts->pack_type|=PACK_TYPE_ZERO_FILL; + huff_counts->bytes_packed=calc_packed_length(huff_counts,0); + } + + /* Test if intervall-field is better */ + + if (huff_counts->tree_buff) + { + HUFF_TREE tree; + + DBUG_EXECUTE_IF("forceintervall", + huff_counts->bytes_packed= ~ (my_off_t) 0;); + tree.element_buffer=0; + if (!make_huff_tree(&tree,huff_counts) && + tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed) + { + if (tree.elements == 1) + huff_counts->field_type=FIELD_CONSTANT; + else + huff_counts->field_type=FIELD_INTERVALL; + huff_counts->pack_type=0; + } + else + { + my_free((gptr) huff_counts->tree_buff,MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff=0; + } + if (tree.element_buffer) + my_free((gptr) tree.element_buffer,MYF(0)); + } + if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS) + space_fields++; + if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL) + fill_zero_fields++; + field_count[huff_counts->field_type]++; + } + DBUG_PRINT("info", ("normal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields)); + DBUG_PRINT("info", ("pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d", + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); + if (verbose) + VOID(printf("\nnormal: %3d empty-space: %3d " + "empty-zero: %3d empty-fill: %3d\n" + "pre-space: %3d end-space: %3d " + "intervall-fields: %3d zero: %3d\n", + field_count[FIELD_NORMAL],space_fields, + field_count[FIELD_SKIP_ZERO],fill_zero_fields, + field_count[FIELD_SKIP_PRESPACE], + field_count[FIELD_SKIP_ENDSPACE], + field_count[FIELD_INTERVALL], + field_count[FIELD_ZERO])); + DBUG_VOID_RETURN; +} + + +/* Test if we can use space-compression and empty-field-compression */ + +static int +test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records, + uint max_space_length, my_off_t *space_counts, + my_off_t tot_space_count, enum en_fieldtype field_type) +{ + int min_pos; + uint length_bits,i; + my_off_t space_count,min_space_count,min_pack,new_length,skip; + + length_bits=max_bit(max_space_length); + + /* Default no end_space-packing */ + space_count=huff_counts->counts[(uint) ' ']; + min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count); + min_pack=calc_packed_length(huff_counts,0); + min_pos= -2; + huff_counts->counts[(uint) ' ']=space_count; + + /* Test with allways space-count */ + new_length=huff_counts->bytes_packed+length_bits*records/8; + if (new_length+1 < min_pack) + { + min_pos= -1; + min_pack=new_length; + min_space_count=space_count; + } + /* Test with length-flag */ + for (skip=0L, i=0 ; i < 8 ; i++) + { + if (space_counts[i]) + { + if (i) + huff_counts->counts[(uint) ' ']+=space_counts[i]; + skip+=huff_counts->pre_space[i]; + new_length=calc_packed_length(huff_counts,0)+ + (records+(records-skip)*(1+length_bits))/8; + if (new_length < min_pack) + { + min_pos=(int) i; + min_pack=new_length; + min_space_count=huff_counts->counts[(uint) ' ']; + } + } + } + + huff_counts->counts[(uint) ' ']=min_space_count; + huff_counts->bytes_packed=min_pack; + switch (min_pos) { + case -2: + return(0); /* No space-compress */ + case -1: /* Always space-count */ + huff_counts->field_type=field_type; + huff_counts->min_space=0; + huff_counts->length_bits=max_bit(max_space_length); + break; + default: + huff_counts->field_type=field_type; + huff_counts->min_space=(uint) min_pos; + huff_counts->pack_type|=PACK_TYPE_SELECTED; + huff_counts->length_bits=max_bit(max_space_length); + break; + } + return(1); /* Using space-compress */ +} + + + /* Make a huff_tree of each huff_count */ + +static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees) +{ + uint tree; + HUFF_TREE *huff_tree; + DBUG_ENTER("make_huff_trees"); + + if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE), + MYF(MY_WME | MY_ZEROFILL)))) + DBUG_RETURN(0); + + for (tree=0 ; tree < trees ; tree++) + { + if (make_huff_tree(huff_tree+tree,huff_counts+tree)) + { + while (tree--) + my_free((gptr) huff_tree[tree].element_buffer,MYF(0)); + my_free((gptr) huff_tree,MYF(0)); + DBUG_RETURN(0); + } + } + DBUG_RETURN(huff_tree); +} + +/* + Build a Huffman tree. + + SYNOPSIS + make_huff_tree() + huff_tree The Huffman tree. + huff_counts The counts. + + DESCRIPTION + Build a Huffman tree according to huff_counts->counts or + huff_counts->tree_buff. tree_buff, if non-NULL contains up to + tree_buff_length of distinct column values. In that case, whole + values can be Huffman encoded instead of single bytes. + + RETURN + 0 OK + != 0 Error +*/ + +static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) +{ + uint i,found,bits_packed,first,last; + my_off_t bytes_packed; + HUFF_ELEMENT *a,*b,*new_huff_el; + + first=last=0; + if (huff_counts->tree_buff) + { + /* Calculate the number of distinct values in tree_buff. */ + found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) / + huff_counts->field_length; + first=0; last=found-1; + } + else + { + /* Count the number of byte codes found in the column. */ + for (i=found=0 ; i < 256 ; i++) + { + if (huff_counts->counts[i]) + { + if (! found++) + first=i; + last=i; + } + } + if (found < 2) + found=2; + } + + /* When using 'tree_buff' we can have more that 256 values. */ + if (queue.max_elements < found) + { + delete_queue(&queue); + if (init_queue(&queue,found,0,0,compare_huff_elements,0)) + return -1; + } + + /* Allocate or reallocate an element buffer for the Huffman tree. */ + if (!huff_tree->element_buffer) + { + if (!(huff_tree->element_buffer= + (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME)))) + return 1; + } + else + { + HUFF_ELEMENT *temp; + if (!(temp= + (HUFF_ELEMENT*) my_realloc((gptr) huff_tree->element_buffer, + found*2*sizeof(HUFF_ELEMENT), + MYF(MY_WME)))) + return 1; + huff_tree->element_buffer=temp; + } + + huff_counts->tree=huff_tree; + huff_tree->counts=huff_counts; + huff_tree->min_chr=first; + huff_tree->max_chr=last; + huff_tree->char_bits=max_bit(last-first); + huff_tree->offset_bits=max_bit(found-1)+1; + + if (huff_counts->tree_buff) + { + huff_tree->elements=0; + huff_tree->tree_pack_length=(1+15+16+5+5+ + (huff_tree->char_bits+1)*found+ + (huff_tree->offset_bits+1)* + (found-2)+7)/8 + + (uint) (huff_tree->counts->tree_pos- + huff_tree->counts->tree_buff); + /* + Put a HUFF_ELEMENT into the queue for every distinct column value. + + tree_walk() calls save_counts_in_queue() for every element in + 'int_tree'. This takes elements from the target trees element + buffer and places references to them into the buffer of the + priority queue. We insert in column value order, but the order is + in fact irrelevant here. We will establish the correct order + later. + */ + tree_walk(&huff_counts->int_tree, + (int (*)(void*, element_count,void*)) save_counts_in_queue, + (gptr) huff_tree, left_root_right); + } + else + { + huff_tree->elements=found; + huff_tree->tree_pack_length=(9+9+5+5+ + (huff_tree->char_bits+1)*found+ + (huff_tree->offset_bits+1)* + (found-2)+7)/8; + /* + Put a HUFF_ELEMENT into the queue for every byte code found in the column. + + The elements are taken from the target trees element buffer. + Instead of using queue_insert(), we just place references to the + elements into the buffer of the priority queue. We insert in byte + value order, but the order is in fact irrelevant here. We will + establish the correct order later. + */ + for (i=first, found=0 ; i <= last ; i++) + { + if (huff_counts->counts[i]) + { + new_huff_el=huff_tree->element_buffer+(found++); + new_huff_el->count=huff_counts->counts[i]; + new_huff_el->a.leaf.null=0; + new_huff_el->a.leaf.element_nr=i; + queue.root[found]=(byte*) new_huff_el; + } + } + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which builds the Huffman tree. + */ + while (found < 2) + { + new_huff_el=huff_tree->element_buffer+(found++); + new_huff_el->count=0; + new_huff_el->a.leaf.null=0; + if (last) + new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1; + else + new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1; + queue.root[found]=(byte*) new_huff_el; + } + } + + /* Make a queue from the queue buffer. */ + queue.elements=found; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ + for (i=found/2 ; i > 0 ; i--) + _downheap(&queue,i); + + /* The Huffman algorithm. */ + bytes_packed=0; bits_packed=0; + for (i=1 ; i < found ; i++) + { + /* + Pop the top element from the queue (the one with the least incidence). + Popping from a priority queue includes a re-ordering of the queue, + to get the next least incidence element to the top. + */ + a=(HUFF_ELEMENT*) queue_remove(&queue,0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b=(HUFF_ELEMENT*) queue.root[1]; + /* Get a new element from the element buffer. */ + new_huff_el=huff_tree->element_buffer+found+i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count=a->count+b->count; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ + bits_packed+=(uint) (new_huff_el->count & 7); + bytes_packed+=new_huff_el->count/8; + /* The new element points to its children, lesser in left. */ + new_huff_el->a.nod.left=a; + new_huff_el->a.nod.right=b; + /* + Replace the copied top element by the new element and re-order the + queue. + */ + queue.root[1]=(byte*) new_huff_el; + queue_replaced(&queue); + } + huff_tree->root=(HUFF_ELEMENT*) queue.root[1]; + huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8; + return 0; +} + +static int compare_tree(void* cmp_arg __attribute__((unused)), + register const uchar *s, register const uchar *t) +{ + uint length; + for (length=global_count->field_length; length-- ;) + if (*s++ != *t++) + return (int) s[-1] - (int) t[-1]; + return 0; +} + +/* + Organize distinct column values and their incidences into a priority queue. + + SYNOPSIS + save_counts_in_queue() + key The column value. + count The incidence of this value. + tree The Huffman tree to be built later. + + DESCRIPTION + We use the element buffer of the targeted tree. The distinct column + values are organized in a priority queue first. The Huffman + algorithm will later organize the elements into a Huffman tree. For + the time being, we just place references to the elements into the + queue buffer. The buffer will later be organized into a priority + queue. + + RETURN + 0 + */ + +static int save_counts_in_queue(byte *key, element_count count, + HUFF_TREE *tree) +{ + HUFF_ELEMENT *new_huff_el; + + new_huff_el=tree->element_buffer+(tree->elements++); + new_huff_el->count=count; + new_huff_el->a.leaf.null=0; + new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) / + tree->counts->field_length; + queue.root[tree->elements]=(byte*) new_huff_el; + return 0; +} + + +/* + Calculate length of file if given counts should be used. + + SYNOPSIS + calc_packed_length() + huff_counts The counts for a column of the table(s). + add_tree_lenght If the decode tree length should be added. + + DESCRIPTION + We need to follow the Huffman algorithm until we know, how many bits + are required for each byte code. But we do not need the resulting + Huffman tree. Hence, we can leave out some steps which are essential + in make_huff_tree(). + + RETURN + Number of bytes required to compress this table column. +*/ + +static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, + uint add_tree_lenght) +{ + uint i,found,bits_packed,first,last; + my_off_t bytes_packed; + HUFF_ELEMENT element_buffer[256]; + DBUG_ENTER("calc_packed_length"); + + /* + WARNING: We use a small hack for efficiency: Instead of placing + references to HUFF_ELEMENTs into the queue, we just insert + references to the counts of the byte codes which appeared in this + table column. During the Huffman algorithm they are successively + replaced by references to HUFF_ELEMENTs. This works, because + HUFF_ELEMENTs have the incidence count at their beginning. + Regardless, wether the queue array contains references to counts of + type my_off_t or references to HUFF_ELEMENTs which have the count of + type my_off_t at their beginning, it always points to a count of the + same type. + + Instead of using queue_insert(), we just copy the references into + the buffer of the priority queue. We insert in byte value order, but + the order is in fact irrelevant here. We will establish the correct + order later. + */ + first=last=0; + for (i=found=0 ; i < 256 ; i++) + { + if (huff_counts->counts[i]) + { + if (! found++) + first=i; + last=i; + /* We start with root[1], which is the queues top element. */ + queue.root[found]=(byte*) &huff_counts->counts[i]; + } + } + if (!found) + DBUG_RETURN(0); /* Empty tree */ + /* + If there is only a single byte value in this field in all records, + add a second element with zero incidence. This is required to enter + the loop, which follows the Huffman algorithm. + */ + if (found < 2) + queue.root[++found]=(byte*) &huff_counts->counts[last ? 0 : 1]; + + /* Make a queue from the queue buffer. */ + queue.elements=found; + + bytes_packed=0; bits_packed=0; + /* Add the length of the coding table, which would become part of the file. */ + if (add_tree_lenght) + bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+ + (max_bit(found-1)+1+1)*(found-2) +7)/8; + + /* + Make a priority queue from the queue. Construct its index so that we + have a partially ordered tree. + */ + for (i=(found+1)/2 ; i > 0 ; i--) + _downheap(&queue,i); + + /* The Huffman algorithm. */ + for (i=0 ; i < found-1 ; i++) + { + my_off_t *a; + my_off_t *b; + HUFF_ELEMENT *new_huff_el; + + /* + Pop the top element from the queue (the one with the least + incidence). Popping from a priority queue includes a re-ordering + of the queue, to get the next least incidence element to the top. + */ + a= (my_off_t*) queue_remove(&queue, 0); + /* + Copy the next least incidence element. The queue implementation + reserves root[0] for temporary purposes. root[1] is the top. + */ + b= (my_off_t*) queue.root[1]; + /* Create a new element in a local (automatic) buffer. */ + new_huff_el= element_buffer + i; + /* The new element gets the sum of the two least incidence elements. */ + new_huff_el->count= *a + *b; + /* + The Huffman algorithm assigns another bit to the code for a byte + every time that bytes incidence is combined (directly or indirectly) + to a new element as one of the two least incidence elements. + This means that one more bit per incidence of that byte is required + in the resulting file. So we add the new combined incidence as the + number of bits by which the result grows. + */ + bits_packed+=(uint) (new_huff_el->count & 7); + bytes_packed+=new_huff_el->count/8; + /* + Replace the copied top element by the new element and re-order the + queue. This successively replaces the references to counts by + references to HUFF_ELEMENTs. + */ + queue.root[1]=(byte*) new_huff_el; + queue_replaced(&queue); + } + DBUG_RETURN(bytes_packed+(bits_packed+7)/8); +} + + + /* Remove trees that don't give any compression */ + +static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees) +{ + uint k,tree_number; + HUFF_COUNTS count,*i,*j,*last_count; + + last_count=huff_counts+trees; + for (tree_number=0, i=huff_counts ; i < last_count ; i++) + { + if (!i->tree->tree_number) + { + i->tree->tree_number= ++tree_number; + if (i->tree_buff) + continue; /* Don't join intervall */ + for (j=i+1 ; j < last_count ; j++) + { + if (! j->tree->tree_number && ! j->tree_buff) + { + for (k=0 ; k < 256 ; k++) + count.counts[k]=i->counts[k]+j->counts[k]; + if (calc_packed_length(&count,1) <= + i->tree->bytes_packed + j->tree->bytes_packed+ + i->tree->tree_pack_length+j->tree->tree_pack_length+ + ALLOWED_JOIN_DIFF) + { + memcpy_fixed((byte*) i->counts,(byte*) count.counts, + sizeof(count.counts[0])*256); + my_free((gptr) j->tree->element_buffer,MYF(0)); + j->tree->element_buffer=0; + j->tree=i->tree; + bmove((byte*) i->counts,(byte*) count.counts, + sizeof(count.counts[0])*256); + if (make_huff_tree(i->tree,i)) + return (uint) -1; + } + } + } + } + } + DBUG_PRINT("info", ("Original trees: %d After join: %d", + trees, tree_number)); + if (verbose) + VOID(printf("Original trees: %d After join: %d\n", trees, tree_number)); + return tree_number; /* Return trees left */ +} + + +/* + Fill in huff_tree encode tables. + + SYNOPSIS + make_huff_decode_table() + huff_tree An array of HUFF_TREE which are to be encoded. + trees The number of HUFF_TREE in the array. + + RETURN + 0 success + != 0 error +*/ + +static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) +{ + uint elements; + for ( ; trees-- ; huff_tree++) + { + if (huff_tree->tree_number > 0) + { + elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256; + if (!(huff_tree->code = + (ulonglong*) my_malloc(elements* + (sizeof(ulonglong) + sizeof(uchar)), + MYF(MY_WME | MY_ZEROFILL)))) + return 1; + huff_tree->code_len=(uchar*) (huff_tree->code+elements); + make_traverse_code_tree(huff_tree, huff_tree->root, + 8 * sizeof(ulonglong), LL(0)); + } + } + return 0; +} + + +static void make_traverse_code_tree(HUFF_TREE *huff_tree, + HUFF_ELEMENT *element, + uint size, ulonglong code) +{ + uint chr; + if (!element->a.leaf.null) + { + chr=element->a.leaf.element_nr; + huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size); + huff_tree->code[chr]= (code >> size); + if (huff_tree->height < 8 * sizeof(ulonglong) - size) + huff_tree->height= 8 * sizeof(ulonglong) - size; + } + else + { + size--; + make_traverse_code_tree(huff_tree,element->a.nod.left,size,code); + make_traverse_code_tree(huff_tree, element->a.nod.right, size, + code + (((ulonglong) 1) << size)); + } + return; +} + + +/* + Convert a value into binary digits. + + SYNOPSIS + bindigits() + value The value. + length The number of low order bits to convert. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *bindigits(ulonglong value, uint bits) +{ + static char digits[72]; + char *ptr= digits; + uint idx= bits; + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1); + *ptr= '\0'; + return digits; +} + + +/* + Convert a value into hexadecimal digits. + + SYNOPSIS + hexdigits() + value The value. + + NOTE + The result string is in static storage. It is reused on every call. + So you cannot use it twice in one expression. + + RETURN + A pointer to a static NUL-terminated string. + */ + +static char *hexdigits(ulonglong value) +{ + static char digits[20]; + char *ptr= digits; + uint idx= 2 * sizeof(value); /* Two hex digits per byte. */ + + DBUG_ASSERT(idx < sizeof(digits)); + while (idx) + { + if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9') + *(ptr - 1)+= 'a' - '9' - 1; + } + *ptr= '\0'; + return digits; +} + + + /* Write header to new packed data file */ + +static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees, + my_off_t tot_elements,my_off_t filelength) +{ + byte *buff= (byte*) file_buffer.pos; + + bzero(buff,HEAD_LENGTH); + memcpy_fixed(buff,maria_pack_file_magic,4); + int4store(buff+4,head_length); + int4store(buff+8, mrg->min_pack_length); + int4store(buff+12,mrg->max_pack_length); + int4store(buff+16,tot_elements); + int4store(buff+20,intervall_length); + int2store(buff+24,trees); + buff[26]=(char) mrg->ref_length; + /* Save record pointer length */ + buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2); + if (test_only) + return 0; + VOID(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0))); + return my_write(file_buffer.file,(const byte *) file_buffer.pos,HEAD_LENGTH, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; +} + + /* Write fieldinfo to new packed file */ + +static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) +{ + reg1 uint i; + uint huff_tree_bits; + huff_tree_bits=max_bit(trees ? trees-1 : 0); + + DBUG_PRINT("info", (" ")); + DBUG_PRINT("info", ("column types:")); + DBUG_PRINT("info", ("FIELD_NORMAL 0")); + DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1")); + DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2")); + DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3")); + DBUG_PRINT("info", ("FIELD_BLOB 4")); + DBUG_PRINT("info", ("FIELD_CONSTANT 5")); + DBUG_PRINT("info", ("FIELD_INTERVALL 6")); + DBUG_PRINT("info", ("FIELD_ZERO 7")); + DBUG_PRINT("info", ("FIELD_VARCHAR 8")); + DBUG_PRINT("info", ("FIELD_CHECK 9")); + DBUG_PRINT("info", (" ")); + DBUG_PRINT("info", ("pack type as a set of flags:")); + DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1")); + DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2")); + DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4")); + DBUG_PRINT("info", (" ")); + if (verbose >= 2) + { + VOID(printf("\n")); + VOID(printf("column types:\n")); + VOID(printf("FIELD_NORMAL 0\n")); + VOID(printf("FIELD_SKIP_ENDSPACE 1\n")); + VOID(printf("FIELD_SKIP_PRESPACE 2\n")); + VOID(printf("FIELD_SKIP_ZERO 3\n")); + VOID(printf("FIELD_BLOB 4\n")); + VOID(printf("FIELD_CONSTANT 5\n")); + VOID(printf("FIELD_INTERVALL 6\n")); + VOID(printf("FIELD_ZERO 7\n")); + VOID(printf("FIELD_VARCHAR 8\n")); + VOID(printf("FIELD_CHECK 9\n")); + VOID(printf("\n")); + VOID(printf("pack type as a set of flags:\n")); + VOID(printf("PACK_TYPE_SELECTED 1\n")); + VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n")); + VOID(printf("PACK_TYPE_ZERO_FILL 4\n")); + VOID(printf("\n")); + } + for (i=0 ; i++ < fields ; counts++) + { + write_bits((ulonglong) (int) counts->field_type, 5); + write_bits(counts->pack_type,6); + if (counts->pack_type & PACK_TYPE_ZERO_FILL) + write_bits(counts->max_zero_fill,5); + else + write_bits(counts->length_bits,5); + write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits); + DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + i , counts->field_type, counts->pack_type, + counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); + if (verbose >= 2) + VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u " + "tree: %2u length: %4u\n", i , counts->field_type, + counts->pack_type, counts->max_zero_fill, counts->length_bits, + counts->tree->tree_number, counts->field_length)); + } + flush_bits(); + return; +} + + /* Write all huff_trees to new datafile. Return tot count of + elements in all trees + Returns 0 on error */ + +static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) +{ + uint i,int_length; + uint tree_no; + uint codes; + uint errors= 0; + uint *packed_tree,*offset,length; + my_off_t elements; + + /* Find the highest number of elements in the trees. */ + for (i=length=0 ; i < trees ; i++) + if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length) + length=huff_tree[i].elements; + /* + Allocate a buffer for packing a decode tree. Two numbers per element + (left child and right child). + */ + if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2))) + { + my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2); + return 0; + } + + DBUG_PRINT("info", (" ")); + if (verbose >= 2) + VOID(printf("\n")); + tree_no= 0; + intervall_length=0; + for (elements=0; trees-- ; huff_tree++) + { + /* Skip columns that have been joined with other columns. */ + if (huff_tree->tree_number == 0) + continue; /* Deleted tree */ + tree_no++; + DBUG_PRINT("info", (" ")); + if (verbose >= 3) + VOID(printf("\n")); + /* Count the total number of elements (byte codes or column values). */ + elements+=huff_tree->elements; + huff_tree->max_offset=2; + /* Build a tree of offsets and codes for decoding in 'packed_tree'. */ + if (huff_tree->elements <= 1) + offset=packed_tree; + else + offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree); + + /* This should be the same as 'length' above. */ + huff_tree->offset_bits=max_bit(huff_tree->max_offset); + + /* + Since we check this during collecting the distinct column values, + this should never happen. + */ + if (huff_tree->max_offset >= IS_OFFSET) + { /* This should be impossible */ + VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n", + huff_tree->max_offset)); + my_afree((gptr) packed_tree); + return 0; + } + + DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu " + "char_bits: %u\n", + (ulong) (file_buffer.pos - file_buffer.buffer), + huff_tree->elements, (ulong) (offset - packed_tree), + huff_tree->char_bits)); + if (!huff_tree->counts->tree_buff) + { + /* We do a byte compression on this column. Mark with bit 0. */ + write_bits(0,1); + write_bits(huff_tree->min_chr,8); + write_bits(huff_tree->elements,9); + write_bits(huff_tree->char_bits,5); + write_bits(huff_tree->offset_bits,5); + int_length=0; + } + else + { + int_length=(uint) (huff_tree->counts->tree_pos - + huff_tree->counts->tree_buff); + /* We have distinct column values for this column. Mark with bit 1. */ + write_bits(1,1); + write_bits(huff_tree->elements,15); + write_bits(int_length,16); + write_bits(huff_tree->char_bits,5); + write_bits(huff_tree->offset_bits,5); + intervall_length+=int_length; + } + DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u " + "offset_bits: %2u %s: %5u codelen: %2u", + tree_no, huff_tree->elements, huff_tree->char_bits, + huff_tree->offset_bits, huff_tree->counts->tree_buff ? + "bufflen" : "min_chr", huff_tree->counts->tree_buff ? + int_length : huff_tree->min_chr, huff_tree->height)); + if (verbose >= 2) + VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u " + "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements, + huff_tree->char_bits, huff_tree->offset_bits, + huff_tree->counts->tree_buff ? "bufflen" : "min_chr", + huff_tree->counts->tree_buff ? int_length : + huff_tree->min_chr, huff_tree->height)); + + /* Check that the code tree length matches the element count. */ + length=(uint) (offset-packed_tree); + if (length != huff_tree->elements*2-2) + { + VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n", + length, huff_tree->elements * 2 - 2)); + errors++; + break; + } + + for (i=0 ; i < length ; i++) + { + if (packed_tree[i] & IS_OFFSET) + write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits), + huff_tree->offset_bits+1); + else + write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1); + DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x", + i, (packed_tree[i] & IS_OFFSET) ? + " -> " : "", (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + if (verbose >= 3) + VOID(printf("tree[0x%04x]: %s0x%04x\n", + i, (packed_tree[i] & IS_OFFSET) ? " -> " : "", + (packed_tree[i] & IS_OFFSET) ? + packed_tree[i] - IS_OFFSET + i : packed_tree[i])); + } + flush_bits(); + + /* + Display coding tables and check their correctness. + */ + codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256; + for (i= 0; i < codes; i++) + { + ulonglong code; + uint bits; + uint len; + uint idx; + + if (! (len= huff_tree->code_len[i])) + continue; + DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], + huff_tree->code_len[i]))); + if (verbose >= 3) + VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i, + hexdigits(huff_tree->code[i]), huff_tree->code_len[i], + bindigits(huff_tree->code[i], huff_tree->code_len[i]))); + + /* Check that the encode table decodes correctly. */ + code= 0; + bits= 0; + idx= 0; + DBUG_EXECUTE_IF("forcechkerr1", len--;); + DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code);); + DBUG_EXECUTE_IF("forcechkerr3", idx= length;); + for (;;) + { + if (! len) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n", + hexdigits(huff_tree->code[i]), huff_tree->code_len[i])); + errors++; + break; + } + code<<= 1; + code|= (huff_tree->code[i] >> (--len)) & 1; + bits++; + if (bits > 8 * sizeof(code)) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", + bits, (uint) (8 * sizeof(code)))); + errors++; + break; + } + idx+= (uint) code & 1; + if (idx >= length) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n", + idx, length)); + errors++; + break; + } + if (packed_tree[idx] & IS_OFFSET) + idx+= packed_tree[idx] & ~IS_OFFSET; + else + break; /* Hit a leaf. This contains the result value. */ + } + if (errors) + break; + + DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;); + if (packed_tree[idx] != i) + { + VOID(fflush(stdout)); + VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n", + packed_tree[idx], i)); + errors++; + break; + } + } /*end for (codes)*/ + if (errors) + break; + + /* Write column values in case of distinct column value compression. */ + if (huff_tree->counts->tree_buff) + { + for (i=0 ; i < int_length ; i++) + { + write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8); + DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x", + i, (uchar) huff_tree->counts->tree_buff[i])); + if (verbose >= 3) + VOID(printf("column_values[0x%04x]: 0x%02x\n", + i, (uchar) huff_tree->counts->tree_buff[i])); + } + } + flush_bits(); + } + DBUG_PRINT("info", (" ")); + if (verbose >= 2) + VOID(printf("\n")); + my_afree((gptr) packed_tree); + if (errors) + { + VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n")); + return 0; + } + return elements; +} + + +static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, + uint *offset) +{ + uint *prev_offset; + + prev_offset= offset; + /* + 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null, + then there is no left child and, hence no right child either. This + is a property of a binary tree. An element is either a node with two + childs, or a leaf without childs. + + The current element is always a node with two childs. Go left first. + */ + if (!element->a.nod.left->a.leaf.null) + { + /* Store the byte code or the index of the column value. */ + prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; + offset+=2; + } + else + { + /* + Recursively traverse the tree to the left. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ + prev_offset[0]= IS_OFFSET+2; + offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2); + } + + /* Now, check the right child. */ + if (!element->a.nod.right->a.leaf.null) + { + /* Store the byte code or the index of the column value. */ + prev_offset[1]=element->a.nod.right->a.leaf.element_nr; + return offset; + } + else + { + /* + Recursively traverse the tree to the right. Mark it as an offset to + another tree node (in contrast to a byte code or column value index). + */ + uint temp=(uint) (offset-prev_offset-1); + prev_offset[1]= IS_OFFSET+ temp; + if (huff_tree->max_offset < temp) + huff_tree->max_offset = temp; + return make_offset_code_tree(huff_tree,element->a.nod.right,offset); + } +} + + /* Get number of bits neaded to represent value */ + +static uint max_bit(register uint value) +{ + reg2 uint power=1; + + while ((value>>=1)) + power++; + return (power); +} + + +static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) +{ + int error; + uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length; + uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes; + my_off_t record_count; + char llbuf[32]; + ulong length,pack_length; + byte *record,*pos,*end_pos,*record_pos,*start_pos; + HUFF_COUNTS *count,*end_count; + HUFF_TREE *tree; + MARIA_HA *isam_file=mrg->file[0]; + uint pack_version= (uint) isam_file->s->pack.version; + DBUG_ENTER("compress_isam_file"); + + /* Allocate a buffer for the records (excluding blobs). */ + if (!(record=(byte*) my_alloca(isam_file->s->base.reclength))) + return -1; + + end_count=huff_counts+isam_file->s->base.fields; + min_record_length= (uint) ~0; + max_record_length=0; + null_bytes= isam_file->s->base.null_bytes; + + /* + Calculate the maximum number of bits required to pack the records. + Remember to understand 'max_zero_fill' as 'min_zero_fill'. + The tree height determines the maximum number of bits per value. + Some fields skip leading or trailing spaces or zeroes. The skipped + number of bytes is encoded by 'length_bits' bits. + Empty blobs and varchar are encoded with a single 1 bit. Other blobs + and varchar get a leading 0 bit. + */ + max_calc_length= null_bytes; + for (i= 0 ; i < isam_file->s->base.fields ; i++) + { + if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL)) + huff_counts[i].max_zero_fill=0; + if (huff_counts[i].field_type == FIELD_CONSTANT || + huff_counts[i].field_type == FIELD_ZERO || + huff_counts[i].field_type == FIELD_CHECK) + continue; + if (huff_counts[i].field_type == FIELD_INTERVALL) + max_calc_length+=huff_counts[i].tree->height; + else if (huff_counts[i].field_type == FIELD_BLOB || + huff_counts[i].field_type == FIELD_VARCHAR) + max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1; + else + max_calc_length+= + (huff_counts[i].field_length - huff_counts[i].max_zero_fill)* + huff_counts[i].tree->height+huff_counts[i].length_bits; + } + max_calc_length= (max_calc_length + 7) / 8; + pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length); + record_count=0; + /* 'max_blob_length' is the max length of all blobs of a record. */ + pack_blob_length= isam_file->s->base.blobs ? + _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0; + max_pack_length=pack_ref_length+pack_blob_length; + + DBUG_PRINT("fields", ("===")); + mrg_reset(mrg); + while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) + { + ulong tot_blob_length=0; + if (! error) + { + if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length)) + break; + record_pos= (byte*) file_buffer.pos; + file_buffer.pos+= max_pack_length; + if (null_bytes) + { + /* Copy null bits 'as is' */ + memcpy(file_buffer.pos, record, null_bytes); + file_buffer.pos+= null_bytes; + } + for (start_pos=record+null_bytes, count= huff_counts; + count < end_count ; + count++) + { + end_pos=start_pos+(field_length=count->field_length); + tree=count->tree; + + DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u " + "lbits: %2u tree: %2u length: %4u", + (ulong) (count - huff_counts + 1), + count->field_type, + count->pack_type, count->max_zero_fill, + count->length_bits, count->tree->tree_number, + count->field_length)); + + /* Check if the column contains spaces only. */ + if (count->pack_type & PACK_TYPE_SPACE_FIELDS) + { + for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ; + if (pos == end_pos) + { + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1")); + DBUG_PRINT("fields", ("---")); + write_bits(1,1); + start_pos=end_pos; + continue; + } + DBUG_PRINT("fields", + ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1")); + write_bits(0,1); + } + end_pos-=count->max_zero_fill; + field_length-=count->max_zero_fill; + + switch (count->field_type) { + case FIELD_SKIP_ZERO: + if (!memcmp((byte*) start_pos,zero_string,field_length)) + { + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1")); + write_bits(1,1); + start_pos=end_pos; + break; + } + DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1")); + write_bits(0,1); + /* Fall through */ + case FIELD_NORMAL: + DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes", + (ulong) (end_pos - start_pos))); + for ( ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + break; + case FIELD_SKIP_ENDSPACE: + for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ; + length= (ulong) (end_pos - pos); + if (count->pack_type & PACK_TYPE_SELECTED) + { + if (length > count->min_space) + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(1,1); + write_bits(length,count->length_bits); + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE not more than min_space, " + "bits: 1")); + write_bits(0,1); + pos=end_pos; + } + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes", + (ulong) (pos - start_pos))); + for ( ; start_pos < pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + start_pos=end_pos; + break; + case FIELD_SKIP_PRESPACE: + for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ; + length= (ulong) (pos - start_pos); + if (count->pack_type & PACK_TYPE_SELECTED) + { + if (length > count->min_space) + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE more than min_space, bits: 1")); + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(1,1); + write_bits(length,count->length_bits); + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE not more than min_space, " + "bits: 1")); + pos=start_pos; + write_bits(0,1); + } + } + else + { + DBUG_PRINT("fields", + ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u", + length, field_length, count->length_bits)); + write_bits(length,count->length_bits); + } + /* Encode all significant bytes. */ + DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes", + (ulong) (end_pos - start_pos))); + for (start_pos=pos ; start_pos < end_pos ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + break; + case FIELD_CONSTANT: + case FIELD_ZERO: + case FIELD_CHECK: + DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK")); + start_pos=end_pos; + break; + case FIELD_INTERVALL: + global_count=count; + pos=(byte*) tree_search(&count->int_tree, start_pos, + count->int_tree.custom_arg); + intervall=(uint) (pos - count->tree_buff)/field_length; + DBUG_PRINT("fields", ("FIELD_INTERVALL")); + DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u", + intervall, hexdigits(tree->code[intervall]), + (uint) tree->code_len[intervall])); + write_bits(tree->code[intervall],(uint) tree->code_len[intervall]); + start_pos=end_pos; + break; + case FIELD_BLOB: + { + ulong blob_length= _ma_calc_blob_length(field_length- + portable_sizeof_char_ptr, + start_pos); + /* Empty blobs are encoded with a single 1 bit. */ + if (!blob_length) + { + DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1")); + write_bits(1,1); + } + else + { + byte *blob,*blob_end; + DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1")); + write_bits(0,1); + /* Write the blob length. */ + DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u", + blob_length, count->length_bits)); + write_bits(blob_length,count->length_bits); + memcpy_fixed(&blob,end_pos-portable_sizeof_char_ptr, + sizeof(char*)); + blob_end=blob+blob_length; + /* Encode the blob bytes. */ + for ( ; blob < blob_end ; blob++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *blob, hexdigits(tree->code[(uchar) *blob]), + (uint) tree->code_len[(uchar) *blob], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *blob], + (uint) tree->code_len[(uchar) *blob]); + } + tot_blob_length+=blob_length; + } + start_pos= end_pos; + break; + } + case FIELD_VARCHAR: + { + uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); + ulong col_length= (var_pack_length == 1 ? + (uint) *(uchar*) start_pos : + uint2korr(start_pos)); + /* Empty varchar are encoded with a single 1 bit. */ + if (!col_length) + { + DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1")); + write_bits(1,1); /* Empty varchar */ + } + else + { + byte *end= start_pos + var_pack_length + col_length; + DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1")); + write_bits(0,1); + /* Write the varchar length. */ + DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u", + col_length, count->length_bits)); + write_bits(col_length,count->length_bits); + /* Encode the varchar bytes. */ + for (start_pos+= var_pack_length ; start_pos < end ; start_pos++) + { + DBUG_PRINT("fields", + ("value: 0x%02x code: 0x%s bits: %2u bin: %s", + (uchar) *start_pos, + hexdigits(tree->code[(uchar) *start_pos]), + (uint) tree->code_len[(uchar) *start_pos], + bindigits(tree->code[(uchar) *start_pos], + (uint)tree->code_len[(uchar) *start_pos]))); + write_bits(tree->code[(uchar) *start_pos], + (uint) tree->code_len[(uchar) *start_pos]); + } + } + start_pos= end_pos; + break; + } + case FIELD_LAST: + case FIELD_enum_val_count: + abort(); /* Impossible */ + } + start_pos+=count->max_zero_fill; + DBUG_PRINT("fields", ("---")); + } + flush_bits(); + length=(ulong) ((byte*) file_buffer.pos - record_pos) - max_pack_length; + pack_length= _ma_save_pack_length(pack_version, record_pos, length); + if (pack_blob_length) + pack_length+= _ma_save_pack_length(pack_version, + record_pos + pack_length, + tot_blob_length); + DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu " + "length-bytes: %lu", (ulong) record_count, length, + tot_blob_length, pack_length)); + DBUG_PRINT("fields", ("===")); + + /* Correct file buffer if the header was smaller */ + if (pack_length != max_pack_length) + { + bmove(record_pos+pack_length,record_pos+max_pack_length,length); + file_buffer.pos-= (max_pack_length-pack_length); + } + if (length < (ulong) min_record_length) + min_record_length=(uint) length; + if (length > (ulong) max_record_length) + max_record_length=(uint) length; + record_count++; + if (write_loop && record_count % WRITE_COUNT == 0) + { + VOID(printf("%lu\r", (ulong) record_count)); + VOID(fflush(stdout)); + } + } + else if (error != HA_ERR_RECORD_DELETED) + break; + } + if (error == HA_ERR_END_OF_FILE) + error=0; + else + { + VOID(fprintf(stderr, "%s: Got error %d reading records\n", + my_progname, error)); + } + if (verbose >= 2) + VOID(printf("wrote %s records.\n", llstr((longlong) record_count, llbuf))); + + my_afree((gptr) record); + mrg->ref_length=max_pack_length; + mrg->min_pack_length=max_record_length ? min_record_length : 0; + mrg->max_pack_length=max_record_length; + DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0)); +} + + +static char *make_new_name(char *new_name, char *old_name) +{ + return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4); +} + +static char *make_old_name(char *new_name, char *old_name) +{ + return fn_format(new_name,old_name,"",OLD_EXT,2+4); +} + + /* rutines for bit writing buffer */ + +static void init_file_buffer(File file, pbool read_buffer) +{ + file_buffer.file=file; + file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE), + MYF(MY_WME)); + file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8; + file_buffer.pos_in_file=0; + error_on_write=0; + if (read_buffer) + { + + file_buffer.pos=file_buffer.end; + file_buffer.bits=0; + } + else + { + file_buffer.pos=file_buffer.buffer; + file_buffer.bits=BITS_SAVED; + } + file_buffer.bitbucket= 0; +} + + +static int flush_buffer(ulong neaded_length) +{ + ulong length; + + /* + file_buffer.end is 8 bytes lower than the real end of the buffer. + This is done so that the end-of-buffer condition does not need to be + checked for every byte (see write_bits()). Consequently, + file_buffer.pos can become greater than file_buffer.end. The + algorithms in the other functions ensure that there will never be + more than 8 bytes written to the buffer without an end-of-buffer + check. So the buffer cannot be overrun. But we need to check for the + near-to-buffer-end condition to avoid a negative result, which is + casted to unsigned and thus becomes giant. + */ + if ((file_buffer.pos < file_buffer.end) && + ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length)) + return 0; + length=(ulong) (file_buffer.pos-file_buffer.buffer); + file_buffer.pos=file_buffer.buffer; + file_buffer.pos_in_file+=length; + if (test_only) + return 0; + if (error_on_write|| my_write(file_buffer.file, + (const byte*) file_buffer.buffer, + length, + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) + { + error_on_write=1; + return 1; + } + + if (neaded_length != ~(ulong) 0 && + (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length) + { + char *tmp; + neaded_length+=256; /* some margin */ + tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME)); + if (!tmp) + return 1; + file_buffer.pos= ((uchar*) tmp + + (ulong) (file_buffer.pos - file_buffer.buffer)); + file_buffer.buffer= (uchar*) tmp; + file_buffer.end= (uchar*) (tmp+neaded_length-8); + } + return 0; +} + + +static void end_file_buffer(void) +{ + my_free((gptr) file_buffer.buffer,MYF(0)); +} + + /* output `bits` low bits of `value' */ + +static void write_bits(register ulonglong value, register uint bits) +{ + DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) || + (bits == 8 * sizeof(value))); + + if ((file_buffer.bits-= (int) bits) >= 0) + { + file_buffer.bitbucket|= value << file_buffer.bits; + } + else + { + reg3 ulonglong bit_buffer; + bits= (uint) -file_buffer.bits; + bit_buffer= (file_buffer.bitbucket | + ((bits != 8 * sizeof(value)) ? (value >> bits) : 0)); +#if BITS_SAVED == 64 + *file_buffer.pos++= (uchar) (bit_buffer >> 56); + *file_buffer.pos++= (uchar) (bit_buffer >> 48); + *file_buffer.pos++= (uchar) (bit_buffer >> 40); + *file_buffer.pos++= (uchar) (bit_buffer >> 32); +#endif + *file_buffer.pos++= (uchar) (bit_buffer >> 24); + *file_buffer.pos++= (uchar) (bit_buffer >> 16); + *file_buffer.pos++= (uchar) (bit_buffer >> 8); + *file_buffer.pos++= (uchar) (bit_buffer); + + if (bits != 8 * sizeof(value)) + value&= (((ulonglong) 1) << bits) - 1; + if (file_buffer.pos >= file_buffer.end) + VOID(flush_buffer(~ (ulong) 0)); + file_buffer.bits=(int) (BITS_SAVED - bits); + file_buffer.bitbucket= value << (BITS_SAVED - bits); + } + return; +} + + /* Flush bits in bit_buffer to buffer */ + +static void flush_bits(void) +{ + int bits; + ulonglong bit_buffer; + + bits= file_buffer.bits & ~7; + bit_buffer= file_buffer.bitbucket >> bits; + bits= BITS_SAVED - bits; + while (bits > 0) + { + bits-= 8; + *file_buffer.pos++= (uchar) (bit_buffer >> bits); + } + if (file_buffer.pos >= file_buffer.end) + VOID(flush_buffer(~ (ulong) 0)); + file_buffer.bits= BITS_SAVED; + file_buffer.bitbucket= 0; +} + + +/**************************************************************************** +** functions to handle the joined files +****************************************************************************/ + +static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg, + my_off_t new_length, + ha_checksum crc) +{ + MARIA_SHARE *share=isam_file->s; + uint options=mi_uint2korr(share->state.header.options); + uint key; + DBUG_ENTER("save_state"); + + options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA; + mi_int2store(share->state.header.options,options); + /* Save the original file type of we have to undo the packing later */ + share->state.header.org_data_file_type= share->state.header.data_file_type; + share->state.header.data_file_type= COMPRESSED_RECORD; + + share->state.state.data_file_length=new_length; + share->state.state.del=0; + share->state.state.empty=0; + share->state.dellink= HA_OFFSET_ERROR; + share->state.split=(ha_rows) mrg->records; + share->state.version=(ulong) time((time_t*) 0); + if (! maria_is_all_keys_active(share->state.key_map, share->base.keys)) + { + /* + Some indexes are disabled, cannot use current key_file_length value + as an estimate of upper bound of index file size. Use packed data file + size instead. + */ + share->state.state.key_file_length= new_length; + } + /* + If there are no disabled indexes, keep key_file_length value from + original file so "maria_chk -rq" can use this value (this is necessary + because index size cannot be easily calculated for fulltext keys) + */ + maria_clear_all_keys_active(share->state.key_map); + for (key=0 ; key < share->base.keys ; key++) + share->state.key_root[key]= HA_OFFSET_ERROR; + share->state.key_del= HA_OFFSET_ERROR; + isam_file->state->checksum=crc; /* Save crc here */ + share->changed=1; /* Force write of header */ + share->state.open_count=0; + share->global_changed=0; + VOID(my_chsize(share->kfile.file, share->base.keystart, 0, MYF(0))); + if (share->base.keys) + isamchk_neaded=1; + DBUG_RETURN(_ma_state_info_write(share->kfile.file, &share->state, (1 + 2))); +} + + +static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length, + ha_checksum crc) +{ + MARIA_STATE_INFO state; + MARIA_HA *isam_file=mrg->file[0]; + uint options; + DBUG_ENTER("save_state_mrg"); + + state= isam_file->s->state; + options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD | + HA_OPTION_READ_ONLY_DATA); + mi_int2store(state.header.options,options); + state.state.data_file_length=new_length; + state.state.del=0; + state.state.empty=0; + state.state.records=state.split=(ha_rows) mrg->records; + /* See comment above in save_state about key_file_length handling. */ + if (mrg->src_file_has_indexes_disabled) + { + isam_file->s->state.state.key_file_length= + max(isam_file->s->state.state.key_file_length, new_length); + } + state.dellink= HA_OFFSET_ERROR; + state.version=(ulong) time((time_t*) 0); + maria_clear_all_keys_active(state.key_map); + state.state.checksum=crc; + if (isam_file->s->base.keys) + isamchk_neaded=1; + state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */ + DBUG_RETURN (_ma_state_info_write(file,&state,1+2)); +} + + +/* reset for mrg_rrnd */ + +static void mrg_reset(PACK_MRG_INFO *mrg) +{ + if (mrg->current) + { + maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0); + mrg->current=0; + } +} + +static int mrg_rrnd(PACK_MRG_INFO *info,byte *buf) +{ + int error; + MARIA_HA *isam_info; + my_off_t filepos; + + if (!info->current) + { + isam_info= *(info->current=info->file); + info->end=info->current+info->count; + maria_reset(isam_info); + maria_extra(isam_info, HA_EXTRA_CACHE, 0); + if ((error= maria_scan_init(isam_info))) + return(error); + } + else + isam_info= *info->current; + + for (;;) + { + if (!(error= maria_scan(isam_info, buf)) || + error != HA_ERR_END_OF_FILE) + return (error); + maria_scan_end(isam_info); + maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0); + if (info->current+1 == info->end) + return(HA_ERR_END_OF_FILE); + info->current++; + isam_info= *info->current; + filepos=isam_info->s->pack.header_length; + maria_reset(isam_info); + maria_extra(isam_info,HA_EXTRA_CACHE, 0); + if ((error= maria_scan_init(isam_info))) + return(error); + } +} + + +static int mrg_close(PACK_MRG_INFO *mrg) +{ + uint i; + int error=0; + DBUG_ENTER("mrg_close"); + + for (i=0 ; i < mrg->count ; i++) + error|=maria_close(mrg->file[i]); + if (mrg->free_file) + my_free((gptr) mrg->file,MYF(0)); + DBUG_RETURN(error); +} + + +#if !defined(DBUG_OFF) +/* + Fake the counts to get big Huffman codes. + + SYNOPSIS + fakebigcodes() + huff_counts A pointer to the counts array. + end_count A pointer past the counts array. + + DESCRIPTION + + Huffman coding works by removing the two least frequent values from + the list of values and add a new value with the sum of their + incidences in a loop until only one value is left. Every time a + value is reused for a new value, it gets one more bit for its + encoding. Hence, the least frequent values get the longest codes. + + To get a maximum code length for a value, two of the values must + have an incidence of 1. As their sum is 2, the next infrequent value + must have at least an incidence of 2, then 4, 8, 16 and so on. This + means that one needs 2**n bytes (values) for a code length of n + bits. However, using more distinct values forces the use of longer + codes, or reaching the code length with less total bytes (values). + + To get 64(32)-bit codes, I sort the counts by decreasing incidence. + I assign counts of 1 to the two most frequent values, a count of 2 + for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All + the remaining values get 1. That way every possible byte has an + assigned code, though not all codes are used if not all byte values + are present in the column. + + This strategy would work with distinct column values too, but + requires that at least 64(32) values are present. To make things + easier here, I cancel all distinct column values and force byte + compression for all columns. + + RETURN + void +*/ + +static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count) +{ + HUFF_COUNTS *count; + my_off_t *cur_count_p; + my_off_t *end_count_p; + my_off_t **cur_sort_p; + my_off_t **end_sort_p; + my_off_t *sort_counts[256]; + my_off_t total; + DBUG_ENTER("fakebigcodes"); + + for (count= huff_counts; count < end_count; count++) + { + /* + Remove distinct column values. + */ + if (huff_counts->tree_buff) + { + my_free((gptr) huff_counts->tree_buff, MYF(0)); + delete_tree(&huff_counts->int_tree); + huff_counts->tree_buff= NULL; + DBUG_PRINT("fakebigcodes", ("freed distinct column values")); + } + + /* + Sort counts by decreasing incidence. + */ + cur_count_p= count->counts; + end_count_p= cur_count_p + 256; + cur_sort_p= sort_counts; + while (cur_count_p < end_count_p) + *(cur_sort_p++)= cur_count_p++; + (void) qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp); + + /* + Assign faked counts. + */ + cur_sort_p= sort_counts; +#if SIZEOF_LONG_LONG > 4 + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1; +#else + end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2; +#endif + /* Most frequent value gets a faked count of 1. */ + **(cur_sort_p++)= 1; + total= 1; + while (cur_sort_p < end_sort_p) + { + **(cur_sort_p++)= total; + total<<= 1; + } + /* Set the last value. */ + **(cur_sort_p++)= --total; + /* + Set the remaining counts. + */ + end_sort_p= sort_counts + 256; + while (cur_sort_p < end_sort_p) + **(cur_sort_p++)= 1; + } + DBUG_VOID_RETURN; +} + + +/* + Compare two counts for reverse sorting. + + SYNOPSIS + fakecmp() + count1 One count. + count2 Another count. + + RETURN + 1 count1 < count2 + 0 count1 == count2 + -1 count1 > count2 +*/ + +static int fakecmp(my_off_t **count1, my_off_t **count2) +{ + return ((**count1 < **count2) ? 1 : + (**count1 > **count2) ? -1 : 0); +} +#endif diff --git a/storage/maria/maria_rename.sh b/storage/maria/maria_rename.sh new file mode 100755 index 00000000000..fb20e47e635 --- /dev/null +++ b/storage/maria/maria_rename.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +replace myisam maria MYISAM MARIA MyISAM MARIA -- mysql-test/t/*maria*test mysql-test/r/*maria*result + +FILES=`echo sql/ha_maria.{cc,h} include/maria*h storage/maria/*.{c,h}` + +replace myisam maria MYISAM MARIA MyISAM MARIA myisam.h maria.h myisamdef.h maria_def.h mi_ maria_ ft_ maria_ft_ "Copyright (C) 2000" "Copyright (C) 2006" MI_ISAMINFO MARIA_INFO MI_CREATE_INFO MARIA_CREATE_INFO maria_isam_ maria_ MI_INFO MARIA_HA MI_ MARIA_ MARIACHK MARIA_CHK rt_index.h ma_rt_index.h rtree_ maria_rtree rt_key.h ma_rt_key.h rt_mbr.h ma_rt_mbr.h -- $FILES + +replace check_table_is_closed _ma_check_table_is_closed test_if_reopen _ma_test_if_reopen my_n_base_info_read maria_n_base_info_read update_auto_increment _ma_update_auto_increment save_pack_length _ma_save_packlength calc_pack_length _ma_calc_pack_length -- $FILES + +replace mi_ ma_ ft_ ma_ft_ rt_ ma_rt_ myisam maria myisamchk maria_chk myisampack maria_pack myisamlog maria_log -- storage/maria/Makefile.am + +# +# Restore wrong replaces +# + +replace maria_sint1korr mi_sint1korr maria_uint1korr mi_uint1korr maria_sint2korr mi_sint2korr maria_sint3korr mi_sint3korr maria_sint4korr mi_sint4korr maria_sint8korr mi_sint8korr maria_uint2korr mi_uint2korr maria_uint3korr mi_uint3korr maria_uint4korr mi_uint4korr maria_uint5korr mi_uint5korr maria_uint6korr mi_uint6korr maria_uint7korr mi_uint7korr maria_uint8korr mi_uint8korr maria_int1store mi_int1store maria_int2store mi_int2store maria_int3store mi_int3store maria_int4store mi_int4store maria_int5store mi_int5store maria_int6store mi_int6store maria_int7store mi_int7store maria_int8store mi_int8store maria_float4store mi_float4store maria_float4get mi_float4get maria_float8store mi_float8store maria_float8get mi_float8get maria_rowstore mi_rowstore maria_rowkorr mi_rowkorr maria_sizestore mi_sizestore maria_sizekorr mi_sizekorr _maria_maria_ _maria MARIA_MAX_POSSIBLE_KEY HA_MAX_POSSIBLE_KEY MARIA_MAX_KEY_BUFF HA_MAX_KEY_BUFF MARIA_MAX_KEY_SEG HA_MAX_KEY_SEG maria_ft_sintXkorr ft_sintXkorr maria_ft_intXstore ft_intXstore maria_ft_boolean_syntax ft_boolean_syntax maria_ft_min_word_len ft_min_word_len maria_ft_max_word_len ft_max_word_len -- $FILES diff --git a/storage/maria/plug.in b/storage/maria/plug.in new file mode 100644 index 00000000000..198f5d8c289 --- /dev/null +++ b/storage/maria/plug.in @@ -0,0 +1,8 @@ +MYSQL_STORAGE_ENGINE(maria, no, [Maria Storage Engine], + [Traditional transactional MySQL tables], [max,max-no-ndb]) +MYSQL_PLUGIN_DIRECTORY(maria, [storage/maria]) +MYSQL_PLUGIN_ACTIONS(maria, [AC_CONFIG_FILES(storage/maria/unittest/Makefile)]) +MYSQL_PLUGIN_STATIC(maria, [libmaria.a]) +# Maria will probably go first into max builds, not all builds, +# so we don't declare it mandatory. +MYSQL_PLUGIN_DEPENDS_ON_MYSQL_INTERNALS(maria, [ha_maria.cc]) diff --git a/storage/maria/tablockman.c b/storage/maria/tablockman.c new file mode 100644 index 00000000000..4634f60a085 --- /dev/null +++ b/storage/maria/tablockman.c @@ -0,0 +1,676 @@ +/* QQ: TODO - allocate everything from dynarrays !!! (benchmark) */ +/* QQ: automatically place S instead of LS if possible */ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include +#include +#include "tablockman.h" + +/* + Lock Manager for Table Locks + + The code below handles locks on resources - but it is optimized for a + case when a number of resources is not very large, and there are many of + locks per resource - that is a resource is likely to be a table or a + database, but hardly a row in a table. + + Locks belong to "lock owners". A Lock Owner is uniquely identified by a + 16-bit number - loid (lock owner identifier). A function loid_to_tlo must + be provided by the application that takes such a number as an argument + and returns a TABLE_LOCK_OWNER structure. + + Lock levels are completely defined by three tables. Lock compatibility + matrix specifies which locks can be held at the same time on a resource. + Lock combining matrix specifies what lock level has the same behaviour as + a pair of two locks of given levels. getlock_result matrix simplifies + intention locking and lock escalation for an application, basically it + defines which locks are intention locks and which locks are "loose" + locks. It is only used to provide better diagnostics for the + application, lock manager itself does not differentiate between normal, + intention, and loose locks. + + The assumptions are: few distinct resources, many locks are held at the + same time on one resource. Thus: a lock structure _per resource_ can be + rather large; a lock structure _per lock_ does not need to be very small + either; we need to optimize for _speed_. Operations we need are: place a + lock, check if a particular transaction already has a lock on this + resource, check if a conflicting lock exists, if yes - find who owns it. + + Solution: every resource has a structure with + 1. Hash of latest (see the lock upgrade section below) granted locks with + loid as a key. Thus, checking if a given transaction has a lock on + this resource is O(1) operation. + 2. Doubly-linked lists of all granted locks - one list for every lock + type. Thus, checking if a conflicting lock exists is a check whether + an appropriate list head pointer is not null, also O(1). + 3. Every lock has a loid of the owner, thus checking who owns a + conflicting lock is also O(1). + 4. Deque of waiting locks. It's a deque (double-ended queue) not a fifo, + because for lock upgrades requests are added to the queue head, not + tail. This is a single place where there it gets O(N) on number + of locks - when a transaction wakes up from waiting on a condition, + it may need to scan the queue backward to the beginning to find + a conflicting lock. It is guaranteed though that "all transactions + before it" received the same - or earlier - signal. In other words a + transaction needs to scan all transactions before it that received the + signal but didn't have a chance to resume the execution yet, so + practically OS scheduler won't let the scan to be O(N). + + Waiting: if there is a conflicting lock or if wait queue is not empty, a + requested lock cannot be granted at once. It is added to the end of the + wait queue. If a queue was empty and there is a conflicting lock - the + "blocker" transaction is the owner of this lock. If a queue is not empty, + an owner of the previous lock in the queue is the "blocker". But if the + previous lock is compatible with the request, then the "blocker" is the + transaction that the owner of the lock at the end of the queue is waiting + for (in other words, our lock is added to the end of the wait queue, and + our blocker is the same as of the lock right before us). + + Lock upgrades: when a thread that has a lock on a given resource, + requests a new lock on the same resource and the old lock is not enough + to satisfy new lock requirements (which is defined by + lock_combining_matrix[old_lock][new_lock] != old_lock), a new lock + (defined by lock_combining_matrix as above) is placed. Depending on + other granted locks it is immediately granted or it has to wait. Here the + lock is added to the start of the waiting queue, not to the end. Old + lock, is removed from the hash, but not from the doubly-linked lists. + (indeed, a transaction checks "do I have a lock on this resource ?" by + looking in a hash, and it should find a latest lock, so old locks must be + removed; but a transaction checks "are there conflicting locks ?" by + checking doubly-linked lists, it doesn't matter if it will find an old + lock - if it would be removed, a new lock would be also a conflict). + So, a hash contains only "latest" locks - there can be only one latest + lock per resource per transaction. But doubly-linked lists contain all + locks, even "obsolete" ones, because it doesnt't hurt. Note that old + locks can not be freed early, in particular they stay in the + 'active_locks' list of a lock owner, because they may be "re-enabled" + on a savepoint rollback. + + To better support table-row relations where one needs to lock the table + with an intention lock before locking the row, extended diagnostics is + provided. When an intention lock (presumably on a table) is granted, + lockman_getlock() returns one of GOT_THE_LOCK (no need to lock the row, + perhaps the thread already has a normal lock on this table), + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE (need to lock the row, as usual), + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE (only need to check + whether it's possible to lock the row, but no need to lock it - perhaps + the thread has a loose lock on this table). This is defined by + getlock_result[] table. + + Instant duration locks are not supported. Though they're trivial to add, + they are normally only used on rows, not on tables. So, presumably, + they are not needed here. + + Mutexes: there're table mutexes (LOCKED_TABLE::mutex), lock owner mutexes + (TABLE_LOCK_OWNER::mutex), and a pool mutex (TABLOCKMAN::pool_mutex). + table mutex protects operations on the table lock structures, and lock + owner pointers waiting_for and waiting_for_loid. + lock owner mutex is only used to wait on lock owner condition + (TABLE_LOCK_OWNER::cond), there's no need to protect owner's lock + structures, and only lock owner itself may access them. + The pool mutex protects a pool of unused locks. Note the locking order: + first the table mutex, then the owner mutex or a pool mutex. + Table mutex lock cannot be attempted when owner or pool mutex are locked. + No mutex lock can be attempted if owner or pool mutex are locked. +*/ + +/* + Lock compatibility matrix. + + It's asymmetric. Read it as "Somebody has the lock , can I set the lock ?" + + ') Though you can take LS lock while somebody has S lock, it makes no + sense - it's simpler to take S lock too. + + 1 - compatible + 0 - incompatible + -1 - "impossible", so that we can assert the impossibility. +*/ +static const int lock_compatibility_matrix[10][10]= +{ /* N S X IS IX SIX LS LX SLX LSIX */ + { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, /* N */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* S */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* X */ + { -1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, /* IS */ + { -1, 0, 0, 1, 1, 0, 1, 1, 0, 1 }, /* IX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, /* SIX */ + { -1, 1, 0, 1, 0, 0, 1, 0, 0, 0 }, /* LS */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* LX */ + { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* SLX */ + { -1, 0, 0, 1, 0, 0, 1, 0, 0, 0 } /* LSIX */ +}; + +/* + Lock combining matrix. + + It's symmetric. Read it as "what lock level L is identical to the + set of two locks A and B" + + One should never get N from it, we assert the impossibility +*/ +static const enum lock_type lock_combining_matrix[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { N, N, N, N, N, N, N, N, N, N}, /* N */ + { N, S, X, S, SIX, SIX, S, SLX, SLX, SIX}, /* S */ + { N, X, X, X, X, X, X, X, X, X}, /* X */ + { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX}, /* IS */ + { N, SIX, X, IX, IX, SIX, LSIX, LX, SLX, LSIX}, /* IX */ + { N, SIX, X, SIX, SIX, SIX, SIX, SLX, SLX, SIX}, /* SIX */ + { N, S, X, LS, LSIX, SIX, LS, LX, SLX, LSIX}, /* LS */ + { N, SLX, X, LX, LX, SLX, LX, LX, SLX, LX}, /* LX */ + { N, SLX, X, SLX, SLX, SLX, SLX, SLX, SLX, SLX}, /* SLX */ + { N, SIX, X, LSIX, LSIX, SIX, LSIX, LX, SLX, LSIX} /* LSIX */ +}; + +/* + the return codes for lockman_getlock + + It's asymmetric. Read it as "I have the lock , + what value should be returned for ?" + + 0 means impossible combination (assert!) + + Defines below help to preserve the table structure. + I/L/A values are self explanatory + x means the combination is possible (assert should not crash) + but it cannot happen in row locks, only in table locks (S,X), + or lock escalations (LS,LX) +*/ +#define I GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE +#define L GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +#define A GOT_THE_LOCK +#define x GOT_THE_LOCK +static const enum lockman_getlock_result getlock_result[10][10]= +{/* N S X IS IX SIX LS LX SLX LSIX */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* N */ + { 0, x, 0, A, 0, 0, x, 0, 0, 0}, /* S */ + { 0, x, x, A, A, 0, x, x, 0, 0}, /* X */ + { 0, 0, 0, I, 0, 0, 0, 0, 0, 0}, /* IS */ + { 0, 0, 0, I, I, 0, 0, 0, 0, 0}, /* IX */ + { 0, x, 0, A, I, 0, x, 0, 0, 0}, /* SIX */ + { 0, 0, 0, L, 0, 0, x, 0, 0, 0}, /* LS */ + { 0, 0, 0, L, L, 0, x, x, 0, 0}, /* LX */ + { 0, x, 0, A, L, 0, x, x, 0, 0}, /* SLX */ + { 0, 0, 0, L, I, 0, x, 0, 0, 0} /* LSIX */ +}; +#undef I +#undef L +#undef A +#undef x + +/* + this structure is optimized for a case when there're many locks + on the same resource - e.g. a table +*/ + +struct st_table_lock { + /* QQ: do we need upgraded_from ? */ + struct st_table_lock *next_in_lo, *upgraded_from, *next, *prev; + struct st_locked_table *table; + uint16 loid; + uchar lock_type; +}; + +#define hash_insert my_hash_insert /* for consistency :) */ + +static inline +TABLE_LOCK *find_by_loid(LOCKED_TABLE *table, uint16 loid) +{ + return (TABLE_LOCK *)hash_search(& table->latest_locks, + (byte *)& loid, sizeof(loid)); +} + +static inline +void remove_from_wait_queue(TABLE_LOCK *lock, LOCKED_TABLE *table) +{ + DBUG_ASSERT(table == lock->table); + if (lock->prev) + { + DBUG_ASSERT(table->wait_queue_out != lock); + lock->prev->next= lock->next; + } + else + { + DBUG_ASSERT(table->wait_queue_out == lock); + table->wait_queue_out= lock->next; + } + if (lock->next) + { + DBUG_ASSERT(table->wait_queue_in != lock); + lock->next->prev= lock->prev; + } + else + { + DBUG_ASSERT(table->wait_queue_in == lock); + table->wait_queue_in= lock->prev; + } +} + +/* + DESCRIPTION + tries to lock a resource 'table' with a lock level 'lock'. + + RETURN + see enum lockman_getlock_result +*/ +enum lockman_getlock_result +tablockman_getlock(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo, + LOCKED_TABLE *table, enum lock_type lock) +{ + TABLE_LOCK *old, *new, *blocker, *blocker2; + TABLE_LOCK_OWNER *wait_for; + ulonglong deadline; + struct timespec timeout; + enum lock_type new_lock; + enum lockman_getlock_result res; + int i; + + DBUG_ASSERT(lo->waiting_lock == 0); + DBUG_ASSERT(lo->waiting_for == 0); + DBUG_ASSERT(lo->waiting_for_loid == 0); + + pthread_mutex_lock(& table->mutex); + /* do we already have a lock on this resource ? */ + old= find_by_loid(table, lo->loid); + + /* calculate the level of the upgraded lock, if yes */ + new_lock= old ? lock_combining_matrix[old->lock_type][lock] : lock; + + /* and check if old lock is enough to satisfy the new request */ + if (old && new_lock == old->lock_type) + { + /* yes */ + res= getlock_result[old->lock_type][lock]; + goto ret; + } + + /* no, placing a new lock. first - take a free lock structure from the pool */ + pthread_mutex_lock(& lm->pool_mutex); + new= lm->pool; + if (new) + { + lm->pool= new->next; + pthread_mutex_unlock(& lm->pool_mutex); + } + else + { + pthread_mutex_unlock(& lm->pool_mutex); + new= (TABLE_LOCK *)my_malloc(sizeof(*new), MYF(MY_WME)); + if (unlikely(!new)) + { + res= NO_MEMORY_FOR_LOCK; + goto ret; + } + } + + new->loid= lo->loid; + new->lock_type= new_lock; + new->table= table; + + /* and try to place it */ + for (new->prev= table->wait_queue_in;;) + { + wait_for= 0; + if (!old) + { + /* not upgrading - a lock must be added to the _end_ of the wait queue */ + for (blocker= new->prev; blocker && !wait_for; blocker= blocker->prev) + { + TABLE_LOCK_OWNER *tmp= lm->loid_to_tlo(blocker->loid); + + /* find a blocking lock */ + DBUG_ASSERT(table->wait_queue_out); + DBUG_ASSERT(table->wait_queue_in); + if (!lock_compatibility_matrix[blocker->lock_type][lock]) + { + /* found! */ + wait_for= tmp; + break; + } + + /* + hmm, the lock before doesn't block us, let's look one step further. + the condition below means: + + if we never waited on a condition yet + OR + the lock before ours (blocker) waits on a lock (blocker2) that is + present in the hash AND and conflicts with 'blocker' + + the condition after OR may fail if 'blocker2' was removed from + the hash, its signal woke us up, but 'blocker' itself didn't see + the signal yet. + */ + if (!lo->waiting_lock || + ((blocker2= find_by_loid(table, tmp->waiting_for_loid)) && + !lock_compatibility_matrix[blocker2->lock_type] + [blocker->lock_type])) + { + /* but it's waiting for a real lock. we'll wait for the same lock */ + wait_for= tmp->waiting_for; + /* + We don't really need tmp->waiting_for, as tmp->waiting_for_loid + is enough. waiting_for is just a local cache to avoid calling + loid_to_tlo(). + But it's essensial that tmp->waiting_for pointer can ONLY + be dereferenced if find_by_loid() above returns a non-null + pointer, because a TABLE_LOCK_OWNER object that it points to + may've been freed when we come here after a signal. + In particular tmp->waiting_for_loid cannot be replaced + with tmp->waiting_for->loid. + */ + DBUG_ASSERT(wait_for == lm->loid_to_tlo(tmp->waiting_for_loid)); + break; + } + + /* + otherwise - a lock it's waiting for doesn't exist. + We've no choice but to scan the wait queue backwards, looking + for a conflicting lock or a lock waiting for a real lock. + QQ is there a way to avoid this scanning ? + */ + } + } + + if (wait_for == 0) + { + /* checking for compatibility with existing locks */ + for (blocker= 0, i= 0; i < LOCK_TYPES; i++) + { + if (table->active_locks[i] && !lock_compatibility_matrix[i+1][lock]) + { + blocker= table->active_locks[i]; + /* if the first lock in the list is our own - skip it */ + if (blocker->loid == lo->loid) + blocker= blocker->next; + if (blocker) /* found a conflicting lock, need to wait */ + break; + } + } + if (!blocker) /* free to go */ + break; + wait_for= lm->loid_to_tlo(blocker->loid); + } + + /* ok, we're here - the wait is inevitable */ + lo->waiting_for= wait_for; + lo->waiting_for_loid= wait_for->loid; + if (!lo->waiting_lock) /* first iteration of the for() loop */ + { + /* lock upgrade or new lock request ? */ + if (old) + { + /* upgrade - add the lock to the _start_ of the wait queue */ + new->prev= 0; + if ((new->next= table->wait_queue_out)) + new->next->prev= new; + table->wait_queue_out= new; + if (!table->wait_queue_in) + table->wait_queue_in= table->wait_queue_out; + } + else + { + /* new lock - add the lock to the _end_ of the wait queue */ + new->next= 0; + if ((new->prev= table->wait_queue_in)) + new->prev->next= new; + table->wait_queue_in= new; + if (!table->wait_queue_out) + table->wait_queue_out= table->wait_queue_in; + } + lo->waiting_lock= new; + + deadline= my_getsystime() + lm->lock_timeout * 10000; + timeout.tv_sec= deadline/10000000; + timeout.tv_nsec= (deadline % 10000000) * 100; + } + + /* + prepare to wait. + we must lock blocker's mutex to wait on blocker's cond. + and we must release table's mutex. + note that blocker's mutex is locked _before_ table's mutex is released + */ + pthread_mutex_lock(wait_for->mutex); + pthread_mutex_unlock(& table->mutex); + + /* now really wait */ + i= pthread_cond_timedwait(wait_for->cond, wait_for->mutex, & timeout); + + pthread_mutex_unlock(wait_for->mutex); + + if (i == ETIMEDOUT || i == ETIME) + { + /* we rely on the caller to rollback and release all locks */ + res= LOCK_TIMEOUT; + goto ret2; + } + + pthread_mutex_lock(& table->mutex); + + /* ... and repeat from the beginning */ + } + /* yeah! we can place the lock now */ + + /* remove the lock from the wait queue, if it was there */ + if (lo->waiting_lock) + { + remove_from_wait_queue(new, table); + lo->waiting_lock= 0; + lo->waiting_for= 0; + lo->waiting_for_loid= 0; + } + + /* add it to the list of all locks of this lock owner */ + new->next_in_lo= lo->active_locks; + lo->active_locks= new; + + /* and to the list of active locks of this lock type */ + new->prev= 0; + if ((new->next= table->active_locks[new_lock-1])) + new->next->prev= new; + table->active_locks[new_lock-1]= new; + + /* update the latest_locks hash */ + if (old) + hash_delete(& table->latest_locks, (byte *)old); + hash_insert(& table->latest_locks, (byte *)new); + + new->upgraded_from= old; + + res= getlock_result[lock][lock]; + +ret: + pthread_mutex_unlock(& table->mutex); +ret2: + DBUG_ASSERT(res); + return res; +} + +/* + DESCRIPTION + release all locks belonging to a transaction. + signal waiters to continue +*/ +void tablockman_release_locks(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) +{ + TABLE_LOCK *lock, *local_pool= 0, *local_pool_end; + + /* + instead of adding released locks to a pool one by one, we'll link + them in a list and add to a pool in one short action (under a mutex) + */ + local_pool_end= lo->waiting_lock ? lo->waiting_lock : lo->active_locks; + if (!local_pool_end) + return; + + /* release a waiting lock, if any */ + if ((lock= lo->waiting_lock)) + { + DBUG_ASSERT(lock->loid == lo->loid); + pthread_mutex_lock(& lock->table->mutex); + remove_from_wait_queue(lock, lock->table); + + /* + a special case: if this lock was not the last in the wait queue + and it's compatible with the next lock, than the next lock + is waiting for our blocker though really it waits for us, indirectly. + Signal our blocker to release this next lock (after we removed our + lock from the wait queue, of course). + */ + /* + An example to clarify the above: + trn1> S-lock the table. Granted. + trn2> IX-lock the table. Added to the wait queue. trn2 waits on trn1 + trn3> IS-lock the table. The queue is not empty, so IS-lock is added + to the queue. It's compatible with the waiting IX-lock, so trn3 + waits for trn2->waiting_for, that is trn1. + if trn1 releases the lock it signals trn1->cond and both waiting + transactions are awaken. But if trn2 times out, trn3 must be notified + too (as IS and S locks are compatible). So trn2 must signal trn1->cond. + */ + if (lock->next && + lock_compatibility_matrix[lock->next->lock_type][lock->lock_type]) + { + pthread_mutex_lock(lo->waiting_for->mutex); + pthread_cond_broadcast(lo->waiting_for->cond); + pthread_mutex_unlock(lo->waiting_for->mutex); + } + lo->waiting_for= 0; + lo->waiting_for_loid= 0; + pthread_mutex_unlock(& lock->table->mutex); + + lock->next= local_pool; + local_pool= lock; + } + + /* now release granted locks */ + lock= lo->active_locks; + while (lock) + { + TABLE_LOCK *cur= lock; + pthread_mutex_t *mutex= & lock->table->mutex; + DBUG_ASSERT(cur->loid == lo->loid); + + DBUG_ASSERT(lock != lock->next_in_lo); + lock= lock->next_in_lo; + + /* TODO ? group locks by table to reduce the number of mutex locks */ + pthread_mutex_lock(mutex); + hash_delete(& cur->table->latest_locks, (byte *)cur); + + if (cur->prev) + cur->prev->next= cur->next; + if (cur->next) + cur->next->prev= cur->prev; + if (cur->table->active_locks[cur->lock_type-1] == cur) + cur->table->active_locks[cur->lock_type-1]= cur->next; + + cur->next= local_pool; + local_pool= cur; + + pthread_mutex_unlock(mutex); + } + + lo->waiting_lock= lo->active_locks= 0; + + /* + okay, all locks released. now signal that we're leaving, + in case somebody's waiting for it + */ + pthread_mutex_lock(lo->mutex); + pthread_cond_broadcast(lo->cond); + pthread_mutex_unlock(lo->mutex); + + /* and push all freed locks to the lockman's pool */ + pthread_mutex_lock(& lm->pool_mutex); + local_pool_end->next= lm->pool; + lm->pool= local_pool; + pthread_mutex_unlock(& lm->pool_mutex); +} + +void tablockman_init(TABLOCKMAN *lm, loid_to_tlo_func *func, uint timeout) +{ + lm->pool= 0; + lm->loid_to_tlo= func; + lm->lock_timeout= timeout; + pthread_mutex_init(& lm->pool_mutex, MY_MUTEX_INIT_FAST); + my_getsystime(); /* ensure that my_getsystime() is initialized */ +} + +void tablockman_destroy(TABLOCKMAN *lm) +{ + while (lm->pool) + { + TABLE_LOCK *tmp= lm->pool; + lm->pool= tmp->next; + my_free((void *)tmp, MYF(0)); + } + pthread_mutex_destroy(& lm->pool_mutex); +} + +/* + initialize a LOCKED_TABLE structure + + SYNOPSYS + lt a LOCKED_TABLE to initialize + initial_hash_size initial size for 'latest_locks' hash +*/ +void tablockman_init_locked_table(LOCKED_TABLE *lt, int initial_hash_size) +{ + bzero(lt, sizeof(*lt)); + pthread_mutex_init(& lt->mutex, MY_MUTEX_INIT_FAST); + hash_init(& lt->latest_locks, & my_charset_bin, initial_hash_size, + offsetof(TABLE_LOCK, loid), + sizeof(((TABLE_LOCK*)0)->loid), 0, 0, 0); +} + +void tablockman_destroy_locked_table(LOCKED_TABLE *lt) +{ + int i; + + DBUG_ASSERT(lt->wait_queue_out == 0); + DBUG_ASSERT(lt->wait_queue_in == 0); + DBUG_ASSERT(lt->latest_locks.records == 0); + for (i= 0; iactive_locks[i] == 0); + + hash_free(& lt->latest_locks); + pthread_mutex_destroy(& lt->mutex); +} + +#ifdef EXTRA_DEBUG +static const char *lock2str[LOCK_TYPES+1]= {"N", "S", "X", "IS", "IX", "SIX", + "LS", "LX", "SLX", "LSIX"}; + +void tablockman_print_tlo(TABLE_LOCK_OWNER *lo) +{ + TABLE_LOCK *lock; + + printf("lo%d>", lo->loid); + if ((lock= lo->waiting_lock)) + printf(" (%s.0x%lx)", lock2str[lock->lock_type], (intptr)lock->table); + for (lock= lo->active_locks; + lock && lock != lock->next_in_lo; + lock= lock->next_in_lo) + printf(" %s.0x%lx", lock2str[lock->lock_type], (intptr)lock->table); + if (lock && lock == lock->next_in_lo) + printf("!"); + printf("\n"); +} +#endif + diff --git a/storage/maria/tablockman.h b/storage/maria/tablockman.h new file mode 100644 index 00000000000..58c852b5a21 --- /dev/null +++ b/storage/maria/tablockman.h @@ -0,0 +1,87 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _tablockman_h +#define _tablockman_h + +/* + Lock levels: + ^^^^^^^^^^^ + + N - "no lock", not a lock, used sometimes internally to simplify the code + S - Shared + X - eXclusive + IS - Intention Shared + IX - Intention eXclusive + SIX - Shared + Intention eXclusive + LS - Loose Shared + LX - Loose eXclusive + SLX - Shared + Loose eXclusive + LSIX - Loose Shared + Intention eXclusive +*/ +#ifndef _lockman_h +/* QQ: TODO remove N-locks */ +enum lock_type { N, S, X, IS, IX, SIX, LS, LX, SLX, LSIX, LOCK_TYPE_LAST }; +enum lockman_getlock_result { + NO_MEMORY_FOR_LOCK=1, DEADLOCK, LOCK_TIMEOUT, + GOT_THE_LOCK, + GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE, + GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE +}; +#endif + +#define LOCK_TYPES (LOCK_TYPE_LAST-1) + +typedef struct st_table_lock TABLE_LOCK; + +typedef struct st_table_lock_owner { + TABLE_LOCK *active_locks; /* list of active locks */ + TABLE_LOCK *waiting_lock; /* waiting lock (one lock only) */ + struct st_table_lock_owner *waiting_for; /* transaction we're waiting for */ + pthread_cond_t *cond; /* transactions waiting for us, wait on 'cond' */ + pthread_mutex_t *mutex; /* mutex is required to use 'cond' */ + uint16 loid, waiting_for_loid; /* Lock Owner IDentifier */ +} TABLE_LOCK_OWNER; + +typedef struct st_locked_table { + pthread_mutex_t mutex; /* mutex for everything below */ + HASH latest_locks; /* latest locks in a hash */ + TABLE_LOCK *active_locks[LOCK_TYPES]; /* dl-list of locks per type */ + TABLE_LOCK *wait_queue_in, *wait_queue_out; /* wait deque (double-end queue)*/ +} LOCKED_TABLE; + +typedef TABLE_LOCK_OWNER *loid_to_tlo_func(uint16); + +typedef struct { + pthread_mutex_t pool_mutex; + TABLE_LOCK *pool; /* lifo pool of free locks */ + uint lock_timeout; /* lock timeout in milliseconds */ + loid_to_tlo_func *loid_to_tlo; /* for mapping loid to TABLE_LOCK_OWNER */ +} TABLOCKMAN; + +void tablockman_init(TABLOCKMAN *, loid_to_tlo_func *, uint); +void tablockman_destroy(TABLOCKMAN *); +enum lockman_getlock_result tablockman_getlock(TABLOCKMAN *, TABLE_LOCK_OWNER *, + LOCKED_TABLE *, enum lock_type); +void tablockman_release_locks(TABLOCKMAN *, TABLE_LOCK_OWNER *); +void tablockman_init_locked_table(LOCKED_TABLE *, int); +void tablockman_destroy_locked_table(LOCKED_TABLE *); + +#ifdef EXTRA_DEBUG +void tablockman_print_tlo(TABLE_LOCK_OWNER *); +#endif + +#endif + diff --git a/storage/maria/test_pack b/storage/maria/test_pack new file mode 100755 index 00000000000..689645b1661 --- /dev/null +++ b/storage/maria/test_pack @@ -0,0 +1,10 @@ +silent="-s" +suffix="" + +ma_test1$suffix -s ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -us test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -S ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 ;maria_chk$suffix -us test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -b ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -rqs test1 ; maria_chk$suffix -es test1 +ma_test1$suffix -s -w ; maria_pack$suffix --force -s test1 ; maria_chk$suffix -es test1 ; maria_chk$suffix -ros test1 ; maria_chk$suffix -es test1 + +ma_test2$suffix -s -t4 ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2 +ma_test2$suffix -s -t4 -b ; maria_pack$suffix --force -s test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -ros test2 ; maria_chk$suffix -es test2 ; maria_chk$suffix -s -u test2 ; maria_chk$suffix -sm test2 diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c new file mode 100644 index 00000000000..83249ab328f --- /dev/null +++ b/storage/maria/trnman.c @@ -0,0 +1,705 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +#include +#include +#include +#include "trnman.h" + +/* + status variables: + how many trns in the active list currently, + in the committed list currently, allocated since startup. +*/ +uint trnman_active_transactions, trnman_committed_transactions, + trnman_allocated_transactions; + +/* list of active transactions in the trid order */ +static TRN active_list_min, active_list_max; +/* list of committed transactions in the trid order */ +static TRN committed_list_min, committed_list_max; + +/* a counter, used to generate transaction ids */ +static TrID global_trid_generator; + +/* the mutex for everything above */ +static pthread_mutex_t LOCK_trn_list; + +/* LIFO pool of unused TRN structured for reuse */ +static TRN *pool; + +/* a hash for committed transactions that maps trid to a TRN structure */ +static LF_HASH trid_to_committed_trn; + +/* an array that maps short_trid of an active transaction to a TRN structure */ +static TRN **short_trid_to_active_trn; + +/* locks for short_trid_to_active_trn and pool */ +static my_atomic_rwlock_t LOCK_short_trid_to_trn, LOCK_pool; + +/* + Simple interface functions + QQ: if they stay so simple, should we make them inline? +*/ + +uint trnman_increment_locked_tables(TRN *trn) +{ + return trn->locked_tables++; +} + +my_bool trnman_has_locked_tables(TRN *trn) +{ + return trn->locked_tables != 0; +} + +uint trnman_decrement_locked_tables(TRN *trn) +{ + return --trn->locked_tables; +} + +void trnman_reset_locked_tables(TRN *trn) +{ + trn->locked_tables= 0; +} + + +/* + NOTE + Just as short_id doubles as loid, this function doubles as + short_trid_to_LOCK_OWNER. See the compile-time assert below. +*/ + +#ifdef NOT_USED +static TRN *short_trid_to_TRN(uint16 short_trid) +{ + TRN *trn; + compile_time_assert(offsetof(TRN, locks) == 0); + my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); + trn= my_atomic_loadptr((void **)&short_trid_to_active_trn[short_trid]); + my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + return (TRN *)trn; +} +#endif + +static byte *trn_get_hash_key(const byte *trn, uint* len, + my_bool unused __attribute__ ((unused))) +{ + *len= sizeof(TrID); + return (byte *) & ((*((TRN **)trn))->trid); +} + +int trnman_init() +{ + DBUG_ENTER("trnman_init"); + + short_trid_to_active_trn= (TRN **)my_malloc(SHORT_TRID_MAX*sizeof(TRN*), + MYF(MY_WME|MY_ZEROFILL)); + if (unlikely(!short_trid_to_active_trn)) + DBUG_RETURN(1); + short_trid_to_active_trn--; /* min short_trid is 1 */ + + /* + Initialize lists. + active_list_max.min_read_from must be larger than any trid, + so that when an active list is empty we would could free + all committed list. + And committed_list_max itself can not be freed so + committed_list_max.commit_trid must not be smaller that + active_list_max.min_read_from + */ + + active_list_max.trid= active_list_min.trid= 0; + active_list_max.min_read_from= ~(ulong) 0; + active_list_max.next= active_list_min.prev= 0; + active_list_max.prev= &active_list_min; + active_list_min.next= &active_list_max; + + committed_list_max.commit_trid= ~(ulong) 0; + committed_list_max.next= committed_list_min.prev= 0; + committed_list_max.prev= &committed_list_min; + committed_list_min.next= &committed_list_max; + + trnman_active_transactions= 0; + trnman_committed_transactions= 0; + trnman_allocated_transactions= 0; + + pool= 0; + global_trid_generator= 0; /* set later by the recovery code */ + lf_hash_init(&trid_to_committed_trn, sizeof(TRN*), LF_HASH_UNIQUE, + 0, 0, trn_get_hash_key, 0); + DBUG_PRINT("info", ("pthread_mutex_init LOCK_trn_list")); + pthread_mutex_init(&LOCK_trn_list, MY_MUTEX_INIT_FAST); + my_atomic_rwlock_init(&LOCK_short_trid_to_trn); + my_atomic_rwlock_init(&LOCK_pool); + +#ifdef NOT_USED + lockman_init(&maria_lockman, (loid_to_lo_func *)&short_trid_to_TRN, 10000); +#endif + + DBUG_RETURN(0); +} + +/* + NOTE + this could only be called in the "idle" state - no transaction can be + running. See asserts below. +*/ +void trnman_destroy() +{ + DBUG_ENTER("trnman_destroy"); + + if (short_trid_to_active_trn == NULL) /* trnman already destroyed */ + DBUG_VOID_RETURN; + DBUG_ASSERT(trid_to_committed_trn.count == 0); + DBUG_ASSERT(trnman_active_transactions == 0); + DBUG_ASSERT(trnman_committed_transactions == 0); + DBUG_ASSERT(active_list_max.prev == &active_list_min); + DBUG_ASSERT(active_list_min.next == &active_list_max); + DBUG_ASSERT(committed_list_max.prev == &committed_list_min); + DBUG_ASSERT(committed_list_min.next == &committed_list_max); + while (pool) + { + TRN *trn= pool; + pool= pool->next; + DBUG_ASSERT(trn->locks.mutex == 0); + DBUG_ASSERT(trn->locks.cond == 0); + my_free((void *)trn, MYF(0)); + } + lf_hash_destroy(&trid_to_committed_trn); + DBUG_PRINT("info", ("pthread_mutex_destroy LOCK_trn_list")); + pthread_mutex_destroy(&LOCK_trn_list); + my_atomic_rwlock_destroy(&LOCK_short_trid_to_trn); + my_atomic_rwlock_destroy(&LOCK_pool); + my_free((void *)(short_trid_to_active_trn+1), MYF(0)); + short_trid_to_active_trn= NULL; +#ifdef NOT_USED + lockman_destroy(&maria_lockman); +#endif + DBUG_VOID_RETURN; +} + +/* + NOTE + TrID is limited to 6 bytes. Initial value of the generator + is set by the recovery code - being read from the last checkpoint + (or 1 on a first run). +*/ +static TrID new_trid() +{ + DBUG_ENTER("new_trid"); + DBUG_ASSERT(global_trid_generator < 0xffffffffffffLL); + DBUG_PRINT("info", ("safe_mutex_assert_owner LOCK_trn_list")); + safe_mutex_assert_owner(&LOCK_trn_list); + DBUG_RETURN(++global_trid_generator); +} + +static void set_short_trid(TRN *trn) +{ + int i= (global_trid_generator + (intptr)trn) * 312089 % SHORT_TRID_MAX + 1; + my_atomic_rwlock_wrlock(&LOCK_short_trid_to_trn); + for ( ; ; i= i % SHORT_TRID_MAX + 1) /* the range is [1..SHORT_TRID_MAX] */ + { + void *tmp= NULL; + if (short_trid_to_active_trn[i] == NULL && + my_atomic_casptr((void **)&short_trid_to_active_trn[i], &tmp, trn)) + break; + } + my_atomic_rwlock_wrunlock(&LOCK_short_trid_to_trn); + trn->short_id= i; +} + +/* + DESCRIPTION + start a new transaction, allocate and initialize transaction object + mutex and cond will be used for lock waits +*/ + +TRN *trnman_new_trn(pthread_mutex_t *mutex, pthread_cond_t *cond, + void *stack_end) +{ + TRN *trn; + DBUG_ENTER("trnman_new_trn"); + + /* + we have a mutex, to do simple things under it - allocate a TRN, + increment trnman_active_transactions, set trn->min_read_from. + + Note that all the above is fast. generating short_trid may be slow, + as it involves scanning a large array - so it's done outside of the + mutex. + */ + + DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list")); + pthread_mutex_lock(&LOCK_trn_list); + + /* Allocating a new TRN structure */ + trn= pool; + /* + Popping an unused TRN from the pool + (ABA isn't possible, we're behind a mutex + */ + my_atomic_rwlock_wrlock(&LOCK_pool); + while (trn && !my_atomic_casptr((void **)&pool, (void **)&trn, + (void *)trn->next)) + /* no-op */; + my_atomic_rwlock_wrunlock(&LOCK_pool); + + /* Nothing in the pool ? Allocate a new one */ + if (!trn) + { + /* + trn should be completely initalized at create time to allow + one to keep a known state on it. + (Like redo_lns, which is assumed to be 0 at start of row handling + and reset to zero before end of row handling) + */ + trn= (TRN *)my_malloc(sizeof(TRN), MYF(MY_WME | MY_ZEROFILL)); + if (unlikely(!trn)) + { + DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list")); + pthread_mutex_unlock(&LOCK_trn_list); + return 0; + } + trnman_allocated_transactions++; + } + trn->pins= lf_hash_get_pins(&trid_to_committed_trn, stack_end); + if (!trn->pins) + { + trnman_free_trn(trn); + return 0; + } + + trnman_active_transactions++; + + trn->min_read_from= active_list_min.next->trid; + + trn->trid= new_trid(); + trn->short_id= 0; + + trn->next= &active_list_max; + trn->prev= active_list_max.prev; + active_list_max.prev= trn->prev->next= trn; + DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list")); + pthread_mutex_unlock(&LOCK_trn_list); + + if (unlikely(!trn->min_read_from)) + trn->min_read_from= trn->trid; + + trn->commit_trid= 0; + trn->rec_lsn= trn->undo_lsn= trn->first_undo_lsn= 0; + + trn->locks.mutex= mutex; + trn->locks.cond= cond; + trn->locks.waiting_for= 0; + trn->locks.all_locks= 0; +#ifdef NOT_USED + trn->locks.pins= lf_alloc_get_pins(&maria_lockman.alloc); +#endif + + trn->locked_tables= 0; + + /* + only after the following function TRN is considered initialized, + so it must be done the last + */ + set_short_trid(trn); + + DBUG_RETURN(trn); +} + +/* + remove a trn from the active list. + if necessary - move to committed list and set commit_trid + + NOTE + Locks are released at the end. In particular, after placing the + transaction in commit list, and after setting commit_trid. It's + important, as commit_trid affects visibility. Locks don't affect + anything they simply delay execution of other threads - they could be + released arbitrarily late. In other words, when locks are released it + serves as a start banner for other threads, they start to run. So + everything they may need must be ready at that point. + + RETURN + 0 ok + 1 error +*/ +int trnman_end_trn(TRN *trn, my_bool commit) +{ + int res= 1; + TRN *free_me= 0; + LF_PINS *pins= trn->pins; + DBUG_ENTER("trnman_end_trn"); + + DBUG_ASSERT(trn->rec_lsn == 0); + /* if a rollback, all UNDO records should have been executed */ + DBUG_ASSERT(commit || trn->undo_lsn == 0); + DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list")); + pthread_mutex_lock(&LOCK_trn_list); + + /* remove from active list */ + trn->next->prev= trn->prev; + trn->prev->next= trn->next; + + /* + if trn was the oldest active transaction, now that it goes away there + may be committed transactions in the list which no active transaction + needs to bother about - clean up the committed list + */ + if (trn->prev == &active_list_min) + { + uint free_me_count; + TRN *t; + for (t= committed_list_min.next, free_me_count= 0; + t->commit_trid < active_list_min.next->min_read_from; + t= t->next, free_me_count++) /* no-op */; + + DBUG_ASSERT((t != committed_list_min.next && free_me_count > 0) || + (t == committed_list_min.next && free_me_count == 0)); + /* found transactions committed before the oldest active one */ + if (t != committed_list_min.next) + { + free_me= committed_list_min.next; + committed_list_min.next= t; + t->prev->next= 0; + t->prev= &committed_list_min; + trnman_committed_transactions-= free_me_count; + } + } + + /* + if transaction is committed and it was not the only active transaction - + add it to the committed list (which is used for read-from relation) + */ + if (commit && active_list_min.next != &active_list_max) + { + trn->commit_trid= global_trid_generator; + trn->next= &committed_list_max; + trn->prev= committed_list_max.prev; + trnman_committed_transactions++; + + res= lf_hash_insert(&trid_to_committed_trn, pins, &trn); + /* + By going on with life is res<0, we let other threads block on + our rows (because they will never see us committed in + trid_to_committed_trn) until they timeout. Though correct, this is not a + good situation: + - if connection reconnects and wants to check if its rows have been + committed, it will not be able to do that (it will just lock on them) so + connection stays permanently in doubt + - internal structures trid_to_committed_trn and committed_list are + desynchronized. + So we should take Maria down immediately, the two problems being + automatically solved at restart. + */ + DBUG_ASSERT(res <= 0); + } + if (res) + { + /* + res == 1 means the condition in the if() above + was false. + res == -1 means lf_hash_insert failed + */ + trn->next= free_me; + free_me= trn; + } + else + { + committed_list_max.prev= trn->prev->next= trn; + } + trnman_active_transactions--; + DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list")); + pthread_mutex_unlock(&LOCK_trn_list); + + /* the rest is done outside of a critical section */ +#ifdef NOT_USED + lockman_release_locks(&maria_lockman, &trn->locks); +#endif + trn->locks.mutex= 0; + trn->locks.cond= 0; + my_atomic_rwlock_rdlock(&LOCK_short_trid_to_trn); + my_atomic_storeptr((void **)&short_trid_to_active_trn[trn->short_id], 0); + my_atomic_rwlock_rdunlock(&LOCK_short_trid_to_trn); + + /* + we, under the mutex, removed going-in-free_me transactions from the + active and committed lists, thus nobody else may see them when it scans + those lists, and thus nobody may want to free them. Now we don't + need a mutex to access free_me list + */ + /* QQ: send them to the purge thread */ + while (free_me) + { + TRN *t= free_me; + free_me= free_me->next; + + /* + ignore OOM here. it's harmless, and there's nothing we could do, anyway + */ + (void)lf_hash_delete(&trid_to_committed_trn, pins, &t->trid, sizeof(TrID)); + + trnman_free_trn(t); + } + + lf_hash_put_pins(pins); +#ifdef NOT_USED + lf_pinbox_put_pins(trn->locks.pins); +#endif + + DBUG_RETURN(res < 0); +} + +/* + free a trn (add to the pool, that is) + note - we can never really free() a TRN if there's at least one other + running transaction - see, e.g., how lock waits are implemented in + lockman.c + The same is true for other lock-free data structures too. We may need some + kind of FLUSH command to reset them all - ensuring that no transactions are + running. It may even be called automatically on checkpoints if no + transactions are running. +*/ +void trnman_free_trn(TRN *trn) +{ + TRN *tmp= pool; + + my_atomic_rwlock_wrlock(&LOCK_pool); + do + { + /* + without this volatile cast gcc-3.4.4 moved the assignment + down after the loop at -O2 + */ + *(TRN * volatile *)&(trn->next)= tmp; + } while (!my_atomic_casptr((void **)&pool, (void **)&tmp, trn)); + my_atomic_rwlock_wrunlock(&LOCK_pool); +} + +/* + NOTE + here we access the hash in a lock-free manner. + It's safe, a 'found' TRN can never be freed/reused before we access it. + In fact, it cannot be freed before 'trn' ends, because a 'found' TRN + can only be removed from the hash when: + found->commit_trid < ALL (trn->min_read_from) + that is, at least + found->commit_trid < trn->min_read_from + but + found->trid >= trn->min_read_from + and + found->commit_trid > found->trid + + RETURN + 1 can + 0 cannot + -1 error (OOM) +*/ +int trnman_can_read_from(TRN *trn, TrID trid) +{ + TRN **found; + my_bool can; + LF_REQUIRE_PINS(3); + + if (trid < trn->min_read_from) + return 1; /* can read */ + if (trid > trn->trid) + return 0; /* cannot read */ + + found= lf_hash_search(&trid_to_committed_trn, trn->pins, &trid, sizeof(trid)); + if (found == NULL) + return 0; /* not in the hash of committed transactions = cannot read */ + if (found == MY_ERRPTR) + return -1; + + can= (*found)->commit_trid < trn->trid; + lf_hash_search_unpin(trn->pins); + return can; +} + +/* TODO: the stubs below are waiting for savepoints to be implemented */ + +void trnman_new_statement(TRN *trn __attribute__ ((unused))) +{ +} + +void trnman_rollback_statement(TRN *trn __attribute__ ((unused))) +{ +} + + +/** + @brief Allocates buffers and stores in them some info about transactions + + Does the allocation because the caller cannot know the size itself. + Memory freeing is to be done by the caller (if the "str" member of the + LEX_STRING is not NULL). + The caller has the intention of doing checkpoints. + + @param[out] str_act pointer to where the allocated buffer, + and its size, will be put; buffer will be filled + with info about active transactions + @param[out] str_com pointer to where the allocated buffer, + and its size, will be put; buffer will be filled + with info about committed transactions + @param[out] min_first_undo_lsn pointer to where the minimum + first_undo_lsn of all transactions will be put + + @return Operation status + @retval 0 OK + @retval 1 Error +*/ + +my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com, + LSN *min_rec_lsn, LSN *min_first_undo_lsn) +{ + my_bool error; + TRN *trn; + char *ptr; + uint stored_transactions= 0; + LSN minimum_rec_lsn= ULONGLONG_MAX, minimum_first_undo_lsn= ULONGLONG_MAX; + DBUG_ENTER("trnman_collect_transactions"); + + DBUG_ASSERT((NULL == str_act->str) && (NULL == str_com->str)); + + /* validate the use of read_non_atomic() in general: */ + compile_time_assert((sizeof(LSN) == 8) && (sizeof(LSN_WITH_FLAGS) == 8)); + + DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list")); + pthread_mutex_lock(&LOCK_trn_list); + str_act->length= 2 + /* number of active transactions */ + LSN_STORE_SIZE + /* minimum of their rec_lsn */ + (6 + /* long id */ + 2 + /* short id */ + LSN_STORE_SIZE + /* undo_lsn */ +#ifdef MARIA_VERSIONING /* not enabled yet */ + LSN_STORE_SIZE + /* undo_purge_lsn */ +#endif + LSN_STORE_SIZE /* first_undo_lsn */ + ) * trnman_active_transactions; + str_com->length= 8 + /* number of committed transactions */ + (6 + /* long id */ +#ifdef MARIA_VERSIONING /* not enabled yet */ + LSN_STORE_SIZE + /* undo_purge_lsn */ +#endif + LSN_STORE_SIZE /* first_undo_lsn */ + ) * trnman_committed_transactions; + if ((NULL == (str_act->str= my_malloc(str_act->length, MYF(MY_WME)))) || + (NULL == (str_com->str= my_malloc(str_com->length, MYF(MY_WME))))) + goto err; + /* First, the active transactions */ + ptr= str_act->str + 2 + LSN_STORE_SIZE; + for (trn= active_list_min.next; trn != &active_list_max; trn= trn->next) + { + /* + trns with a short trid of 0 are not even initialized, we can ignore + them. trns with undo_lsn==0 have done no writes, we can ignore them + too. XID not needed now. + */ + uint sid; + LSN rec_lsn, undo_lsn, first_undo_lsn; + if ((sid= trn->short_id) == 0) + { + /* + Not even inited, has done nothing. Or it is the + dummy_transaction_object, which does only non-transactional + immediate-sync operations (CREATE/DROP/RENAME/REPAIR TABLE), and so + can be forgotten for Checkpoint. + */ + continue; + } +#ifndef MARIA_CHECKPOINT +/* + in the checkpoint patch (not yet ready) we will have a real implementation + of lsn_read_non_atomic(); for now it's not needed +*/ +#define lsn_read_non_atomic(A) (A) +#endif + /* needed for low-water mark calculation */ + if (((rec_lsn= lsn_read_non_atomic(trn->rec_lsn)) > 0) && + (cmp_translog_addr(rec_lsn, minimum_rec_lsn) < 0)) + minimum_rec_lsn= rec_lsn; + /* + trn may have logged REDOs but not yet UNDO, that's why we read rec_lsn + before deciding to ignore if undo_lsn==0. + */ + if ((undo_lsn= trn->undo_lsn) == 0) /* trn can be forgotten */ + continue; + stored_transactions++; + int6store(ptr, trn->trid); + ptr+= 6; + int2store(ptr, sid); + ptr+= 2; + lsn_store(ptr, undo_lsn); /* needed for rollback */ + ptr+= LSN_STORE_SIZE; +#ifdef MARIA_VERSIONING /* not enabled yet */ + /* to know where purging should start (last delete of this trn) */ + lsn_store(ptr, trn->undo_purge_lsn); + ptr+= LSN_STORE_SIZE; +#endif + /* needed for low-water mark calculation */ + if (((first_undo_lsn= lsn_read_non_atomic(trn->first_undo_lsn)) > 0) && + (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0)) + minimum_first_undo_lsn= first_undo_lsn; + lsn_store(ptr, first_undo_lsn); + ptr+= LSN_STORE_SIZE; + /** + @todo RECOVERY: add a comment explaining why we can dirtily read some + vars, inspired by the text of "assumption 8" in WL#3072 + */ + } + str_act->length= ptr - str_act->str; /* as we maybe over-estimated */ + ptr= str_act->str; + int2store(ptr, stored_transactions); + ptr+= 2; + /* this LSN influences how REDOs for any page can be ignored by Recovery */ + lsn_store(ptr, minimum_rec_lsn); + /* one day there will also be a list of prepared transactions */ + /* do the same for committed ones */ + ptr= str_com->str; + int8store(ptr, (ulonglong)trnman_committed_transactions); + ptr+= 8; + for (trn= committed_list_min.next; trn != &committed_list_max; + trn= trn->next) + { + LSN first_undo_lsn; + int6store(ptr, trn->trid); + ptr+= 6; +#ifdef MARIA_VERSIONING /* not enabled yet */ + lsn_store(ptr, trn->undo_purge_lsn); + ptr+= LSN_STORE_SIZE; +#endif + first_undo_lsn= LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn); + if (cmp_translog_addr(first_undo_lsn, minimum_first_undo_lsn) < 0) + minimum_first_undo_lsn= first_undo_lsn; + lsn_store(ptr, first_undo_lsn); + ptr+= LSN_STORE_SIZE; + } + /* + TODO: if we see there exists no transaction (active and committed) we can + tell the lock-free structures to do some freeing (my_free()). + */ + error= 0; + *min_rec_lsn= minimum_rec_lsn; + *min_first_undo_lsn= minimum_first_undo_lsn; + goto end; +err: + error= 1; +end: + DBUG_PRINT("info", ("pthread_mutex_unlock LOCK_trn_list")); + pthread_mutex_unlock(&LOCK_trn_list); + DBUG_RETURN(error); +} diff --git a/storage/maria/trnman.h b/storage/maria/trnman.h new file mode 100644 index 00000000000..1a4423f2a11 --- /dev/null +++ b/storage/maria/trnman.h @@ -0,0 +1,59 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#ifndef _trnman_h +#define _trnman_h + +C_MODE_START + +#include +#include "lockman.h" +#include "trnman_public.h" +#include "ma_loghandler_lsn.h" + +/* + trid - 6 byte transaction identifier. Assigned when a transaction + is created. Transaction can always be identified by its trid, + even after transaction has ended. + + short_trid - 2-byte transaction identifier, identifies a running + transaction, is reassigned when transaction ends. +*/ + +/* + short transaction id is at the same time its identifier + for a lock manager - its lock owner identifier (loid) +*/ + +#define short_id locks.loid + +struct st_transaction +{ + LOCK_OWNER locks; /* must be the first! see short_trid_to_TRN() */ + LF_PINS *pins; + TrID trid, min_read_from, commit_trid; + TRN *next, *prev; + LSN rec_lsn, undo_lsn; + LSN_WITH_FLAGS first_undo_lsn; + uint locked_tables; + /* Note! if locks.loid is 0, trn is NOT initialized */ +}; + +#define TRANSACTION_LOGGED_LONG_ID ULL(0x8000000000000000) + +C_MODE_END + +#endif + diff --git a/storage/maria/trnman_public.h b/storage/maria/trnman_public.h new file mode 100644 index 00000000000..3e0a21c26a6 --- /dev/null +++ b/storage/maria/trnman_public.h @@ -0,0 +1,54 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + + +/* + External definitions for trnman.h + We need to split this into two files as gcc 4.1.2 gives error if it tries + to include my_atomic.h in C++ code. +*/ + +#include "ma_loghandler_lsn.h" + +C_MODE_START +typedef uint64 TrID; /* our TrID is 6 bytes */ +typedef struct st_transaction TRN; + +#define SHORT_TRID_MAX 65535 + +extern uint trnman_active_transactions, trnman_allocated_transactions; +extern TRN dummy_transaction_object; + +int trnman_init(void); +void trnman_destroy(void); +TRN *trnman_new_trn(pthread_mutex_t *, pthread_cond_t *, void *); +int trnman_end_trn(TRN *trn, my_bool commit); +#define trnman_commit_trn(T) trnman_end_trn(T, TRUE) +#define trnman_abort_trn(T) trnman_end_trn(T, FALSE) +#define trnman_rollback_trn(T) trnman_end_trn(T, FALSE) +void trnman_free_trn(TRN *trn); +int trnman_can_read_from(TRN *trn, TrID trid); +void trnman_new_statement(TRN *trn); +void trnman_rollback_statement(TRN *trn); +my_bool trnman_collect_transactions(LEX_STRING *str_act, LEX_STRING *str_com, + LSN *min_rec_lsn, + LSN *min_first_undo_lsn); + +uint trnman_increment_locked_tables(TRN *trn); +uint trnman_decrement_locked_tables(TRN *trn); +my_bool trnman_has_locked_tables(TRN *trn); +void trnman_reset_locked_tables(TRN *trn); + +C_MODE_END diff --git a/storage/maria/unittest/Makefile.am b/storage/maria/unittest/Makefile.am new file mode 100644 index 00000000000..28264d5d903 --- /dev/null +++ b/storage/maria/unittest/Makefile.am @@ -0,0 +1,89 @@ +# Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap +INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap + +# Only reason to link with libmyisam.a here is that it's where some fulltext +# pieces are (but soon we'll remove fulltext dependencies from Maria). +LDADD= $(top_builddir)/unittest/mytap/libmytap.a \ + $(top_builddir)/storage/maria/libmaria.a \ + $(top_builddir)/storage/myisam/libmyisam.a \ + $(top_builddir)/mysys/libmysys.a \ + $(top_builddir)/dbug/libdbug.a \ + $(top_builddir)/strings/libmystrings.a @ZLIB_LIBS@ +noinst_PROGRAMS = ma_control_file-t trnman-t lockman2-t \ + ma_pagecache_single_1k-t ma_pagecache_single_8k-t \ + ma_pagecache_single_64k-t-big \ + ma_pagecache_consist_1k-t-big \ + ma_pagecache_consist_64k-t-big \ + ma_pagecache_consist_1kHC-t-big \ + ma_pagecache_consist_64kHC-t-big \ + ma_pagecache_consist_1kRD-t-big \ + ma_pagecache_consist_64kRD-t-big \ + ma_pagecache_consist_1kWR-t-big \ + ma_pagecache_consist_64kWR-t-big \ + ma_test_loghandler-t \ + ma_test_loghandler_multigroup-t \ + ma_test_loghandler_multithread-t \ + ma_test_loghandler_pagecache-t \ + ma_test_loghandler_long-t-big + +ma_test_loghandler_t_SOURCES = ma_test_loghandler-t.c ma_maria_log_cleanup.c +ma_test_loghandler_multigroup_t_SOURCES = ma_test_loghandler_multigroup-t.c ma_maria_log_cleanup.c +ma_test_loghandler_multithread_t_SOURCES = ma_test_loghandler_multithread-t.c ma_maria_log_cleanup.c +ma_test_loghandler_pagecache_t_SOURCES = ma_test_loghandler_pagecache-t.c ma_maria_log_cleanup.c +ma_test_loghandler_long_t_big_SOURCES = ma_test_loghandler-t.c ma_maria_log_cleanup.c +ma_test_loghandler_long_t_big_CPPFLAGS = -DLONG_LOG_TEST + +ma_pagecache_single_src = ma_pagecache_single.c test_file.c +ma_pagecache_consist_src = ma_pagecache_consist.c test_file.c +ma_pagecache_common_cppflags = -DEXTRA_DEBUG -DPAGECACHE_DEBUG -DMAIN + +ma_pagecache_single_1k_t_SOURCES = $(ma_pagecache_single_src) +ma_pagecache_single_8k_t_SOURCES = $(ma_pagecache_single_src) +ma_pagecache_single_64k_t_big_SOURCES = $(ma_pagecache_single_src) +ma_pagecache_single_1k_t_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 +ma_pagecache_single_8k_t_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=8192 +ma_pagecache_single_64k_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +ma_pagecache_consist_1k_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_1k_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 +ma_pagecache_consist_64k_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_64k_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 + +ma_pagecache_consist_1kHC_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_1kHC_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_HIGH_CONCURENCY +ma_pagecache_consist_64kHC_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_64kHC_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_HIGH_CONCURENCY + +ma_pagecache_consist_1kRD_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_1kRD_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_READERS +ma_pagecache_consist_64kRD_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_64kRD_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_READERS + +ma_pagecache_consist_1kWR_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_1kWR_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=1024 -DTEST_WRITERS +ma_pagecache_consist_64kWR_t_big_SOURCES = $(ma_pagecache_consist_src) +ma_pagecache_consist_64kWR_t_big_CPPFLAGS = $(ma_pagecache_common_cppflags) -DPAGE_SIZE=65536 -DTEST_WRITERS + +# the generic lock manager may not be used in the end and lockman1-t crashes, +# so we don't build lockman-t and lockman1-t +CLEANFILES = maria_control page_cache_test_file_1 \ + maria_log.???????? + diff --git a/storage/maria/unittest/lockman-t.c b/storage/maria/unittest/lockman-t.c new file mode 100644 index 00000000000..8c0f71175e7 --- /dev/null +++ b/storage/maria/unittest/lockman-t.c @@ -0,0 +1,309 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + lockman for row and table locks +*/ + +/* #define EXTRA_VERBOSE */ + +#include + +#include +#include +#include +#include +#include "../lockman.h" + +#define Nlos 100 +LOCK_OWNER loarray[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKMAN lockman; + +#ifndef EXTRA_VERBOSE +#define print_lockhash(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +LOCK_OWNER *loid2lo(uint16 loid) +{ + return loarray+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + lockman_release_locks(&lockman, loid2lo(O));print_lockhash(&lockman) +#define test_lock(O, R, L, S, RES) \ + ok(lockman_getlock(&lockman, loid2lo(O), R, L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lockhash(&lockman) +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", DIDNT_GET_THE_LOCK); + +void test_lockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +char *res2str[4]= { + "DIDN'T GET THE LOCK", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + LOCK_OWNER *lo; + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo= loid2lo(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { /* table lock */ + res= lockman_getlock(&lockman, lo, table, lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= lockman_getlock(&lockman, lo, table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case DIDNT_GET_THE_LOCK: + lockman_release_locks(&lockman, lo); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + DBUG_ASSERT(0); + } + } + } + + lockman_release_locks(&lockman, lo); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(35); + + if (my_atomic_initialize()) + return exit_status(); + + + lockman_init(&lockman, &loid2lo, 50); + + for (i= 0; i < Nlos; i++) + { + loarray[i].pins= lf_alloc_get_pins(&lockman.alloc); + loarray[i].all_locks= 0; + loarray[i].waiting_for= 0; + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + loarray[i].mutex= &mutexes[i]; + loarray[i].cond= &conds[i]; + loarray[i].loid= i+1; + } + + test_lockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); + + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); + + for (i= 0; i < Nlos; i++) + { + lockman_release_locks(&lockman, &loarray[i]); + pthread_mutex_destroy(loarray[i].mutex); + pthread_cond_destroy(loarray[i].cond); + lf_pinbox_put_pins(loarray[i].pins); + } + + { + ulonglong now= my_getsystime(); + lockman_destroy(&lockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/lockman1-t.c b/storage/maria/unittest/lockman1-t.c new file mode 100644 index 00000000000..41a1f0fd2f4 --- /dev/null +++ b/storage/maria/unittest/lockman1-t.c @@ -0,0 +1,335 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + lockman for row locks, tablockman for table locks +*/ + +/* #define EXTRA_VERBOSE */ + +#include + +#include +#include +#include +#include +#include "../lockman.h" +#include "../tablockman.h" + +#define Nlos 100 +#define Ntbls 10 +LOCK_OWNER loarray[Nlos]; +TABLE_LOCK_OWNER loarray1[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKED_TABLE ltarray[Ntbls]; +LOCKMAN lockman; +TABLOCKMAN tablockman; + +#ifndef EXTRA_VERBOSE +#define print_lo1(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +LOCK_OWNER *loid2lo(uint16 loid) +{ + return loarray+loid-1; +} +TABLE_LOCK_OWNER *loid2lo1(uint16 loid) +{ + return loarray1+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + tablockman_release_locks(&tablockman, loid2lo1(O)); +#define test_lock(O, R, L, S, RES) \ + ok(tablockman_getlock(&tablockman, loid2lo1(O), <array[R], L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lo1(loid2lo1(O)); +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", LOCK_TIMEOUT); + +void test_tablockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +char *res2str[]= { + "DIDN'T GET THE LOCK", + "OUT OF MEMORY", + "DEADLOCK", + "LOCK TIMEOUT", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + LOCK_OWNER *lo; TABLE_LOCK_OWNER *lo1; DBUG_ASSERT(Ntables <= Ntbls); + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo= loid2lo(loid); lo1= loid2lo1(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + x= (x*3628273133 + 1500450271) % 9576890767; /* three prime numbers */ + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { /* table lock */ + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res < GOT_THE_LOCK) + { + lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= lockman_getlock(&lockman, lo, row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res == DIDNT_GET_THE_LOCK) + { + lockman_release_locks(&lockman, lo); + tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + lockman_release_locks(&lockman, lo); tablockman_release_locks(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + } + } + + lockman_release_locks(&lockman, lo); + tablockman_release_locks(&tablockman, lo1); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(35); + + if (my_atomic_initialize()) + return exit_status(); + + + lockman_init(&lockman, &loid2lo, 50); + tablockman_init(&tablockman, &loid2lo1, 50); + + for (i= 0; i < Nlos; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + + loarray[i].pins= lf_alloc_get_pins(&lockman.alloc); + loarray[i].all_locks= 0; + loarray[i].waiting_for= 0; + loarray[i].mutex= &mutexes[i]; + loarray[i].cond= &conds[i]; + loarray[i].loid= i+1; + + loarray1[i].active_locks= 0; + loarray1[i].waiting_lock= 0; + loarray1[i].waiting_for= 0; + loarray1[i].mutex= &mutexes[i]; + loarray1[i].cond= &conds[i]; + loarray1[i].loid= i+1; + } + + for (i= 0; i < Ntbls; i++) + { + tablockman_init_locked_table(ltarray+i, Nlos); + } + + test_tablockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); + + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); + + for (i= 0; i < Nlos; i++) + { + lockman_release_locks(&lockman, &loarray[i]); + pthread_mutex_destroy(loarray[i].mutex); + pthread_cond_destroy(loarray[i].cond); + lf_pinbox_put_pins(loarray[i].pins); + } + + { + ulonglong now= my_getsystime(); + lockman_destroy(&lockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/lockman2-t.c b/storage/maria/unittest/lockman2-t.c new file mode 100644 index 00000000000..01af1a03d22 --- /dev/null +++ b/storage/maria/unittest/lockman2-t.c @@ -0,0 +1,361 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + tablockman for row and table locks +*/ + +/* #define EXTRA_VERBOSE */ + +#include + +#include +#include +#include +#include +#include "../tablockman.h" + +#define Nlos 100 +#define Ntbls 110 +TABLE_LOCK_OWNER loarray1[Nlos]; +pthread_mutex_t mutexes[Nlos]; +pthread_cond_t conds[Nlos]; +LOCKED_TABLE ltarray[Ntbls]; +TABLOCKMAN tablockman; + +#ifndef EXTRA_VERBOSE +#define print_lo1(X) /* no-op */ +#define DIAG(X) /* no-op */ +#else +#define DIAG(X) diag X +#endif + +TABLE_LOCK_OWNER *loid2lo1(uint16 loid) +{ + return loarray1+loid-1; +} + +#define unlock_all(O) diag("lo" #O "> release all locks"); \ + tablockman_release_locks(&tablockman, loid2lo1(O)); +#define test_lock(O, R, L, S, RES) \ + ok(tablockman_getlock(&tablockman, loid2lo1(O), <array[R], L) == RES, \ + "lo" #O "> " S "lock resource " #R " with " #L "-lock"); \ + print_lo1(loid2lo1(O)); +#define lock_ok_a(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK) +#define lock_ok_i(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE) +#define lock_ok_l(O, R, L) \ + test_lock(O, R, L, "", GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE) +#define lock_conflict(O, R, L) \ + test_lock(O, R, L, "cannot ", LOCK_TIMEOUT); + +void test_tablockman_simple() +{ + /* simple */ + lock_ok_a(1, 1, S); + lock_ok_i(2, 2, IS); + lock_ok_i(1, 2, IX); + /* lock escalation */ + lock_ok_a(1, 1, X); + lock_ok_i(2, 2, IX); + /* failures */ + lock_conflict(2, 1, X); + unlock_all(2); + lock_ok_a(1, 2, S); + lock_ok_a(1, 2, IS); + lock_ok_a(1, 2, LS); + lock_ok_i(1, 3, IX); + lock_ok_a(2, 3, LS); + lock_ok_i(1, 3, IX); + lock_ok_l(2, 3, IS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_conflict(2, 1, S); + lock_ok_a(1, 1, LS); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IX); + lock_ok_a(2, 1, LS); + lock_ok_a(1, 1, LS); + lock_ok_i(1, 1, IX); + lock_ok_i(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_i(1, 4, IS); + lock_ok_i(2, 4, IS); + lock_ok_i(3, 4, IS); + lock_ok_a(3, 4, LS); + lock_ok_i(4, 4, IS); + lock_conflict(4, 4, IX); + lock_conflict(2, 4, IX); + lock_ok_a(1, 4, LS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + unlock_all(4); + + lock_ok_i(1, 1, IX); + lock_ok_i(2, 1, IX); + lock_conflict(1, 1, S); + lock_conflict(2, 1, X); + unlock_all(1); + unlock_all(2); + + lock_ok_i(1, 1, IS); + lock_conflict(2, 1, X); + lock_conflict(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); + + lock_ok_a(1, 1, S); + lock_conflict(2, 1, IX); + lock_conflict(3, 1, IS); + unlock_all(1); + unlock_all(2); + unlock_all(3); +} + +int rt_num_threads; +int litmus; +int thread_number= 0, timeouts= 0; +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + thread_number= timeouts= 0; + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Running %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, 0, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Finished %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +static void reinit_tlo(TABLOCKMAN *lm, TABLE_LOCK_OWNER *lo) +{ +#ifdef NOT_USED_YET + TABLE_LOCK_OWNER backup= *lo; +#endif + + tablockman_release_locks(lm, lo); +#ifdef NOT_USED_YET + pthread_mutex_destroy(lo->mutex); + pthread_cond_destroy(lo->cond); + bzero(lo, sizeof(*lo)); + + lo->mutex= backup.mutex; + lo->cond= backup.cond; + lo->loid= backup.loid; + pthread_mutex_init(lo->mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(lo->cond, 0); +#endif +} + +pthread_mutex_t rt_mutex; +int Nrows= 100; +int Ntables= 10; +int table_lock_ratio= 10; +enum lock_type lock_array[6]= {S, X, LS, LX, IS, IX}; +const char *lock2str[6]= {"S", "X", "LS", "LX", "IS", "IX"}; +const char *res2str[]= { + 0, + "OUT OF MEMORY", + "DEADLOCK", + "LOCK TIMEOUT", + "GOT THE LOCK", + "GOT THE LOCK NEED TO LOCK A SUBRESOURCE", + "GOT THE LOCK NEED TO INSTANT LOCK A SUBRESOURCE"}; + +pthread_handler_t test_lockman(void *arg) +{ + int m= (*(int *)arg); + uint x, loid, row, table, res, locklevel, timeout= 0; + TABLE_LOCK_OWNER *lo1; + DBUG_ASSERT(Ntables <= Ntbls); + DBUG_ASSERT(Nrows + Ntables <= Ntbls); + + pthread_mutex_lock(&rt_mutex); + loid= ++thread_number; + pthread_mutex_unlock(&rt_mutex); + lo1= loid2lo1(loid); + + for (x= ((int)(intptr)(&m)); m > 0; m--) + { + /* three prime numbers */ + x= (uint) ((x*LL(3628273133) + LL(1500450271)) % LL(9576890767)); + row= x % Nrows + Ntables; + table= row % Ntables; + locklevel= (x/Nrows) & 3; + if (table_lock_ratio && (x/Nrows/4) % table_lock_ratio == 0) + { + /* table lock */ + res= tablockman_getlock(&tablockman, lo1, ltarray+table, + lock_array[locklevel]); + DIAG(("loid %2d, table %d, lock %s, res %s", loid, table, + lock2str[locklevel], res2str[res])); + if (res < GOT_THE_LOCK) + { + reinit_tlo(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + } + else + { /* row lock */ + locklevel&= 1; + res= tablockman_getlock(&tablockman, lo1, ltarray+table, lock_array[locklevel + 4]); + DIAG(("loid %2d, row %d, lock %s, res %s", loid, row, + lock2str[locklevel+4], res2str[res])); + switch (res) + { + case GOT_THE_LOCK: + continue; + case GOT_THE_LOCK_NEED_TO_INSTANT_LOCK_A_SUBRESOURCE: + /* not implemented, so take a regular lock */ + case GOT_THE_LOCK_NEED_TO_LOCK_A_SUBRESOURCE: + res= tablockman_getlock(&tablockman, lo1, ltarray+row, lock_array[locklevel]); + DIAG(("loid %2d, ROW %d, lock %s, res %s", loid, row, + lock2str[locklevel], res2str[res])); + if (res < GOT_THE_LOCK) + { + reinit_tlo(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + DBUG_ASSERT(res == GOT_THE_LOCK); + continue; + default: + reinit_tlo(&tablockman, lo1); + DIAG(("loid %2d, release all locks", loid)); + timeout++; + continue; + } + } + } + + reinit_tlo(&tablockman, lo1); + + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + timeouts+= timeout; + if (!rt_num_threads) + diag("number of timeouts: %d", timeouts); + pthread_mutex_unlock(&rt_mutex); + + return 0; +} + +int main() +{ + int i; + + my_init(); + pthread_mutex_init(&rt_mutex, 0); + + plan(40); + + if (my_atomic_initialize()) + return exit_status(); + + + tablockman_init(&tablockman, &loid2lo1, 50); + + for (i= 0; i < Nlos; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init (&conds[i], 0); + + loarray1[i].active_locks= 0; + loarray1[i].waiting_lock= 0; + loarray1[i].waiting_for= 0; + loarray1[i].mutex= &mutexes[i]; + loarray1[i].cond= &conds[i]; + loarray1[i].loid= i+1; + } + + for (i= 0; i < Ntbls; i++) + { + tablockman_init_locked_table(ltarray+i, Nlos); + } + + test_tablockman_simple(); + +#define CYCLES 10000 +#define THREADS Nlos /* don't change this line */ + + /* mixed load, stress-test with random locks */ + Nrows= 100; + Ntables= 10; + table_lock_ratio= 10; + run_test("\"random lock\" stress test", test_lockman, THREADS, CYCLES); +#if 0 + /* "real-life" simulation - many rows, no table locks */ + Nrows= 1000000; + Ntables= 10; + table_lock_ratio= 0; + run_test("\"real-life\" simulation test", test_lockman, THREADS, CYCLES*10); +#endif + for (i= 0; i < Nlos; i++) + { + tablockman_release_locks(&tablockman, &loarray1[i]); + pthread_mutex_destroy(loarray1[i].mutex); + pthread_cond_destroy(loarray1[i].cond); + } + + { + ulonglong now= my_getsystime(); + for (i= 0; i < Ntbls; i++) + { + tablockman_destroy_locked_table(ltarray+i); + } + tablockman_destroy(&tablockman); + now= my_getsystime()-now; + diag("lockman_destroy: %g secs", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c new file mode 100644 index 00000000000..71a1157f1ba --- /dev/null +++ b/storage/maria/unittest/ma_control_file-t.c @@ -0,0 +1,446 @@ +/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Unit test of the control file module of the Maria engine WL#3234 */ + +/* + Note that it is not possible to test the durability of the write (can't + pull the plug programmatically :) +*/ + +#include +#include +#include + +#ifndef WITH_MARIA_STORAGE_ENGINE +/* + If Maria is not compiled in, normally we don't come to building this test. +*/ +#error "Maria engine is not compiled in, test cannot be built" +#endif + +#include "maria.h" +#include "../../../storage/maria/maria_def.h" +#include + +char file_name[FN_REFLEN]; + +/* The values we'll set and expect the control file module to return */ +LSN expect_checkpoint_lsn; +uint32 expect_logno; + +static int delete_file(myf my_flags); +/* + Those are test-specific wrappers around the module's API functions: after + calling the module's API functions they perform checks on the result. +*/ +static int close_file(); /* wraps ma_control_file_end */ +static int create_or_open_file(); /* wraps ma_control_file_open_or_create */ +static int write_file(); /* wraps ma_control_file_write_and_force */ + +/* Tests */ +static int test_one_log(); +static int test_five_logs(); +static int test_3_checkpoints_and_2_logs(); +static int test_binary_content(); +static int test_start_stop(); +static int test_2_open_and_2_close(); +static int test_bad_magic_string(); +static int test_bad_checksum(); +static int test_bad_size(); + +/* Utility */ +static int verify_module_values_match_expected(); +static int verify_module_values_are_impossible(); +static void usage(); +static void get_options(int argc, char *argv[]); + +/* + If "expr" is FALSE, this macro will make the function print a diagnostic + message and immediately return 1. + This is inspired from assert() but does not crash the binary (sometimes we + may want to see how other tests go even if one fails). + RET_ERR means "return error". +*/ + +#define RET_ERR_UNLESS(expr) \ + {if (!(expr)) {diag("line %d: failure: '%s'", __LINE__, #expr); return 1;}} + + +int main(int argc,char *argv[]) +{ + MY_INIT(argv[0]); + maria_data_root= "."; + + plan(9); + + diag("Unit tests for control file"); + + get_options(argc,argv); + + diag("Deleting control file at startup, if there is an old one"); + RET_ERR_UNLESS(0 == delete_file(0)); /* if fails, can't continue */ + + diag("Tests of normal conditions"); + ok(0 == test_one_log(), "test of creating one log"); + ok(0 == test_five_logs(), "test of creating five logs"); + ok(0 == test_3_checkpoints_and_2_logs(), + "test of creating three checkpoints and two logs"); + ok(0 == test_binary_content(), "test of the binary content of the file"); + ok(0 == test_start_stop(), "test of multiple starts and stops"); + diag("Tests of abnormal conditions"); + ok(0 == test_2_open_and_2_close(), + "test of two open and two close (strange call sequence)"); + ok(0 == test_bad_magic_string(), "test of bad magic string"); + ok(0 == test_bad_checksum(), "test of bad checksum"); + ok(0 == test_bad_size(), "test of too small/big file"); + + return exit_status(); +} + + +static int delete_file(myf my_flags) +{ + RET_ERR_UNLESS(fn_format(file_name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) != NullS); + /* + Maybe file does not exist, ignore error. + The error will however be printed on stderr. + */ + my_delete(file_name, my_flags); + expect_checkpoint_lsn= CONTROL_FILE_IMPOSSIBLE_LSN; + expect_logno= CONTROL_FILE_IMPOSSIBLE_FILENO; + + return 0; +} + +/* + Verifies that global values last_checkpoint_lsn and last_logno (belonging + to the module) match what we expect. +*/ +static int verify_module_values_match_expected() +{ + RET_ERR_UNLESS(last_logno == expect_logno); + RET_ERR_UNLESS(last_checkpoint_lsn == + expect_checkpoint_lsn); + return 0; +} + + +/* + Verifies that global values last_checkpoint_lsn and last_logno (belonging + to the module) are impossible (this is used when the file has been closed). +*/ +static int verify_module_values_are_impossible() +{ + RET_ERR_UNLESS(last_logno == CONTROL_FILE_IMPOSSIBLE_FILENO); + RET_ERR_UNLESS(last_checkpoint_lsn == + CONTROL_FILE_IMPOSSIBLE_LSN); + return 0; +} + + +static int close_file() +{ + /* Simulate shutdown */ + ma_control_file_end(); + /* Verify amnesia */ + RET_ERR_UNLESS(verify_module_values_are_impossible() == 0); + return 0; +} + +static int create_or_open_file() +{ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_OK); + /* Check that the module reports expected information */ + RET_ERR_UNLESS(verify_module_values_match_expected() == 0); + return 0; +} + +static int write_file(const LSN checkpoint_lsn, + uint32 logno, + uint objs_to_write) +{ + RET_ERR_UNLESS(ma_control_file_write_and_force(checkpoint_lsn, logno, + objs_to_write) == 0); + /* Check that the module reports expected information */ + RET_ERR_UNLESS(verify_module_values_match_expected() == 0); + return 0; +} + +static int test_one_log() +{ + uint objs_to_write; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 123; + RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, + expect_logno, + objs_to_write) == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_five_logs() +{ + uint objs_to_write; + uint i; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 100; + for (i= 0; i<5; i++) + { + expect_logno*= 3; + RET_ERR_UNLESS(write_file(CONTROL_FILE_IMPOSSIBLE_LSN, expect_logno, + objs_to_write) == 0); + } + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_3_checkpoints_and_2_logs() +{ + uint objs_to_write; + /* + Simulate one checkpoint, one log creation, two checkpoints, one + log creation. + */ + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= MAKE_LSN(5, 10000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 17; + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= MAKE_LSN(17, 20000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LSN; + expect_checkpoint_lsn= MAKE_LSN(17, 45000); + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + + objs_to_write= CONTROL_FILE_UPDATE_ONLY_LOGNO; + expect_logno= 19; + RET_ERR_UNLESS(write_file(expect_checkpoint_lsn, + expect_logno, objs_to_write) == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_binary_content() +{ + uint i; + int fd; + + /* + TEST4: actually check by ourselves the content of the file. + Note that constants (offsets) are hard-coded here, precisely to prevent + someone from changing them in the control file module and breaking + backward-compatibility. + TODO: when we reach the format-freeze state, we may even just do a + comparison with a raw binary string, to not depend on any uint4korr + future change/breakage. + */ + + char buffer[23]; + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_read(fd, buffer, 23, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + i= uint3korr(buffer+12); + RET_ERR_UNLESS(i == LSN_FILE_NO(last_checkpoint_lsn)); + i= uint4korr(buffer+15); + RET_ERR_UNLESS(i == LSN_OFFSET(last_checkpoint_lsn)); + i= uint4korr(buffer+19); + RET_ERR_UNLESS(i == last_logno); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_start_stop() +{ + /* TEST5: Simulate start/nothing/stop/start/nothing/stop/start */ + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_2_open_and_2_close() +{ + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + + +static int test_bad_magic_string() +{ + char buffer[4]; + int fd; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + /* Corrupt magic string */ + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_pread(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_pwrite(fd, "papa", 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == + CONTROL_FILE_BAD_MAGIC_STRING); + /* Restore magic string */ + RET_ERR_UNLESS(my_pwrite(fd, buffer, 4, 0, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + return 0; +} + +static int test_bad_checksum() +{ + char buffer[4]; + int fd; + + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + /* Corrupt checksum */ + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_pread(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); + buffer[0]+= 3; /* mangle checksum */ + RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 8, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == + CONTROL_FILE_BAD_CHECKSUM); + /* Restore checksum */ + buffer[0]-= 3; + RET_ERR_UNLESS(my_pwrite(fd, buffer, 1, 4, MYF(MY_FNABP | MY_WME)) == 0); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + + return 0; +} + + +static int test_bad_size() +{ + char buffer[]="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + int fd; + + /* A too short file */ + RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0); + RET_ERR_UNLESS((fd= my_open(file_name, + O_BINARY | O_RDWR | O_CREAT, + MYF(MY_WME))) >= 0); + RET_ERR_UNLESS(my_write(fd, buffer, 10, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_SMALL); + RET_ERR_UNLESS(my_write(fd, buffer, 30, MYF(MY_FNABP | MY_WME)) == 0); + /* Check that control file module sees the problem */ + RET_ERR_UNLESS(ma_control_file_create_or_open() == CONTROL_FILE_TOO_BIG); + RET_ERR_UNLESS(my_close(fd, MYF(MY_WME)) == 0); + + /* Leave a correct control file */ + RET_ERR_UNLESS(delete_file(MYF(MY_WME)) == 0); + RET_ERR_UNLESS(create_or_open_file() == CONTROL_FILE_OK); + RET_ERR_UNLESS(close_file() == 0); + + return 0; +} + + +static struct my_option my_long_options[] = +{ +#ifndef DBUG_OFF + {"debug", '#', "Debug log.", + 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, +#endif + {"help", '?', "Display help and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version number and exit", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + + +static void version() +{ + printf("ma_control_file_test: unit test for the control file " + "module of the Maria storage engine. Ver 1.0 \n"); +} + +static my_bool +get_one_option(int optid, const struct my_option *opt __attribute__((unused)), + char *argument) +{ + switch(optid) { + case 'V': + version(); + exit(0); + case '#': + DBUG_PUSH (argument); + break; + case '?': + version(); + usage(); + exit(0); + } + return 0; +} + + +/* Read options */ + +static void get_options(int argc, char *argv[]) +{ + int ho_error; + + if ((ho_error=handle_options(&argc, &argv, my_long_options, + get_one_option))) + exit(ho_error); + + return; +} /* get options */ + + +static void usage() +{ + printf("Usage: %s [options]\n\n", my_progname); + my_print_help(my_long_options); + my_print_variables(my_long_options); +} diff --git a/storage/maria/unittest/ma_maria_log_cleanup.c b/storage/maria/unittest/ma_maria_log_cleanup.c new file mode 100644 index 00000000000..c5917764b9b --- /dev/null +++ b/storage/maria/unittest/ma_maria_log_cleanup.c @@ -0,0 +1,45 @@ +#include "../maria_def.h" +#include + +my_bool maria_log_remove() +{ + MY_DIR *dirp; + uint i; + MY_STAT stat_buff; + char file_name[FN_REFLEN]; + + /* Removes control file */ + if (fn_format(file_name, CONTROL_FILE_BASE_NAME, + maria_data_root, "", MYF(MY_WME)) == NullS) + return 1; + if (my_stat(file_name, &stat_buff, MYF(0)) && + my_delete(file_name, MYF(MY_WME)) != 0) + return 1; + + /* Finds and removes transaction log files */ + if (!(dirp = my_dir(maria_data_root, MYF(MY_DONT_SORT)))) + return 1; + + for (i= 0; i < dirp->number_off_files; i++) + { + char *file= dirp->dir_entry[i].name; + if (strncmp(file, "maria_log.", 10) == 0 && + file[10] >= '0' && file[10] <= '9' && + file[11] >= '0' && file[11] <= '9' && + file[12] >= '0' && file[12] <= '9' && + file[13] >= '0' && file[13] <= '9' && + file[14] >= '0' && file[14] <= '9' && + file[15] >= '0' && file[15] <= '9' && + file[16] >= '0' && file[16] <= '9' && + file[17] >= '0' && file[17] <= '9' && + file[18] == '\0') + { + if (fn_format(file_name, file, + maria_data_root, "", MYF(MY_WME)) == NullS || + my_delete(file_name, MYF(MY_WME)) != 0) + return 1; + } + } + return 0; +} + diff --git a/storage/maria/unittest/ma_pagecache_consist.c b/storage/maria/unittest/ma_pagecache_consist.c new file mode 100644 index 00000000000..39170693573 --- /dev/null +++ b/storage/maria/unittest/ma_pagecache_consist.c @@ -0,0 +1,459 @@ +/* + TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in + my_atomic-t.c (see BUG#22320). + Use diag() instead of fprintf(stderr). Use ok() and plan(). +*/ + +#include +#include +#include +#include "test_file.h" +#include + +#define PCACHE_SIZE (PAGE_SIZE*1024*8) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +#ifdef TEST_HIGH_CONCURENCY +static uint number_of_readers= 10; +static uint number_of_writers= 20; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_HIGH_CONCURENCY*/ +#ifdef TEST_READERS +static uint number_of_readers= 10; +static uint number_of_writers= 1; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_READERS*/ +#ifdef TEST_WRITERS +static uint number_of_readers= 0; +static uint number_of_writers= 10; +static uint number_of_tests= 30000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20; +static uint flush_divider= 1000; +#else /*TEST_WRITERS*/ +static uint number_of_readers= 10; +static uint number_of_writers= 10; +static uint number_of_tests= 50000; +static uint record_length_limit= PAGE_SIZE/200; +static uint number_of_pages= 20000; +static uint flush_divider= 1000; +#endif /*TEST_WRITERS*/ +#endif /*TEST_READERS*/ +#endif /*TEST_HIGH_CONCURENCY*/ + + +/* + Get pseudo-random length of the field in (0;limit) + + SYNOPSYS + get_len() + limit limit for generated value + + RETURN + length where length >= 0 & length < limit +*/ + +static uint get_len(uint limit) +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / limit); + } while (rec_len >= limit || rec_len == 0); + return rec_len; +} + + +/* check page consistency */ +uint check_page(uchar *buff, ulong offset, int page_locked, int page_no, + int tag) +{ + uint end= sizeof(uint); + uint num= *((uint *)buff); + uint i; + DBUG_ENTER("check_page"); + + for (i= 0; i < num; i++) + { + uint len= *((uint *)(buff + end)); + uint j; + end+= sizeof(uint) + sizeof(uint); + if (len + end > PAGE_SIZE) + { + diag("incorrect field header #%u by offset %lu\n", i, offset + end); + goto err; + } + for(j= 0; j < len; j++) + { + if (buff[end + j] != (uchar)((i+1) % 256)) + { + diag("incorrect %lu byte\n", offset + end + j); + goto err; + } + } + end+= len; + } + for(i= end; i < PAGE_SIZE; i++) + { + if (buff[i] != 0) + { + int h; + DBUG_PRINT("err", + ("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? "locked" : "unlocked"), + end, num, tag)); + diag("byte %lu (%lu + %u), page %u (%s, end: %u, recs: %u, tag: %d) should be 0\n", + offset + i, offset, i, page_no, + (page_locked ? "locked" : "unlocked"), + end, num, tag); + h= my_open("wrong_page", O_CREAT | O_TRUNC | O_RDWR, MYF(0)); + my_pwrite(h, (byte*) buff, PAGE_SIZE, 0, MYF(0)); + my_close(h, MYF(0)); + goto err; + } + } + DBUG_RETURN(end); +err: + DBUG_PRINT("err", ("try to flush")); + if (page_locked) + { + pagecache_delete(&pagecache, &file1, page_no, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + } + else + { + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + } + exit(1); +} + +void put_rec(uchar *buff, uint end, uint len, uint tag) +{ + uint i; + uint num= *((uint *)buff); + if (!len) + len= 1; + if (end + sizeof(uint)*2 + len > PAGE_SIZE) + return; + *((uint *)(buff + end))= len; + end+= sizeof(uint); + *((uint *)(buff + end))= tag; + end+= sizeof(uint); + num++; + *((uint *)buff)= num; + *((uint*)(buff + end))= len; + for (i= end; i < (len + end); i++) + { + buff[i]= (uchar) num % 256; + } +} + +/* + Recreate and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + + +void reader(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint page= get_len(number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + check_page(buffr, page * PAGE_SIZE, 0, page, -num); + if (i % 500 == 0) + printf("reader%d: %d\n", num, i); + + } + printf("reader%d: done\n", num); + free(buffr); +} + + +void writer(int num) +{ + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + + for (i= 0; i < number_of_tests; i++) + { + uint end; + uint page= get_len(number_of_pages); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + end= check_page(buffr, page * PAGE_SIZE, 1, page, num); + put_rec(buffr, end, get_len(record_length_limit), num); + pagecache_write(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + if (i % flush_divider == 0) + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + if (i % 500 == 0) + printf("writer%d: %d\n", num, i); + } + printf("writer%d: done\n", num); + free(buffr); +} + + +static void *test_thread_reader(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_reader"); + DBUG_PRINT("enter", ("param: %d", param)); + + reader(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +static void *test_thread_writer(void *arg) +{ + int param=*((int*) arg); + + my_thread_init(); + DBUG_ENTER("test_writer"); + DBUG_PRINT("enter", ("param: %d", param)); + + writer(param); + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + +int main(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_consist.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_consist.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "COND_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE)) == 0) + { + fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", pagen)); + { + unsigned char *buffr= malloc(PAGE_SIZE); + uint i; + memset(buffr, '\0', PAGE_SIZE); + for (i= 0; i < number_of_pages; i++) + { + pagecache_write(&pagecache, &file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + free(buffr); + } + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + while (number_of_readers != 0 || number_of_writers != 0) + { + if (number_of_readers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_readers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_reader, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_readers--; + } + if (number_of_writers != 0) + { + param=(int*) malloc(sizeof(int)); + *param= number_of_writers; + if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, + (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + number_of_writers--; + } + } + DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + /* wait finishing */ + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"COND_thread_count: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"LOCK_thread_count: %d from pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_end(0); + + DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(exit_status()); +} diff --git a/storage/maria/unittest/ma_pagecache_single.c b/storage/maria/unittest/ma_pagecache_single.c new file mode 100644 index 00000000000..7b77315e18c --- /dev/null +++ b/storage/maria/unittest/ma_pagecache_single.c @@ -0,0 +1,588 @@ +/* + TODO: use pthread_join instead of wait_for_thread_count_to_be_zero, like in + my_atomic-t.c (see BUG#22320). + Use diag() instead of fprintf(stderr). +*/ +#include +#include +#include +#include "test_file.h" +#include + +#define PCACHE_SIZE (PAGE_SIZE*1024*10) + +#ifndef DBUG_OFF +static const char* default_dbug_option; +#endif + +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; +static PAGECACHE pagecache; + +/* + File contance descriptors +*/ +static struct file_desc simple_read_write_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_read_change_write_read_test_file[]= +{ + {PAGE_SIZE/2, '\65'}, + {PAGE_SIZE/2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file1[]= +{ + {PAGE_SIZE*2, '\1'}, + { 0, 0} +}; +static struct file_desc simple_pin_test_file2[]= +{ + {PAGE_SIZE/2, '\1'}, + {PAGE_SIZE/2, (unsigned char)129}, + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_forget_test_file[]= +{ + {PAGE_SIZE, '\1'}, + { 0, 0} +}; +static struct file_desc simple_delete_flush_test_file[]= +{ + {PAGE_SIZE, '\2'}, + { 0, 0} +}; + + +/* + Recreate and reopen a file for test + + SYNOPSIS + reset_file() + file File to reset + file_name Path (and name) of file which should be reset +*/ + +void reset_file(PAGECACHE_FILE file, char *file_name) +{ + flush_pagecache_blocks(&pagecache, &file1, FLUSH_RELEASE); + if (my_close(file1.file, MYF(0)) != 0) + { + diag("Got error during %s closing from close() (errno: %d)\n", + file_name, errno); + exit(1); + } + my_delete(file_name, MYF(0)); + if ((file.file= my_open(file_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + diag("Got error during %s creation from open() (errno: %d)\n", + file_name, errno); + exit(1); + } +} + +/* + Write then read page, check file on disk +*/ + +int simple_read_write_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_write_test"); + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_write_test_file))), + "Simple write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, then read (and lock), change (write new value and unlock), + then check the page in the cache and on the disk +*/ +int simple_read_change_write_read_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_read_change_write_read_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + bfill(buffw, PAGE_SIZE/2, '\65'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, + PAGECACHE_WRITE_DELAY, + 0); + + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + ok((res= test(memcmp(buffr, buffw, PAGE_SIZE) == 0)), + "Simple read-change-write-read page "); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_read_change_write_read_test_file))), + "Simple read-change-write-read page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + Prepare page, read page 0 (and pin) then write page 1 and page 0. + Flush the file (shold flush only page 1 and return 1 (page 0 is + still pinned). + Check file on the disk. + Unpin and flush. + Check file on the disk. +*/ +int simple_pin_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_pin_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* test */ + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("error in flush_pagecache_blocks\n"); + exit(1); + } + pagecache_read(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + 0); + pagecache_write(&pagecache, &file1, 1, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + bfill(buffw + PAGE_SIZE/2, PAGE_SIZE/2, ((unsigned char) 129)); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE_TO_READ, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + /* + We have to get error because one page of the file is pinned, + other page should be flushed + */ + if (!flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Did not get error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE*2, + simple_pin_test_file1))), + "Simple pin page file with pin"); + pagecache_unlock(&pagecache, + &file1, + 0, + PAGECACHE_LOCK_READ_UNLOCK, + PAGECACHE_UNPIN, + 0, 0); + if (flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE)) + { + diag("Got error in flush_pagecache_blocks\n"); + res= 0; + goto err; + } + ok((res&= test(test_file(file1, file1_name, PAGE_SIZE*2, PAGE_SIZE, + simple_pin_test_file2))), + "Simple pin page result file"); + if (res) + reset_file(file1, file1_name); +err: + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page, write new value, then delete page from cache without flush, + on the disk should be page with old content written during preparation +*/ + +int simple_delete_forget_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_forget_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + bfill(buffw, PAGE_SIZE, '\2'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete(&pagecache, &file1, 0, + PAGECACHE_LOCK_WRITE, 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_forget_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + +/* + Prepare page with locking, write new content to the page, + delete page with flush and on existing lock, + check that page on disk contain new value. +*/ + +int simple_delete_flush_test() +{ + unsigned char *buffw= malloc(PAGE_SIZE); + unsigned char *buffr= malloc(PAGE_SIZE); + int res; + DBUG_ENTER("simple_delete_flush_test"); + /* prepare the file */ + bfill(buffw, PAGE_SIZE, '\1'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_WRITE, + PAGECACHE_PIN, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + /* test */ + bfill(buffw, PAGE_SIZE, '\2'); + pagecache_write(&pagecache, &file1, 0, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_WRITELOCKED, + PAGECACHE_PIN_LEFT_PINNED, + PAGECACHE_WRITE_DELAY, + 0); + pagecache_delete(&pagecache, &file1, 0, + PAGECACHE_LOCK_LEFT_WRITELOCKED, 1); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + ok((res= test(test_file(file1, file1_name, PAGE_SIZE, PAGE_SIZE, + simple_delete_flush_test_file))), + "Simple delete-forget page file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} + + +/* + write then read file bigger then cache +*/ + +int simple_big_test() +{ + unsigned char *buffw= (unsigned char *)malloc(PAGE_SIZE); + unsigned char *buffr= (unsigned char *)malloc(PAGE_SIZE); + struct file_desc *desc= + (struct file_desc *)malloc((PCACHE_SIZE/(PAGE_SIZE/2) + 1) * + sizeof(struct file_desc)); + int res, i; + DBUG_ENTER("simple_big_test"); + /* prepare the file twice larger then cache */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + bfill(buffw, PAGE_SIZE, (unsigned char) (i & 0xff)); + desc[i].length= PAGE_SIZE; + desc[i].content= (i & 0xff); + pagecache_write(&pagecache, &file1, i, 3, (char*)buffw, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + } + desc[i].length= 0; + desc[i].content= '\0'; + ok(1, "Simple big file write"); + /* check written pages sequentally read */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE/2); i++) + { + int j; + pagecache_read(&pagecache, &file1, i, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (i & 0xff)) + { + diag("simple_big_test seq: page %u byte %u mismatch\n", i, j); + return 0; + } + } + } + ok(1, "Simple big file sequential read"); + /* chack random reads */ + for (i= 0; i < PCACHE_SIZE/(PAGE_SIZE); i++) + { + int j, page; + page= rand() % (PCACHE_SIZE/(PAGE_SIZE/2)); + pagecache_read(&pagecache, &file1, page, 3, (char*)buffr, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + 0); + for(j= 0; j < PAGE_SIZE; j++) + { + if (buffr[j] != (page & 0xff)) + { + diag("simple_big_test rnd: page %u byte %u mismatch\n", page, j); + return 0; + } + } + } + ok(1, "Simple big file random read"); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + + ok((res= test(test_file(file1, file1_name, PCACHE_SIZE*2, PAGE_SIZE, + desc))), + "Simple big file"); + if (res) + reset_file(file1, file1_name); + free(buffw); + free(buffr); + DBUG_RETURN(res); +} +/* + Thread function +*/ + +static void *test_thread(void *arg) +{ +#ifndef DBUG_OFF + int param= *((int*) arg); +#endif + + my_thread_init(); + DBUG_ENTER("test_thread"); + + DBUG_PRINT("enter", ("param: %d", param)); + + if (!simple_read_write_test() || + !simple_read_change_write_read_test() || + !simple_pin_test() || + !simple_delete_forget_test() || + !simple_delete_flush_test()) + exit(1); + + SKIP_BIG_TESTS(4) + { + if (!simple_big_test()) + exit(1); + } + + DBUG_PRINT("info", ("Thread %s ended\n", my_thread_name())); + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + DBUG_RETURN(0); +} + + +int main(int argc __attribute__((unused)), + char **argv __attribute__((unused))) +{ + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error, pagen; + + MY_INIT(argv[0]); + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\test_pagecache_single.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/test_pagecache_single.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + DBUG_ENTER("main"); + DBUG_PRINT("info", ("Main thread: %s\n", my_thread_name())); + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("file1: %d", file1.file)); + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + my_pwrite(file1.file, "test file", 9, 0, MYF(0)); + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "Got error: %d from pthread_cond_init (errno: %d)\n", + error, errno); + exit(1); + } + + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr,"Got error: %d from pthread_attr_init (errno: %d)\n", + error,errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error,errno); + exit(1); + } + +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + plan(12); + + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PAGE_SIZE)) == 0) + { + fprintf(stderr,"Got error: init_pagecache() (errno: %d)\n", + errno); + exit(1); + } + DBUG_PRINT("info", ("Page cache %d pages", pagen)); + + if ((error=pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr,"Got error: %d from pthread_mutex_lock (errno: %d)\n", + error,errno); + exit(1); + } + param=(int*) malloc(sizeof(int)); + *param= 1; + if ((error= pthread_create(&tid, &thr_attr, test_thread, (void*) param))) + { + fprintf(stderr,"Got error: %d from pthread_create (errno: %d)\n", + error,errno); + exit(1); + } + thread_count++; + DBUG_PRINT("info", ("Thread started")); + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_lock\n",error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count,&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_cond_wait\n",error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr,"Got error: %d from pthread_mutex_unlock\n",error); + DBUG_PRINT("info", ("thread ended")); + + end_pagecache(&pagecache, 1); + DBUG_PRINT("info", ("Page cache ended")); + + if (my_close(file1.file, MYF(0)) != 0) + { + fprintf(stderr, "Got error during file1 closing from close() (errno: %d)\n", + errno); + exit(1); + } + /*my_delete(file1_name, MYF(0));*/ + my_end(0); + + DBUG_PRINT("info", ("file1 (%d) closed", file1.file)); + + DBUG_PRINT("info", ("Program end")); + + DBUG_RETURN(exit_status()); +} diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c new file mode 100644 index 00000000000..e31136d52ec --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -0,0 +1,643 @@ +#include "../maria_def.h" +#include +#include +#include +#include "../trnman.h" + +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif +static TRN *trn= &dummy_transaction_object; + +#define PCACHE_SIZE (1024*1024*10) + +#define LONG_BUFFER_SIZE (100 * 1024) + +#ifdef LONG_LOG_TEST +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE (1024L*1024L) +#define ITERATIONS (1600*4) +#else +#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC +#define LOG_FILE_SIZE (1024L*1024L*3L) +#define ITERATIONS 1600 +#endif + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*1024L +#define ITERATIONS 181000 +*/ + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*3L +#define ITERATIONS 1600 +*/ + +/* +#define LOG_FLAGS 0 +#define LOG_FILE_SIZE 1024L*1024L*100L +#define ITERATIONS 65000 +*/ + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(byte *ptr, ulong length) +{ + ulong i; + byte buff[2]; + for (i= 0; i < length; i++) + { + if (i % 2 == 0) + int2store(buff, i >> 1); + if (ptr[i] != buff[i % 2]) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) buff[i % 2]); + return 1; + } + } + return 0; +} + + +/* + Report OK for read operation + + SYNOPSIS + read_ok() + rec the record header +*/ + +void read_ok(TRANSLOG_HEADER_BUFFER *rec) +{ + char buff[80]; + snprintf(buff, sizeof(buff), "read record type: %u LSN: (%lu,0x%lx)", + rec->type, (ulong) LSN_FILE_NO(rec->lsn), + (ulong) LSN_OFFSET(rec->lsn)); + ok(1, buff); +} + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + byte *buffer, uint skip) +{ + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE * 2 + 7 * 2 + 2); + if (translog_read_record(rec->lsn, 0, rec->record_length, buffer, NULL) != + rec->record_length) + return 1; + return check_content(buffer + skip, rec->record_length - skip); +} + + +int main(int argc __attribute__((unused)), char *argv[]) +{ + uint32 i; + uint32 rec_len; + uint pagen; + byte long_tr_id[6]; + byte lsn_buff[23]= + { + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 + }; + byte long_buffer[LONG_BUFFER_SIZE * 2 + LSN_STORE_SIZE * 2 + 2]; + PAGECACHE pagecache; + LSN lsn, lsn_base, first_lsn; + TRANSLOG_HEADER_BUFFER rec; + LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 3]; + struct st_translog_scanner_data scanner; + int rc; + + MY_INIT(argv[0]); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + if (maria_log_remove()) + exit(1); + + for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i+= 2) + { + int2store(long_buffer + i, (i >> 1)); + /* long_buffer[i]= (i & 0xFF); */ + } + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + example_loghandler_init(); + + plan(((ITERATIONS - 1) * 4 + 1)*2 + ITERATIONS - 1); + + srandom(122334817L); + + long_tr_id[5]= 0xff; + + int4store(long_tr_id, 0); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6; + trn->short_id= 0; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_0LSN_EXAMPLE, + trn, NULL, + 6, TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + lsn_base= first_lsn= lsn; + + for (i= 1; i < ITERATIONS; i++) + { + trn->short_id= i % 0xFFFF; + if (i % 2) + { + lsn_store(lsn_buff, lsn_base); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE; + /* check auto-count feature */ + parts[TRANSLOG_INTERNAL_PARTS + 1].str= NULL; + parts[TRANSLOG_INTERNAL_PARTS + 1].length= 0; + if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_1LSN_EXAMPLE, + trn, NULL, LSN_STORE_SIZE, 0, parts, NULL)) + { + fprintf(stderr, "1 Can't write reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE"); + lsn_store(lsn_buff, lsn_base); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) + rec_len= 12; + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE; + parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer; + parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len; + /* check record length auto-counting */ + if (translog_write_record(&lsn, + LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE, + trn, NULL, 0, TRANSLOG_INTERNAL_PARTS + 2, + parts, NULL)) + { + fprintf(stderr, "1 Can't write var reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE"); + } + else + { + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 23; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_2LSN_EXAMPLE, + trn, NULL, + 23, TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + { + fprintf(stderr, "0 Can't write reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE"); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) + rec_len= 19; + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 14; + parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer; + parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len; + if (translog_write_record(&lsn, + LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE, + trn, NULL, 14 + rec_len, + TRANSLOG_INTERNAL_PARTS + 2, parts, NULL)) + { + fprintf(stderr, "0 Can't write var reference defore record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE"); + } + int4store(long_tr_id, i); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_0LSN_EXAMPLE, + trn, NULL, 6, + TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + + lsn_base= lsn; + + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) + rec_len= 9; + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len; + if (translog_write_record(&lsn, + LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE, + trn, NULL, rec_len, + TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE"); + if (translog_flush(lsn)) + { + fprintf(stderr, "Can't flush #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "flush"); + exit(1); + } + ok(1, "flush"); + } + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + example_loghandler_init(); + srandom(122334817L); + + + rc= 1; + + { + translog_size_t len= translog_read_record_header(first_lsn, &rec); + if (len == 0) + { + fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); + goto err; + } + if (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE || rec.short_trid != 0 || + rec.record_length != 6 || uint4korr(rec.header) != 0 || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF || + first_lsn != rec.lsn) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE " + "data read(0)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " + "lsn(%lu,0x%lx)\n", + (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, + (uint) uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + read_ok(&rec); + translog_free_record_header(&rec); + lsn= first_lsn; + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } + for (i= 1;; i++) + { + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + if (i != ITERATIONS) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS); + goto err; + } + break; + } + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + if (rec.type != LOGREC_FIXED_RECORD_1LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 7 || ref != lsn) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_1LSN_EXAMPLE " + "data read(%d) " + "type: %u strid: %u len: %u" + "ref: (%lu,0x%lx) (%lu,0x%lx) " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(lsn), (ulong) LSN_OFFSET(lsn), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + if (rec.type != LOGREC_FIXED_RECORD_2LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 23 || + ref1 != lsn || + ref2 != first_lsn || + ((uchar)rec.header[22]) != 0x55 || + ((uchar)rec.header[21]) != 0xAA || + ((uchar)rec.header[20]) != 0x55 || + ((uchar)rec.header[19]) != 0xAA || + ((uchar)rec.header[18]) != 0x55 || + ((uchar)rec.header[17]) != 0xAA || + ((uchar)rec.header[16]) != 0x55 || + ((uchar)rec.header[15]) != 0xAA || + ((uchar)rec.header[14]) != 0x55) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_2LSN_EXAMPLE " + "data read(%d) " + "type %u, strid %u, len %u, ref1(%lu,0x%lx), " + "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (uint) rec.header[14], (uint) rec.header[15], + (uint) rec.header[16], (uint) rec.header[17], + (uint) rec.header[18], (uint) rec.header[19], + (uint) rec.header[20], (uint) rec.header[21], + (uint) rec.header[22], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + read_ok(&rec); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header (var) " + "failed (%d)\n", i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration (first var) %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 12) + rec_len= 12; + if (rec.type != LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE || + len != 12 || ref != lsn || + check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE)) + { + fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE " + "data read(%d)" + "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " + "hdr len: %u (%d), " + "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n", + i, (uint) rec.type, + rec.type != LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE, + (uint) rec.short_trid, + rec.short_trid != (i % 0xFFFF), + (ulong) rec.record_length, (ulong) rec_len, + rec.record_length != rec_len + LSN_STORE_SIZE, + (uint) len, + len != 12, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), + (len != 12 || ref != lsn), + check_content(rec.header + LSN_STORE_SIZE, + len - LSN_STORE_SIZE)); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE " + "in whole rec read lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 19) + rec_len= 19; + if (rec.type != LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE * 2 || + len != 19 || + ref1 != lsn || + ref2 != first_lsn || + check_content(rec.header + LSN_STORE_SIZE * 2, + len - LSN_STORE_SIZE * 2)) + { + fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE " + "data read(%d) " + "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, " + "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE " + "in whole rec read lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + } + read_ok(&rec); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (rec.type != LOGREC_FIXED_RECORD_0LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 6 || uint4korr(rec.header) != i || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE " + "data read(%d)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint) uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + lsn= rec.lsn; + read_ok(&rec); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if ((rec_len= random() / (RAND_MAX / (LONG_BUFFER_SIZE + 1))) < 9) + rec_len= 9; + if (rec.type != LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len || + len != 9 || check_content(rec.header, len)) + { + fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE " + "data read(%d) " + "type %u, strid %u, len %lu != %lu, hdr len: %u, " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE " + "in whole rec read lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + goto err; + } + read_ok(&rec); + translog_free_record_header(&rec); + } + } + + rc= 0; +err: + if (rc) + ok(0, "read record"); + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + if (maria_log_remove()) + exit(1); + return(test(exit_status())); +} diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c new file mode 100644 index 00000000000..1281ee425d8 --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -0,0 +1,643 @@ +#include "../maria_def.h" +#include +#include +#include +#include "../trnman.h" + +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif +static TRN *trn= &dummy_transaction_object; + +#define PCACHE_SIZE (1024*1024*10) + +#define LONG_BUFFER_SIZE ((1024L*1024L*1024L) + (1024L*1024L*512)) + +#define MIN_REC_LENGTH (1024L*1024L + 1024L*512L + 1) + +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define ITERATIONS 2 +/*#define ITERATIONS 63 */ + +/* +#define LOG_FILE_SIZE 1024L*1024L*3L +#define ITERATIONS 1600 +*/ +/* +#define LOG_FILE_SIZE 1024L*1024L*100L +#define ITERATIONS 65000 +*/ + + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(byte *ptr, ulong length) +{ + ulong i; + byte buff[4]; + DBUG_ENTER("check_content"); + for (i= 0; i < length; i++) + { + if (i % 4 == 0) + int4store(buff, (i >> 2)); + if (ptr[i] != buff[i % 4]) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) buff[i % 4]); + DBUG_DUMP("mem", ptr +(ulong) (i > 16 ? i - 16 : 0), + (i > 16 ? 16 : i) + (i + 16 < length ? 16 : length - i)); + DBUG_RETURN(1); + } + } + DBUG_RETURN(0); +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + byte *buffer, uint skip) +{ + int res= 0; + translog_size_t len; + DBUG_ENTER("read_and_check_content"); + DBUG_ASSERT(rec->record_length < LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); + if ((len= translog_read_record(rec->lsn, 0, rec->record_length, + buffer, NULL)) != rec->record_length) + { + fprintf(stderr, "Requested %lu byte, read %lu\n", + (ulong) rec->record_length, (ulong) len); + res= 1; + } + res|= check_content(buffer + skip, rec->record_length - skip); + DBUG_RETURN(res); +} + + +static uint32 get_len() +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH; + } while (rec_len >= LONG_BUFFER_SIZE); + return rec_len; +} + +int main(int argc __attribute__((unused)), char *argv[]) +{ + uint32 i; + uint32 rec_len; + uint pagen; + byte long_tr_id[6]; + byte lsn_buff[23]= + { + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, + 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55 + }; + byte *long_buffer= malloc(LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); + PAGECACHE pagecache; + LSN lsn, lsn_base, first_lsn; + TRANSLOG_HEADER_BUFFER rec; + LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 2]; + struct st_translog_scanner_data scanner; + int rc; + + MY_INIT(argv[0]); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + if (maria_log_remove()) + exit(1); + + { + byte buff[4]; + for (i= 0; i < (LONG_BUFFER_SIZE + LSN_STORE_SIZE * 2 + 2); i++) + { + if (i % 4 == 0) + int4store(buff, (i >> 2)); + long_buffer[i]= buff[i % 4]; + } + } + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + example_loghandler_init(); + + plan(((ITERATIONS - 1) * 4 + 1) * 2); + + srandom(122334817L); + + long_tr_id[5]= 0xff; + + int4store(long_tr_id, 0); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6; + trn->short_id= 0; + if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_0LSN_EXAMPLE, + trn, NULL, + 6, TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + lsn_base= first_lsn= lsn; + + for (i= 1; i < ITERATIONS; i++) + { + if (i % 2) + { + lsn_store(lsn_buff, lsn_base); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE; + trn->short_id= i % 0xFFFF; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_1LSN_EXAMPLE, + trn, NULL, + LSN_STORE_SIZE, + TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + { + fprintf(stderr, "1 Can't write reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_1LSN_EXAMPLE"); + lsn_store(lsn_buff, lsn_base); + rec_len= get_len(); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE; + parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer; + parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len; + trn->short_id= i % 0xFFFF; + if (translog_write_record(&lsn, + LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE, + trn, NULL, LSN_STORE_SIZE + rec_len, + TRANSLOG_INTERNAL_PARTS + 2, + parts, NULL)) + { + fprintf(stderr, "1 Can't write var reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE"); + } + else + { + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 1].length= 23; + trn->short_id= i % 0xFFFF; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_2LSN_EXAMPLE, + trn, NULL, 23, + TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL)) + { + fprintf(stderr, "0 Can't write reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_2LSN_EXAMPLE"); + lsn_store(lsn_buff, lsn_base); + lsn_store(lsn_buff + LSN_STORE_SIZE, first_lsn); + rec_len= get_len(); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)lsn_buff; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE * 2; + parts[TRANSLOG_INTERNAL_PARTS + 1].str= (char*)long_buffer; + parts[TRANSLOG_INTERNAL_PARTS + 1].length= rec_len; + trn->short_id= i % 0xFFFF; + if (translog_write_record(&lsn, + LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE, + trn, NULL, LSN_STORE_SIZE * 2 + rec_len, + TRANSLOG_INTERNAL_PARTS + 2, + parts, NULL)) + { + fprintf(stderr, "0 Can't write var reference before record #%lu\n", + (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE"); + } + int4store(long_tr_id, i); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6; + trn->short_id= i % 0xFFFF; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_0LSN_EXAMPLE, + trn, NULL, 6, + TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_FIXED_RECORD_0LSN_EXAMPLE"); + + lsn_base= lsn; + + rec_len= get_len(); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= rec_len; + trn->short_id= i % 0xFFFF; + if (translog_write_record(&lsn, + LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE, + trn, NULL, rec_len, + TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + ok(0, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE"); + exit(1); + } + ok(1, "write LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE"); + } + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "pass2: Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "pass2: Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, 0)) + { + fprintf(stderr, "pass2: Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + example_loghandler_init(); + + srandom(122334817L); + + rc= 1; + + { + translog_size_t len= translog_read_record_header(first_lsn, &rec); + if (len == 0) + { + fprintf(stderr, "translog_read_record_header failed (%d)\n", errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE || rec.short_trid != 0 || + rec.record_length != 6 || uint4korr(rec.header) != 0 || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF || + first_lsn != rec.lsn) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE " + "data read(0)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u, " + "lsn(0x%lu,0x%lx)\n", + (uint) rec.type, (uint) rec.short_trid, (uint) rec.record_length, + (uint)uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + lsn= first_lsn; + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } + for (i= 1;; i++) + { + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + if (i != ITERATIONS) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS); + translog_free_record_header(&rec); + goto err; + } + break; + } + + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + if (rec.type != LOGREC_FIXED_RECORD_1LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != LSN_STORE_SIZE || ref != lsn) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_1LSN_EXAMPLE " + "data read(%d)" + "type %u, strid %u, len %u, ref(%lu,0x%lx), lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + if (rec.type != LOGREC_FIXED_RECORD_2LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 23 || + ref1 != lsn || + ref2 != first_lsn || + ((uchar)rec.header[22]) != 0x55 || + ((uchar)rec.header[21]) != 0xAA || + ((uchar)rec.header[20]) != 0x55 || + ((uchar)rec.header[19]) != 0xAA || + ((uchar)rec.header[18]) != 0x55 || + ((uchar)rec.header[17]) != 0xAA || + ((uchar)rec.header[16]) != 0x55 || + ((uchar)rec.header[15]) != 0xAA || + ((uchar)rec.header[14]) != 0x55) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_2LSN_EXAMPLE " + "data read(%d) " + "type %u, strid %u, len %u, ref1(%lu,0x%lx), " + "ref2(%lu,0x%lx) %x%x%x%x%x%x%x%x%x " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (uint) rec.header[14], (uint) rec.header[15], + (uint) rec.header[16], (uint) rec.header[17], + (uint) rec.header[18], (uint) rec.header[19], + (uint) rec.header[20], (uint) rec.header[21], + (uint) rec.header[22], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + ok(1, "read record"); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header (var) " + "failed (%d)\n", i, errno); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration (first var) %u " + "instead of beginning of %u\n", i, ITERATIONS); + goto err; + } + if (i % 2) + { + LSN ref; + ref= lsn_korr(rec.header); + rec_len= get_len(); + if (rec.type !=LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE || + len != 12 || ref != lsn || + check_content(rec.header + LSN_STORE_SIZE, len - LSN_STORE_SIZE)) + { + fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE " + "data read(%d)" + "type %u (%d), strid %u (%d), len %lu, %lu + 7 (%d), " + "hdr len: %u (%d), " + "ref(%lu,0x%lx), lsn(%lu,0x%lx) (%d), content: %d\n", + i, (uint) rec.type, + rec.type !=LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE, + (uint) rec.short_trid, + rec.short_trid != (i % 0xFFFF), + (ulong) rec.record_length, (ulong) rec_len, + rec.record_length != rec_len + LSN_STORE_SIZE, + (uint) len, + len != 12, + (ulong) LSN_FILE_NO(ref), (ulong) LSN_OFFSET(ref), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn), + (ref != lsn), + check_content(rec.header + LSN_STORE_SIZE, + len - LSN_STORE_SIZE)); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE " + "in whole rec read lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + else + { + LSN ref1, ref2; + ref1= lsn_korr(rec.header); + ref2= lsn_korr(rec.header + LSN_STORE_SIZE); + rec_len= get_len(); + if (rec.type != LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len + LSN_STORE_SIZE * 2 || + len != 19 || + ref1 != lsn || + ref2 != first_lsn || + check_content(rec.header + LSN_STORE_SIZE * 2, + len - LSN_STORE_SIZE * 2)) + { + fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE " + " data read(%d) " + "type %u, strid %u, len %lu != %lu + 14, hdr len: %u, " + "ref1(%lu,0x%lx), ref2(%lu,0x%lx), " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(ref1), (ulong) LSN_OFFSET(ref1), + (ulong) LSN_FILE_NO(ref2), (ulong) LSN_OFFSET(ref2), + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, LSN_STORE_SIZE * 2)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE " + "in whole rec read lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + ok(1, "read record"); + translog_free_record_header(&rec); + + len= translog_read_next_record_header(&scanner, &rec); + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + fprintf(stderr, "EOL met at the middle of iteration %u " + "instead of beginning of %u\n", i, ITERATIONS); + translog_free_record_header(&rec); + goto err; + } + if (rec.type != LOGREC_FIXED_RECORD_0LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != 6 || uint4korr(rec.header) != i || + ((uchar)rec.header[4]) != 0 || ((uchar)rec.header[5]) != 0xFF) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE " + "data read(%d)\n" + "type %u, strid %u, len %u, i: %u, 4: %u 5: %u " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (uint) rec.record_length, + (uint)uint4korr(rec.header), (uint) rec.header[4], + (uint) rec.header[5], + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + + lsn= rec.lsn; + + len= translog_read_next_record_header(&scanner, &rec); + rec_len= get_len(); + if (rec.type != LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE || + rec.short_trid != (i % 0xFFFF) || + rec.record_length != rec_len || + len != 9 || check_content(rec.header, len)) + { + fprintf(stderr, "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE " + "data read(%d) " + "type %u, strid %u, len %lu != %lu, hdr len: %u, " + "lsn(%lu,0x%lx)\n", + i, (uint) rec.type, (uint) rec.short_trid, + (ulong) rec.record_length, (ulong) rec_len, + (uint) len, + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE " + "in whole rec read lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + ok(1, "read record"); + translog_free_record_header(&rec); + } + } + + rc= 0; +err: + if (rc) + ok(0, "read record"); + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + if (maria_log_remove()) + exit(1); + + return (test(exit_status())); +} diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c new file mode 100644 index 00000000000..ff966160acc --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -0,0 +1,479 @@ +#include "../maria_def.h" +#include +#include +#include +#include "../trnman.h" + +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) + +/*#define LOG_FLAGS TRANSLOG_SECTOR_PROTECTION | TRANSLOG_PAGE_CRC */ +#define LOG_FLAGS 0 +/*#define LONG_BUFFER_SIZE (1024L*1024L*1024L + 1024L*1024L*512)*/ +#define LONG_BUFFER_SIZE (1024L*1024L*1024L) +#define MIN_REC_LENGTH 30 +#define SHOW_DIVIDER 10 +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define ITERATIONS 3 +#define WRITERS 3 +static uint number_of_writers= WRITERS; + +static pthread_cond_t COND_thread_count; +static pthread_mutex_t LOCK_thread_count; +static uint thread_count; + +static ulong lens[WRITERS][ITERATIONS]; +static LSN lsns1[WRITERS][ITERATIONS]; +static LSN lsns2[WRITERS][ITERATIONS]; +static byte *long_buffer; + +/* + Get pseudo-random length of the field in + limits [MIN_REC_LENGTH..LONG_BUFFER_SIZE] + + SYNOPSIS + get_len() + + RETURN + length - length >= 0 length <= LONG_BUFFER_SIZE +*/ + +static uint32 get_len() +{ + uint32 rec_len; + do + { + rec_len= random() / + (RAND_MAX / (LONG_BUFFER_SIZE - MIN_REC_LENGTH - 1)) + MIN_REC_LENGTH; + } while (rec_len >= LONG_BUFFER_SIZE); + return rec_len; +} + + +/* + Check that the buffer filled correctly + + SYNOPSIS + check_content() + ptr Pointer to the buffer + length length of the buffer + + RETURN + 0 - OK + 1 - Error +*/ + +static my_bool check_content(byte *ptr, ulong length) +{ + ulong i; + for (i= 0; i < length; i++) + { + if (((uchar)ptr[i]) != (i & 0xFF)) + { + fprintf(stderr, "Byte # %lu is %x instead of %x", + i, (uint) ptr[i], (uint) (i & 0xFF)); + return 1; + } + } + return 0; +} + + +/* + Read whole record content, and check content (put with offset) + + SYNOPSIS + read_and_check_content() + rec The record header buffer + buffer The buffer to read the record in + skip Skip this number of bytes ot the record content + + RETURN + 0 - OK + 1 - Error +*/ + + +static my_bool read_and_check_content(TRANSLOG_HEADER_BUFFER *rec, + byte *buffer, uint skip) +{ + int res= 0; + translog_size_t len; + + if ((len= translog_read_record(rec->lsn, 0, rec->record_length, + buffer, NULL)) != rec->record_length) + { + fprintf(stderr, "Requested %lu byte, read %lu\n", + (ulong) rec->record_length, (ulong) len); + res= 1; + } + res|= check_content(buffer + skip, rec->record_length - skip); + return(res); +} + +void writer(int num) +{ + LSN lsn; + TRN trn; + byte long_tr_id[6]; + uint i; + + trn.short_id= num; + for (i= 0; i < ITERATIONS; i++) + { + uint len= get_len(); + lens[num][i]= len; + LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1]; + + int2store(long_tr_id, num); + int4store(long_tr_id + 2, i); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_0LSN_EXAMPLE, + &trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL)) + { + fprintf(stderr, "Can't write LOGREC_FIXED_RECORD_0LSN_EXAMPLE record #%lu " + "thread %i\n", (ulong) i, num); + translog_destroy(); + pthread_mutex_lock(&LOCK_thread_count); + ok(0, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); + return; + } + lsns1[num][i]= lsn; + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_buffer; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= len; + if (translog_write_record(&lsn, + LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE, + &trn, NULL, + len, TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL)) + { + fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); + translog_destroy(); + pthread_mutex_lock(&LOCK_thread_count); + ok(0, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); + return; + } + lsns2[num][i]= lsn; + pthread_mutex_lock(&LOCK_thread_count); + ok(1, "write records"); + pthread_mutex_unlock(&LOCK_thread_count); + } + return; +} + + +static void *test_thread_writer(void *arg) +{ + int param= *((int*) arg); + + my_thread_init(); + + writer(param); + + pthread_mutex_lock(&LOCK_thread_count); + thread_count--; + ok(1, "writer finished"); /* just to show progress */ + VOID(pthread_cond_signal(&COND_thread_count)); /* Tell main we are + ready */ + pthread_mutex_unlock(&LOCK_thread_count); + free((gptr) arg); + my_thread_end(); + return(0); +} + + +int main(int argc __attribute__((unused)), + char **argv __attribute__ ((unused))) +{ + uint32 i; + uint pagen; + PAGECACHE pagecache; + LSN first_lsn; + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + pthread_t tid; + pthread_attr_t thr_attr; + int *param, error; + int rc; + + plan(WRITERS + ITERATIONS * WRITERS * 3); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + long_buffer= malloc(LONG_BUFFER_SIZE + 7 * 2 + 2); + if (long_buffer == 0) + { + fprintf(stderr, "End of memory\n"); + exit(1); + } + for (i= 0; i < (LONG_BUFFER_SIZE + 7 * 2 + 2); i++) + long_buffer[i]= (i & 0xFF); + + MY_INIT(argv[0]); + if (maria_log_remove()) + exit(1); + + +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + + if ((error= pthread_cond_init(&COND_thread_count, NULL))) + { + fprintf(stderr, "COND_thread_count: %d from pthread_cond_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_mutex_init(&LOCK_thread_count, MY_MUTEX_INIT_FAST))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_cond_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_attr_init(&thr_attr))) + { + fprintf(stderr, "Got error: %d from pthread_attr_init " + "(errno: %d)\n", error, errno); + exit(1); + } + if ((error= pthread_attr_setdetachstate(&thr_attr, PTHREAD_CREATE_DETACHED))) + { + fprintf(stderr, + "Got error: %d from pthread_attr_setdetachstate (errno: %d)\n", + error, errno); + exit(1); + } + +#ifdef HAVE_THR_SETCONCURRENCY + VOID(thr_setconcurrency(2)); +#endif + + my_thread_global_init(); + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + TRANSLOG_PAGE_SIZE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + example_loghandler_init(); + + srandom(122334817L); + { + LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1]; + byte long_tr_id[6]= + { + 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 + }; + + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6; + if (translog_write_record(&first_lsn, + LOGREC_FIXED_RECORD_0LSN_EXAMPLE, + &dummy_transaction_object, NULL, 6, + TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL)) + { + fprintf(stderr, "Can't write the first record\n"); + translog_destroy(); + exit(1); + } + } + + + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + { + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock " + "(errno: %d)\n", error, errno); + exit(1); + } + + while (number_of_writers != 0) + { + param= (int*) malloc(sizeof(int)); + *param= number_of_writers - 1; + if ((error= pthread_create(&tid, &thr_attr, test_thread_writer, + (void*) param))) + { + fprintf(stderr, "Got error: %d from pthread_create (errno: %d)\n", + error, errno); + exit(1); + } + thread_count++; + number_of_writers--; + } + pthread_mutex_unlock(&LOCK_thread_count); + + pthread_attr_destroy(&thr_attr); + + /* wait finishing */ + if ((error= pthread_mutex_lock(&LOCK_thread_count))) + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_lock\n", error); + while (thread_count) + { + if ((error= pthread_cond_wait(&COND_thread_count, &LOCK_thread_count))) + fprintf(stderr, "COND_thread_count: %d from pthread_cond_wait\n", error); + } + if ((error= pthread_mutex_unlock(&LOCK_thread_count))) + fprintf(stderr, "LOCK_thread_count: %d from pthread_mutex_unlock\n", error); + + /* Find last LSN and flush up to it (all our log) */ + { + LSN max= 0; + for (i= 0; i < WRITERS; i++) + { + if (cmp_translog_addr(lsns2[i][ITERATIONS - 1], max) > 0) + max= lsns2[i][ITERATIONS - 1]; + } + translog_flush(max); + } + + rc= 1; + + { + uint indeces[WRITERS]; + uint index, len, stage; + bzero(indeces, sizeof(uint) * WRITERS); + + bzero(indeces, sizeof(indeces)); + + if (translog_init_scanner(first_lsn, 1, &scanner)) + { + fprintf(stderr, "scanner init failed\n"); + goto err; + } + for (i= 0;; i++) + { + len= translog_read_next_record_header(&scanner, &rec); + + if (len == 0) + { + fprintf(stderr, "1-%d translog_read_next_record_header failed (%d)\n", + i, errno); + translog_free_record_header(&rec); + goto err; + } + if (rec.lsn == CONTROL_FILE_IMPOSSIBLE_LSN) + { + if (i != WRITERS * ITERATIONS * 2) + { + fprintf(stderr, "EOL met at iteration %u instead of %u\n", + i, ITERATIONS * WRITERS * 2); + translog_free_record_header(&rec); + goto err; + } + break; + } + index= indeces[rec.short_trid] / 2; + stage= indeces[rec.short_trid] % 2; + if (stage == 0) + { + if (rec.type !=LOGREC_FIXED_RECORD_0LSN_EXAMPLE || + rec.record_length != 6 || + uint2korr(rec.header) != rec.short_trid || + index != uint4korr(rec.header + 2) || + cmp_translog_addr(lsns1[rec.short_trid][index], rec.lsn) != 0) + { + fprintf(stderr, "Incorrect LOGREC_FIXED_RECORD_0LSN_EXAMPLE " + "data read(%d)\n" + "type %u, strid %u %u, len %u, i: %u %u, " + "lsn(%lu,0x%lx) (%lu,0x%lx)\n", + i, (uint) rec.type, + (uint) rec.short_trid, (uint) uint2korr(rec.header), + (uint) rec.record_length, + (uint) index, (uint) uint4korr(rec.header + 2), + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn), + (ulong) LSN_FILE_NO(lsns1[rec.short_trid][index]), + (ulong) LSN_OFFSET(lsns1[rec.short_trid][index])); + translog_free_record_header(&rec); + goto err; + } + } + else + { + if (rec.type != LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE || + len != 9 || + rec.record_length != lens[rec.short_trid][index] || + cmp_translog_addr(lsns2[rec.short_trid][index], rec.lsn) != 0 || + check_content(rec.header, len)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE " + "data read(%d) " + "thread: %d, iteration %d, stage %d\n" + "type %u (%d), len %u, length %lu %lu (%d) " + "lsn(%lu,0x%lx) (%lu,0x%lx)\n", + i, (uint) rec.short_trid, index, stage, + (uint) rec.type, (rec.type != + LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE), + (uint) len, + (ulong) rec.record_length, lens[rec.short_trid][index], + (rec.record_length != lens[rec.short_trid][index]), + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn), + (ulong) LSN_FILE_NO(lsns2[rec.short_trid][index]), + (ulong) LSN_OFFSET(lsns2[rec.short_trid][index])); + translog_free_record_header(&rec); + goto err; + } + if (read_and_check_content(&rec, long_buffer, 0)) + { + fprintf(stderr, + "Incorrect LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE " + "in whole rec read lsn(%lu,0x%lx)\n", + (ulong) LSN_FILE_NO(rec.lsn), + (ulong) LSN_OFFSET(rec.lsn)); + translog_free_record_header(&rec); + goto err; + } + } + ok(1, "record read"); + translog_free_record_header(&rec); + indeces[rec.short_trid]++; + } + } + + rc= 0; +err: + if (rc) + ok(0, "record read"); + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + if (maria_log_remove()) + exit(1); + + return(exit_status()); +} diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c new file mode 100644 index 00000000000..35e05f9c997 --- /dev/null +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -0,0 +1,156 @@ +#include "../maria_def.h" +#include +#include +#include +#include "../trnman.h" + +extern my_bool maria_log_remove(); + +#ifndef DBUG_OFF +static const char *default_dbug_option; +#endif + +#define PCACHE_SIZE (1024*1024*10) +#define PCACHE_PAGE TRANSLOG_PAGE_SIZE +#define LOG_FILE_SIZE (1024L*1024L*1024L + 1024L*1024L*512) +#define LOG_FLAGS 0 + +static char *first_translog_file= (char*)"maria_log.00000001"; +static char *file1_name= (char*)"page_cache_test_file_1"; +static PAGECACHE_FILE file1; + +int main(int argc __attribute__((unused)), char *argv[]) +{ + uint pagen; + byte long_tr_id[6]; + PAGECACHE pagecache; + LSN lsn; + MY_STAT st, *stat; + LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1]; + + MY_INIT(argv[0]); + + plan(1); + + bzero(&pagecache, sizeof(pagecache)); + maria_data_root= "."; + if (maria_log_remove()) + exit(1); + /* be sure that we have no logs in the directory*/ + if (my_stat(CONTROL_FILE_BASE_NAME, &st, MYF(0))) + my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); + if (my_stat(first_translog_file, &st, MYF(0))) + my_delete(first_translog_file, MYF(0)); + + bzero(long_tr_id, 6); +#ifndef DBUG_OFF +#if defined(__WIN__) + default_dbug_option= "d:t:i:O,\\ma_test_loghandler_pagecache.trace"; +#else + default_dbug_option= "d:t:i:o,/tmp/ma_test_loghandler_pagecache.trace"; +#endif + if (argc > 1) + { + DBUG_SET(default_dbug_option); + DBUG_SET_INITIAL(default_dbug_option); + } +#endif + + if (ma_control_file_create_or_open()) + { + fprintf(stderr, "Can't init control file (%d)\n", errno); + exit(1); + } + if ((pagen= init_pagecache(&pagecache, PCACHE_SIZE, 0, 0, + PCACHE_PAGE)) == 0) + { + fprintf(stderr, "Got error: init_pagecache() (errno: %d)\n", errno); + exit(1); + } + if (translog_init(".", LOG_FILE_SIZE, 50112, 0, &pagecache, LOG_FLAGS)) + { + fprintf(stderr, "Can't init loghandler (%d)\n", errno); + translog_destroy(); + exit(1); + } + example_loghandler_init(); + + if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0) + { + fprintf(stderr, "There is no %s (%d)\n", first_translog_file, errno); + exit(1); + } + if (st.st_size != TRANSLOG_PAGE_SIZE) + { + fprintf(stderr, + "incorrect initial size of %s: %ld instead of %ld\n", + first_translog_file, (long)st.st_size, (long)TRANSLOG_PAGE_SIZE); + exit(1); + } + int4store(long_tr_id, 0); + parts[TRANSLOG_INTERNAL_PARTS + 0].str= (char*)long_tr_id; + parts[TRANSLOG_INTERNAL_PARTS + 0].length= 6; + if (translog_write_record(&lsn, + LOGREC_FIXED_RECORD_0LSN_EXAMPLE, + &dummy_transaction_object, NULL, 6, + TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL)) + { + fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); + translog_destroy(); + exit(1); + } + + if ((file1.file= my_open(file1_name, + O_CREAT | O_TRUNC | O_RDWR, MYF(0))) == -1) + { + fprintf(stderr, "Got error during file1 creation from open() (errno: %d)\n", + errno); + exit(1); + } + if (chmod(file1_name, S_IRWXU | S_IRWXG | S_IRWXO) != 0) + { + fprintf(stderr, "Got error during file1 chmod() (errno: %d)\n", + errno); + exit(1); + } + + { + uchar page[PCACHE_PAGE]; + + bzero(page, PCACHE_PAGE); +#define PAGE_LSN_OFFSET 0 + lsn_store(page + PAGE_LSN_OFFSET, lsn); + pagecache_write(&pagecache, &file1, 0, 3, (char*)page, + PAGECACHE_LSN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, + PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_WRITE_DELAY, + 0); + flush_pagecache_blocks(&pagecache, &file1, FLUSH_FORCE_WRITE); + } + if ((stat= my_stat(first_translog_file, &st, MYF(0))) == 0) + { + fprintf(stderr, "can't stat %s (%d)\n", first_translog_file, errno); + exit(1); + } + if (st.st_size != TRANSLOG_PAGE_SIZE * 2) + { + fprintf(stderr, + "incorrect initial size of %s: %ld instead of %ld\n", + first_translog_file, + (long)st.st_size, (long)(TRANSLOG_PAGE_SIZE * 2)); + ok(0, "log triggered"); + exit(1); + } + ok(1, "log triggered"); + + translog_destroy(); + end_pagecache(&pagecache, 1); + ma_control_file_end(); + my_delete(CONTROL_FILE_BASE_NAME, MYF(0)); + my_delete(first_translog_file, MYF(0)); + my_delete(file1_name, MYF(0)); + + exit(0); +} diff --git a/storage/maria/unittest/test_file.c b/storage/maria/unittest/test_file.c new file mode 100644 index 00000000000..758d0bfa81b --- /dev/null +++ b/storage/maria/unittest/test_file.c @@ -0,0 +1,68 @@ +#include +#include +#include +#include "test_file.h" + + +/* + Check that file contance correspond to descriptor + + SYNOPSIS + test_file() + file File to test + file_name Path (and name) of file which is tested + size size of file + buff_size size of buffer which is enought to check the file + desc file descriptor to check with + + RETURN + 1 file if OK + 0 error +*/ + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc) +{ + MY_STAT stat_buff, *stat; + unsigned char *buffr= malloc(buff_size); + off_t pos= 0; + size_t byte; + int step= 0; + + if ((stat= my_stat(file_name, &stat_buff, MYF(0))) == NULL) + { + diag("Can't stat() %s (errno: %d)\n", file_name, errno); + return 0; + } + if (stat->st_size != size) + { + diag("file %s size is %lu (should be %lu)\n", + file_name, (ulong) stat->st_size, (ulong) size); + return 0; + } + /* check content */ + my_seek(file.file, 0, SEEK_SET, MYF(0)); + while (desc[step].length != 0) + { + if (my_read(file.file, (char*)buffr, desc[step].length, MYF(0)) != + desc[step].length) + { + diag("Can't read %u bytes from %s (errno: %d)\n", + (uint)desc[step].length, file_name, errno); + return 0; + } + for (byte= 0; byte < desc[step].length; byte++) + { + if (buffr[byte] != desc[step].content) + { + diag("content of %s mismatch 0x%x in position %lu instead of 0x%x\n", + file_name, (uint) buffr[byte], (ulong) (pos + byte), + desc[step].content); + return 0; + } + } + pos+= desc[step].length; + step++; + } + return 1; +} diff --git a/storage/maria/unittest/test_file.h b/storage/maria/unittest/test_file.h new file mode 100644 index 00000000000..293c692717e --- /dev/null +++ b/storage/maria/unittest/test_file.h @@ -0,0 +1,14 @@ +#include +#include "../ma_pagecache.h" + +/* + File content descriptor +*/ +struct file_desc +{ + unsigned int length; + unsigned char content; +}; + +int test_file(PAGECACHE_FILE file, char *file_name, + off_t size, size_t buff_size, struct file_desc *desc); diff --git a/storage/maria/unittest/trnman-t.c b/storage/maria/unittest/trnman-t.c new file mode 100644 index 00000000000..b0a087370f2 --- /dev/null +++ b/storage/maria/unittest/trnman-t.c @@ -0,0 +1,194 @@ +/* Copyright (C) 2006 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +#include + +#include +#include +#include +#include +#include +#include "../trnman.h" + +pthread_mutex_t rt_mutex; +pthread_attr_t attr; +size_t stacksize= 0; +#define STACK_SIZE (((int)stacksize-2048)*STACK_DIRECTION) + +int rt_num_threads; +int litmus; + +/* + create and end (commit or rollback) transactions randomly +*/ +#define MAX_ITER 100 +pthread_handler_t test_trnman(void *arg) +{ + uint x, y, i, n; + TRN *trn[MAX_ITER]; + pthread_mutex_t mutexes[MAX_ITER]; + pthread_cond_t conds[MAX_ITER]; + int m= (*(int *)arg); + + for (i= 0; i < MAX_ITER; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init(&conds[i], 0); + } + + for (x= ((int)(intptr)(&m)); m > 0; ) + { + y= x= (x*LL(3628273133) + LL(1500450271)) % LL(9576890767); /* three prime numbers */ + m-= n= x % MAX_ITER; + for (i= 0; i < n; i++) + { + trn[i]= trnman_new_trn(&mutexes[i], &conds[i], &m + STACK_SIZE); + if (!trn[i]) + { + diag("trnman_new_trn() failed"); + litmus++; + } + } + for (i= 0; i < n; i++) + { + y= (y*19 + 7) % 31; + trnman_end_trn(trn[i], y & 1); + } + } + for (i= 0; i < MAX_ITER; i++) + { + pthread_mutex_destroy(&mutexes[i]); + pthread_cond_destroy(&conds[i]); + } + pthread_mutex_lock(&rt_mutex); + rt_num_threads--; + pthread_mutex_unlock(&rt_mutex); + + return 0; +} +#undef MAX_ITER + +void run_test(const char *test, pthread_handler handler, int n, int m) +{ + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; + + litmus= 0; + + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) + { + diag("Out of memory"); + abort(); + } + + diag("Testing %s with %d threads, %d iterations... ", test, n, m); + rt_num_threads= n; + for (i= 0; i < n ; i++) + if (pthread_create(threads+i, &attr, handler, &m)) + { + diag("Could not create thread"); + abort(); + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(litmus == 0, "Tested %s in %g secs (%d)", test, ((double)now)/1e7, litmus); + my_free((void*)threads, MYF(0)); +} + +#define ok_read_from(T1, T2, RES) \ + i= trnman_can_read_from(trn[T1], trn[T2]->trid); \ + ok(i == RES, "trn" #T1 " %s read from trn" #T2, i ? "can" : "cannot") +#define start_transaction(T) \ + trn[T]= trnman_new_trn(&mutexes[T], &conds[T], &i + STACK_SIZE) +#define commit(T) trnman_commit_trn(trn[T]) +#define abort(T) trnman_abort_trn(trn[T]) + +#define Ntrns 4 +void test_trnman_read_from() +{ + TRN *trn[Ntrns]; + pthread_mutex_t mutexes[Ntrns]; + pthread_cond_t conds[Ntrns]; + int i; + + for (i= 0; i < Ntrns; i++) + { + pthread_mutex_init(&mutexes[i], MY_MUTEX_INIT_FAST); + pthread_cond_init(&conds[i], 0); + } + + start_transaction(0); /* start trn1 */ + start_transaction(1); /* start trn2 */ + ok_read_from(1, 0, 0); + commit(0); /* commit trn1 */ + start_transaction(2); /* start trn4 */ + abort(2); /* abort trn4 */ + start_transaction(3); /* start trn5 */ + ok_read_from(3, 0, 1); + ok_read_from(3, 1, 0); + ok_read_from(3, 2, 0); + commit(1); /* commit trn2 */ + ok_read_from(3, 1, 0); + commit(3); /* commit trn5 */ + + for (i= 0; i < Ntrns; i++) + { + pthread_mutex_destroy(&mutexes[i]); + pthread_cond_destroy(&conds[i]); + } +} + +int main() +{ + my_init(); + + plan(6); + + if (my_atomic_initialize()) + return exit_status(); + + pthread_mutex_init(&rt_mutex, 0); + pthread_attr_init(&attr); +#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE + pthread_attr_getstacksize(&attr, &stacksize); + if (stacksize == 0) +#endif + stacksize= PTHREAD_STACK_MIN; + +#define CYCLES 10000 +#define THREADS 10 + + trnman_init(); + + test_trnman_read_from(); + run_test("trnman", test_trnman, THREADS, CYCLES); + + diag("mallocs: %d", trnman_allocated_transactions); + { + ulonglong now= my_getsystime(); + trnman_destroy(); + now= my_getsystime()-now; + diag("trnman_destroy: %g", ((double)now)/1e7); + } + + pthread_mutex_destroy(&rt_mutex); + my_end(0); + return exit_status(); +} + diff --git a/storage/myisam/Makefile.am b/storage/myisam/Makefile.am index f50c312b8e4..4bd0b177daa 100644 --- a/storage/myisam/Makefile.am +++ b/storage/myisam/Makefile.am @@ -97,8 +97,8 @@ libmyisam_a_SOURCES = mi_open.c mi_extra.c mi_info.c mi_rkey.c \ mi_delete_table.c mi_rename.c mi_check.c \ mi_keycache.c mi_preload.c \ ft_parser.c ft_stopwords.c ft_static.c \ - ft_update.c ft_boolean_search.c ft_nlq_search.c sort.c \ - ha_myisam.cc \ + ft_update.c ft_boolean_search.c ft_nlq_search.c \ + sort.c ha_myisam.cc ft_myisam.c \ rt_index.c rt_key.c rt_mbr.c rt_split.c sp_key.c CLEANFILES = test?.MY? FT?.MY? isam.log mi_test_all rt_test.MY? sp_test.MY? diff --git a/storage/myisam/ft_boolean_search.c b/storage/myisam/ft_boolean_search.c index f38561a76dd..bd75ec7d9e8 100644 --- a/storage/myisam/ft_boolean_search.c +++ b/storage/myisam/ft_boolean_search.c @@ -149,7 +149,7 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b) static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) { /* ORDER BY word DESC, ndepth DESC */ - int i= mi_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, + int i= ha_compare_text(cs, (uchar*) (*b)->word+1,(*b)->len-1, (uchar*) (*a)->word+1,(*a)->len-1,0,0); if (!i) i=CMP_NUM((*b)->ndepth,(*a)->ndepth); @@ -183,7 +183,7 @@ static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param, case FT_TOKEN_WORD: ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_WORD) + - (info->trunc ? MI_MAX_KEY_BUFF : + (info->trunc ? HA_MAX_KEY_BUFF : word_len * ftb_param->ftb->charset->mbmaxlen + HA_FT_WLEN + ftb_param->ftb->info->s->rec_reflength)); @@ -332,7 +332,6 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) uint off, extra=HA_FT_WLEN+info->s->base.rec_reflength; uchar *lastkey_buf=ftbw->word+ftbw->off; - LINT_INIT(off); if (ftbw->flags & FTB_FLAG_TRUNC) lastkey_buf+=ftbw->len; @@ -376,7 +375,7 @@ static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search) if (!r && !ftbw->off) { - r= mi_compare_text(ftb->charset, + r= ha_compare_text(ftb->charset, info->lastkey+1, info->lastkey_length-extra-1, (uchar*) ftbw->word+1, @@ -847,7 +846,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2) { ftbw= ftb->list[c]; - if (mi_compare_text(ftb->charset, (uchar*)word, len, + if (ha_compare_text(ftb->charset, (uchar*)word, len, (uchar*)ftbw->word+1, ftbw->len-1, (my_bool)(ftbw->flags&FTB_FLAG_TRUNC), 0) > 0) b= c; @@ -857,7 +856,7 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param, for (; c >= 0; c--) { ftbw= ftb->list[c]; - if (mi_compare_text(ftb->charset, (uchar*)word, len, + if (ha_compare_text(ftb->charset, (uchar*)word, len, (uchar*)ftbw->word + 1,ftbw->len - 1, (my_bool)(ftbw->flags & FTB_FLAG_TRUNC), 0)) break; diff --git a/storage/myisam/ft_eval.c b/storage/myisam/ft_eval.c index 7eb78861e5e..de01510fdd7 100644 --- a/storage/myisam/ft_eval.c +++ b/storage/myisam/ft_eval.c @@ -48,7 +48,7 @@ int main(int argc, char *argv[]) recinfo[0].type=FIELD_SKIP_ENDSPACE; recinfo[0].length=docid_length; recinfo[1].type=FIELD_BLOB; - recinfo[1].length= 4+mi_portable_sizeof_char_ptr; + recinfo[1].length= 4+portable_sizeof_char_ptr; /* Define a key over the first column */ keyinfo[0].seg=keyseg; diff --git a/storage/myisam/ft_myisam.c b/storage/myisam/ft_myisam.c new file mode 100644 index 00000000000..76c04ba4c0b --- /dev/null +++ b/storage/myisam/ft_myisam.c @@ -0,0 +1,36 @@ +/* Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* Written by Sergei A. Golubchik, who has a shared copyright to this code */ + +/* + This function is for interface functions between fulltext and myisam +*/ + +#include "ftdefs.h" + +FT_INFO *ft_init_search(uint flags, void *info, uint keynr, + byte *query, uint query_len, CHARSET_INFO *cs, + byte *record) +{ + FT_INFO *res; + if (flags & FT_BOOL) + res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); + else + res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, + record); + return res; +} diff --git a/storage/myisam/ft_nlq_search.c b/storage/myisam/ft_nlq_search.c index 4774b819aa2..a540c52e683 100644 --- a/storage/myisam/ft_nlq_search.c +++ b/storage/myisam/ft_nlq_search.c @@ -103,7 +103,7 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio) { if (keylen && - mi_compare_text(aio->charset,info->lastkey+1, + ha_compare_text(aio->charset,info->lastkey+1, info->lastkey_length-extra-1, keybuff+1,keylen-1,0,0)) break; diff --git a/storage/myisam/ft_parser.c b/storage/myisam/ft_parser.c index ba858c37aee..d8f90c38366 100644 --- a/storage/myisam/ft_parser.c +++ b/storage/myisam/ft_parser.c @@ -31,7 +31,7 @@ typedef struct st_my_ft_parser_param static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2) { - return mi_compare_text(cs, (uchar*) w1->pos, w1->len, + return ha_compare_text(cs, (uchar*) w1->pos, w1->len, (uchar*) w2->pos, w2->len, 0, 0); } diff --git a/storage/myisam/ft_static.c b/storage/myisam/ft_static.c index 610c20eede6..d48bedc9e3b 100644 --- a/storage/myisam/ft_static.c +++ b/storage/myisam/ft_static.c @@ -54,20 +54,6 @@ const struct _ft_vft _ft_vft_boolean = { ft_boolean_get_relevance, ft_boolean_reinit_search }; - -FT_INFO *ft_init_search(uint flags, void *info, uint keynr, - uchar *query, uint query_len, CHARSET_INFO *cs, - uchar *record) -{ - FT_INFO *res; - if (flags & FT_BOOL) - res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); - else - res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, - record); - return res; -} - const char *ft_stopword_file = 0; const char *ft_precompiled_stopwords[] = { diff --git a/storage/myisam/ft_stopwords.c b/storage/myisam/ft_stopwords.c index e6a90bc927a..b0f1ef8ff8b 100644 --- a/storage/myisam/ft_stopwords.c +++ b/storage/myisam/ft_stopwords.c @@ -29,7 +29,7 @@ static TREE *stopwords3=NULL; static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), FT_STOPWORD *w1, FT_STOPWORD *w2) { - return mi_compare_text(default_charset_info, + return ha_compare_text(default_charset_info, (uchar *)w1->pos,w1->len, (uchar *)w2->pos,w2->len,0,0); } diff --git a/storage/myisam/ft_test1.c b/storage/myisam/ft_test1.c index e49c47bb268..b37935a0d7a 100644 --- a/storage/myisam/ft_test1.c +++ b/storage/myisam/ft_test1.c @@ -75,12 +75,12 @@ static int run_test(const char *filename) /* First define 2 columns */ recinfo[0].type=extra_field; - recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr : + recinfo[0].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr : extra_length); if (extra_field == FIELD_VARCHAR) recinfo[0].length+= HA_VARCHAR_PACKLENGTH(extra_length); recinfo[1].type=key_field; - recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr : + recinfo[1].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr : key_length); if (key_field == FIELD_VARCHAR) recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length); diff --git a/storage/myisam/ft_update.c b/storage/myisam/ft_update.c index e3e4c62158f..d1548e32870 100644 --- a/storage/myisam/ft_update.c +++ b/storage/myisam/ft_update.c @@ -180,7 +180,7 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const uchar *rec1, const uchar *rec2) { if ((ftsi1.pos != ftsi2.pos) && (!ftsi1.pos || !ftsi2.pos || - mi_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, + ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len, (uchar*) ftsi2.pos,ftsi2.len,0,0))) DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT); } @@ -209,7 +209,7 @@ int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf, error=0; while(old_word->pos && new_word->pos) { - cmp= mi_compare_text(cs, (uchar*) old_word->pos,old_word->len, + cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len, (uchar*) new_word->pos,new_word->len,0,0); cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5); diff --git a/storage/myisam/fulltext.h b/storage/myisam/fulltext.h index 856e93e034d..9aef2d0d002 100644 --- a/storage/myisam/fulltext.h +++ b/storage/myisam/fulltext.h @@ -20,18 +20,8 @@ #include "myisamdef.h" #include "ft_global.h" -#define HA_FT_WTYPE HA_KEYTYPE_FLOAT -#define HA_FT_WLEN 4 -#define FT_SEGS 2 - -#define ft_sintXkorr(A) mi_sint4korr(A) -#define ft_intXstore(T,A) mi_int4store(T,A) - -extern const HA_KEYSEG ft_keysegs[FT_SEGS]; - int _mi_ft_cmp(MI_INFO *, uint, const uchar *, const uchar *); int _mi_ft_add(MI_INFO *, uint, uchar *, const uchar *, my_off_t); int _mi_ft_del(MI_INFO *, uint, uchar *, const uchar *, my_off_t); uint _mi_ft_convert_to_ft2(MI_INFO *, uint, uchar *); - diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc index 98f74247707..3472bbcabd5 100644 --- a/storage/myisam/ha_myisam.cc +++ b/storage/myisam/ha_myisam.cc @@ -22,6 +22,7 @@ #include "mysql_priv.h" #include #include +#include #include #include "ha_myisam.h" #include @@ -56,7 +57,7 @@ static handler *myisam_create_handler(handlerton *hton, // collect errors printed by mi_check routines -static void mi_check_print_msg(MI_CHECK *param, const char* msg_type, +static void mi_check_print_msg(HA_CHECK *param, const char* msg_type, const char *fmt, va_list args) { THD* thd = (THD*)param->thd; @@ -300,7 +301,7 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out, Check for underlying table conformance SYNOPSIS - check_definition() + myisam_check_definition() t1_keyinfo in First table key definition t1_recinfo in First table record definition t1_keys in Number of keys in first table @@ -442,13 +443,13 @@ int check_definition(MI_KEYDEF *t1_keyinfo, MI_COLUMNDEF *t1_recinfo, extern "C" { -volatile int *killed_ptr(MI_CHECK *param) +volatile int *killed_ptr(HA_CHECK *param) { /* In theory Unsafe conversion, but should be ok for now */ return (int*) &(((THD *)(param->thd))->killed); } -void mi_check_print_error(MI_CHECK *param, const char *fmt,...) +void mi_check_print_error(HA_CHECK *param, const char *fmt,...) { param->error_printed|=1; param->out_flag|= O_DATA_LOST; @@ -458,7 +459,7 @@ void mi_check_print_error(MI_CHECK *param, const char *fmt,...) va_end(args); } -void mi_check_print_info(MI_CHECK *param, const char *fmt,...) +void mi_check_print_info(HA_CHECK *param, const char *fmt,...) { va_list args; va_start(args, fmt); @@ -466,7 +467,7 @@ void mi_check_print_info(MI_CHECK *param, const char *fmt,...) va_end(args); } -void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) +void mi_check_print_warning(HA_CHECK *param, const char *fmt,...) { param->warning_printed=1; param->out_flag|= O_DATA_LOST; @@ -755,7 +756,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) { if (!file) return HA_ADMIN_INTERNAL_ERROR; int error; - MI_CHECK param; + HA_CHECK param; MYISAM_SHARE* share = file->s; const char *old_proc_info=thd->proc_info; @@ -766,7 +767,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) param.db_name= table->s->db.str; param.table_name= table->alias; param.testflag = check_opt->flags | T_CHECK | T_SILENT; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; if (!(table->db_stat & HA_READ_ONLY)) param.testflag|= T_STATISTICS; @@ -847,7 +848,7 @@ int ha_myisam::check(THD* thd, HA_CHECK_OPT* check_opt) int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) { int error=0; - MI_CHECK param; + HA_CHECK param; MYISAM_SHARE* share = file->s; myisamchk_init(¶m); @@ -858,7 +859,7 @@ int ha_myisam::analyze(THD *thd, HA_CHECK_OPT* check_opt) param.testflag= (T_FAST | T_CHECK | T_SILENT | T_STATISTICS | T_DONT_CHECK_CHECKSUM); param.using_global_keycache = 1; - param.stats_method= (enum_mi_stats_method)thd->variables.myisam_stats_method; + param.stats_method= (enum_handler_stats_method)thd->variables.myisam_stats_method; if (!(share->state.changed & STATE_NOT_ANALYZED)) return HA_ADMIN_ALREADY_DONE; @@ -907,7 +908,7 @@ int ha_myisam::restore(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "restore"; @@ -970,7 +971,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "backup"; @@ -986,7 +987,7 @@ int ha_myisam::backup(THD* thd, HA_CHECK_OPT *check_opt) int ha_myisam::repair(THD* thd, HA_CHECK_OPT *check_opt) { int error; - MI_CHECK param; + HA_CHECK param; ha_rows start_records; if (!file) return HA_ADMIN_INTERNAL_ERROR; @@ -1036,7 +1037,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt) { int error; if (!file) return HA_ADMIN_INTERNAL_ERROR; - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd = thd; @@ -1055,7 +1056,7 @@ int ha_myisam::optimize(THD* thd, HA_CHECK_OPT *check_opt) } -int ha_myisam::repair(THD *thd, MI_CHECK ¶m, bool do_optimize) +int ha_myisam::repair(THD *thd, HA_CHECK ¶m, bool do_optimize) { int error=0; uint local_testflag=param.testflag; @@ -1307,7 +1308,7 @@ int ha_myisam::preload_keys(THD* thd, HA_CHECK_OPT *check_opt) err: { - MI_CHECK param; + HA_CHECK param; myisamchk_init(¶m); param.thd= thd; param.op_name= "preload_keys"; diff --git a/storage/myisam/ha_myisam.h b/storage/myisam/ha_myisam.h index 6cc9f4811b0..cb099813a37 100644 --- a/storage/myisam/ha_myisam.h +++ b/storage/myisam/ha_myisam.h @@ -21,6 +21,7 @@ /* class for the the myisam handler */ #include +#include #include #define HA_RECOVER_NONE 0 /* No automatic recover */ @@ -39,7 +40,7 @@ class ha_myisam: public handler ulonglong int_table_flags; char *data_file_name, *index_file_name; bool can_enable_indexes; - int repair(THD *thd, MI_CHECK ¶m, bool optimize); + int repair(THD *thd, HA_CHECK ¶m, bool optimize); public: ha_myisam(handlerton *hton, TABLE_SHARE *table_arg); @@ -56,8 +57,8 @@ class ha_myisam: public handler HA_READ_ORDER | HA_KEYREAD_ONLY); } uint max_supported_keys() const { return MI_MAX_KEY; } - uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; } - uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } + uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } uint checksum() const; virtual bool check_if_locking_is_allowed(uint sql_command, diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c index 255f17eada7..5442c9a9093 100644 --- a/storage/myisam/mi_check.c +++ b/storage/myisam/mi_check.c @@ -59,14 +59,14 @@ /* Functions defined in this file */ -static int check_k_link(MI_CHECK *param, MI_INFO *info,uint nr); -static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, +static int check_k_link(HA_CHECK *param, MI_INFO *info,uint nr); +static int chk_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level); static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo); static ha_checksum calc_checksum(ha_rows count); static int writekeys(MI_SORT_PARAM *sort_param); -static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file); static int sort_key_read(MI_SORT_PARAM *sort_param,void *key); static int sort_ft_key_read(MI_SORT_PARAM *sort_param,void *key); @@ -80,13 +80,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, reg1 SORT_KEY_BLOCKS *key_block, uchar *key, my_off_t prev_block); static int sort_delete_record(MI_SORT_PARAM *sort_param); -/*static int flush_pending_blocks(MI_CHECK *param);*/ -static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, +/*static int flush_pending_blocks(HA_CHECK *param);*/ +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, uint buffer_length); static ha_checksum mi_byte_checksum(const uchar *buf, uint length); -static void set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share); +static void set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share); -void myisamchk_init(MI_CHECK *param) +void myisamchk_init(HA_CHECK *param) { bzero((uchar*) param,sizeof(*param)); param->opt_follow_links=1; @@ -108,7 +108,7 @@ void myisamchk_init(MI_CHECK *param) /* Check the status flags for the table */ -int chk_status(MI_CHECK *param, register MI_INFO *info) +int chk_status(HA_CHECK *param, register MI_INFO *info) { MYISAM_SHARE *share=info->s; @@ -136,7 +136,7 @@ int chk_status(MI_CHECK *param, register MI_INFO *info) /* Check delete links */ -int chk_del(MI_CHECK *param, register MI_INFO *info, uint test_flag) +int chk_del(HA_CHECK *param, register MI_INFO *info, uint test_flag) { reg2 ha_rows i; uint delete_link_length; @@ -245,7 +245,7 @@ wrong: /* Check delete links in index file */ -static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) +static int check_k_link(HA_CHECK *param, register MI_INFO *info, uint nr) { my_off_t next_link; uint block_size=(nr+1)*MI_MIN_KEY_BLOCK_LENGTH; @@ -322,7 +322,7 @@ static int check_k_link(MI_CHECK *param, register MI_INFO *info, uint nr) /* Check sizes of files */ -int chk_size(MI_CHECK *param, register MI_INFO *info) +int chk_size(HA_CHECK *param, register MI_INFO *info) { int error=0; register my_off_t skr,size; @@ -398,7 +398,7 @@ int chk_size(MI_CHECK *param, register MI_INFO *info) /* Check keys */ -int chk_key(MI_CHECK *param, register MI_INFO *info) +int chk_key(HA_CHECK *param, register MI_INFO *info) { uint key,found_keys=0,full_text_keys=0,result=0; ha_rows keys; @@ -583,7 +583,7 @@ do_stat: } /* chk_key */ -static int chk_index_down(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int chk_index_down(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level) { @@ -730,13 +730,13 @@ int mi_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull, /* Check if index is ok */ -static int chk_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int chk_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, uchar *buff, ha_rows *keys, ha_checksum *key_checksum, uint level) { int flag; uint used_length,comp_flag,nod_flag,key_length=0; - uchar key[MI_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF],*temp_buff,*keypos,*old_keypos,*endpos; my_off_t next_page,record; char llbuff[22]; uint diff_pos[2]; @@ -933,7 +933,7 @@ static uint isam_key_length(MI_INFO *info, register MI_KEYDEF *keyinfo) /* Check that record-link is ok */ -int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) +int chk_data_link(HA_CHECK *param, MI_INFO *info,int extend) { int error,got_error,flag; uint key,left_length,b_type,field; @@ -943,7 +943,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) uchar *record,*to; char llbuff[22],llbuff2[22],llbuff3[22]; ha_checksum intern_record_checksum; - ha_checksum key_checksum[MI_MAX_POSSIBLE_KEY]; + ha_checksum key_checksum[HA_MAX_POSSIBLE_KEY]; my_bool static_row_size; MI_KEYDEF *keyinfo; MI_BLOCK_INFO block_info; @@ -991,6 +991,9 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) if (*killed_ptr(param)) goto err2; switch (info->s->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: if (my_b_read(¶m->read_cache,(uchar*) record, info->s->base.pack_reclength)) @@ -1378,7 +1381,7 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) /* Recover old table by reading each record and writing all keys */ /* Save new datafile-name in temp_filename */ -int mi_repair(MI_CHECK *param, register MI_INFO *info, +int mi_repair(HA_CHECK *param, register MI_INFO *info, char * name, int rep_quick) { int error,got_error; @@ -1388,7 +1391,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, File new_file; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; MI_SORT_PARAM sort_param; DBUG_ENTER("mi_repair"); @@ -1771,7 +1774,7 @@ int movepoint(register MI_INFO *info, uchar *record, my_off_t oldpos, /* Tell system that we want all memory for our cache */ -void lock_memory(MI_CHECK *param __attribute__((unused))) +void lock_memory(HA_CHECK *param __attribute__((unused))) { #ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */ if (param->opt_lock_memory) @@ -1787,7 +1790,7 @@ void lock_memory(MI_CHECK *param __attribute__((unused))) /* Flush all changed blocks to disk */ -int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) +int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file) { if (flush_key_blocks(key_cache, file, FLUSH_RELEASE)) { @@ -1802,12 +1805,12 @@ int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file) /* Sort index for more efficent reads */ -int mi_sort_index(MI_CHECK *param, register MI_INFO *info, char * name) +int mi_sort_index(HA_CHECK *param, register MI_INFO *info, char * name) { reg2 uint key; reg1 MI_KEYDEF *keyinfo; File new_file; - my_off_t index_pos[MI_MAX_POSSIBLE_KEY]; + my_off_t index_pos[HA_MAX_POSSIBLE_KEY]; uint r_locks,w_locks; int old_lock; MYISAM_SHARE *share=info->s; @@ -1902,12 +1905,12 @@ err2: /* Sort records recursive using one index */ -static int sort_one_index(MI_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, +static int sort_one_index(HA_CHECK *param, MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file) { uint length,nod_flag,used_length, key_length; uchar *buff,*keypos,*endpos; - uchar key[MI_MAX_POSSIBLE_KEY_BUFF]; + uchar key[HA_MAX_POSSIBLE_KEY_BUFF]; my_off_t new_page_pos,next_page; char llbuff[22]; DBUG_ENTER("sort_one_index"); @@ -2022,7 +2025,7 @@ int change_to_newfile(const char * filename, const char * old_ext, /* Locks a whole file */ /* Gives an error-message if file can't be locked */ -int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, +int lock_file(HA_CHECK *param, File file, my_off_t start, int lock_type, const char *filetype, const char *filename) { if (my_lock(file,lock_type,start,F_TO_EOF, @@ -2039,7 +2042,7 @@ int lock_file(MI_CHECK *param, File file, my_off_t start, int lock_type, /* Copy a block between two files */ -int filecopy(MI_CHECK *param, File to,File from,my_off_t start, +int filecopy(HA_CHECK *param, File to,File from,my_off_t start, my_off_t length, const char *type) { char tmp_buff[IO_SIZE],*buff; @@ -2090,7 +2093,7 @@ err: <>0 Error */ -int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, +int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick) { int got_error; @@ -2104,7 +2107,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, HA_KEYSEG *keyseg; ulong *rec_per_key_part; char llbuff[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; DBUG_ENTER("mi_repair_by_sort"); @@ -2510,7 +2513,7 @@ err: <>0 Error */ -int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, +int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info, const char * name, int rep_quick) { #ifndef THREAD @@ -2529,7 +2532,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, char llbuff[22]; IO_CACHE new_data_cache; /* For non-quick repair. */ IO_CACHE_SHARE io_share; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; ulonglong key_map=share->state.key_map; pthread_attr_t thr_attr; DBUG_ENTER("mi_repair_parallel"); @@ -3008,7 +3011,7 @@ err: static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) { int error; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; MI_INFO *info=sort_info->info; DBUG_ENTER("sort_key_read"); @@ -3035,7 +3038,7 @@ static int sort_key_read(MI_SORT_PARAM *sort_param, void *key) static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key) { int error; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; MI_INFO *info=sort_info->info; FT_WORD *wptr=0; DBUG_ENTER("sort_ft_key_read"); @@ -3122,8 +3125,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) my_off_t pos; uchar *to; MI_BLOCK_INFO block_info; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; @@ -3133,6 +3136,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) DBUG_RETURN(1); switch (share->data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: for (;;) { @@ -3548,8 +3554,8 @@ int sort_write_record(MI_SORT_PARAM *sort_param) ulong block_length,reclength; uchar *from; uchar block_buff[8]; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; MYISAM_SHARE *share=info->s; DBUG_ENTER("sort_write_record"); @@ -3557,6 +3563,9 @@ int sort_write_record(MI_SORT_PARAM *sort_param) if (sort_param->fix_datafile) { switch (sort_info->new_data_file_type) { + case BLOCK_RECORD: + DBUG_ASSERT(0); /* Impossible */ + break; case STATIC_RECORD: if (my_b_write(&info->rec_cache,sort_param->record, share->base.pack_reclength)) @@ -3575,7 +3584,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param) { /* must be sure that local buffer is big enough */ reclength=info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,sort_param->record)+ + _mi_calc_total_blob_length(info,sort_param->record)+ ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER; if (sort_info->buff_length < reclength) @@ -3664,24 +3673,25 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) { uint diff_pos[2]; char llbuff[22],llbuff2[22]; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param= sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param= sort_info->param; int cmp; if (sort_info->key_block->inited) { - cmp=ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + cmp=ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY,SEARCH_FIND | SEARCH_UPDATE, diff_pos); if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL) - ha_key_cmp(sort_param->seg,sort_info->key_block->lastkey, + ha_key_cmp(sort_param->seg, (uchar*) sort_info->key_block->lastkey, (uchar*) a, USE_WHOLE_KEY, SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos); else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS) { diff_pos[0]= mi_collect_stats_nonulls_next(sort_param->seg, sort_param->notnull, - sort_info->key_block->lastkey, + (uchar*) sort_info-> + key_block->lastkey, (uchar*)a); } sort_param->unique[diff_pos[0]-1]++; @@ -3704,8 +3714,8 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) llstr(sort_info->info->lastpos,llbuff), llstr(get_record_for_key(sort_info->info, sort_param->keyinfo, - sort_info->key_block-> - lastkey), + (uchar*) sort_info-> + key_block->lastkey), llbuff2)); param->testflag|=T_RETRY_WITHOUT_QUICK; if (sort_info->param->testflag & T_VERBOSE) @@ -3726,7 +3736,7 @@ static int sort_key_write(MI_SORT_PARAM *sort_param, const void *a) int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) { - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; SORT_KEY_BLOCKS *key_block=sort_info->key_block; MYISAM_SHARE *share=sort_info->info->s; uint val_off, val_len; @@ -3736,19 +3746,19 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) val_len=share->ft2_keyinfo.keylength; get_key_full_length_rdonly(val_off, ft_buf->lastkey); - to=ft_buf->lastkey+val_off; + to= (uchar*) ft_buf->lastkey+val_off; if (ft_buf->buf) { /* flushing first-level tree */ - error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey, HA_OFFSET_ERROR); for (from=to+val_len; - !error && from < ft_buf->buf; + !error && from < (uchar*) ft_buf->buf; from+= val_len) { memcpy(to, from, val_len); - error=sort_insert_key(sort_param,key_block,ft_buf->lastkey, + error=sort_insert_key(sort_param,key_block, (uchar*) ft_buf->lastkey, HA_OFFSET_ERROR); } return error; @@ -3757,8 +3767,8 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) error=flush_pending_blocks(sort_param); /* updating lastkey with second-level tree info */ ft_intXstore(ft_buf->lastkey+val_off, -ft_buf->count); - _mi_dpointer(sort_info->info, ft_buf->lastkey+val_off+HA_FT_WLEN, - share->state.key_root[sort_param->key]); + _mi_dpointer(sort_info->info, (uchar*) ft_buf->lastkey+val_off+HA_FT_WLEN, + share->state.key_root[sort_param->key]); /* restoring first level tree data in sort_info/sort_param */ sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks; sort_param->keyinfo=share->keyinfo+sort_param->key; @@ -3766,14 +3776,14 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param) /* writing lastkey in first-level tree */ return error ? error : sort_insert_key(sort_param,sort_info->key_block, - ft_buf->lastkey,HA_OFFSET_ERROR); + (uchar*) ft_buf->lastkey,HA_OFFSET_ERROR); } static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) { uint a_len, val_off, val_len, error; uchar *p; - SORT_INFO *sort_info=sort_param->sort_info; + MI_SORT_INFO *sort_info=sort_param->sort_info; SORT_FT_BUF *ft_buf=sort_info->ft_buf; SORT_KEY_BLOCKS *key_block=sort_info->key_block; @@ -3803,9 +3813,9 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) } get_key_full_length_rdonly(val_off, ft_buf->lastkey); - if (mi_compare_text(sort_param->seg->charset, + if (ha_compare_text(sort_param->seg->charset, ((uchar *)a)+1,a_len-1, - ft_buf->lastkey+1,val_off-1, 0, 0)==0) + (uchar*) ft_buf->lastkey+1,val_off-1, 0, 0)==0) { if (!ft_buf->buf) /* store in second-level tree */ { @@ -3821,16 +3831,16 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a) return 0; /* converting to two-level tree */ - p=ft_buf->lastkey+val_off; + p= (uchar*) ft_buf->lastkey+val_off; while (key_block->inited) key_block++; sort_info->key_block=key_block; sort_param->keyinfo=& sort_info->info->s->ft2_keyinfo; - ft_buf->count=(ft_buf->buf - p)/val_len; + ft_buf->count=((uchar*) ft_buf->buf - p)/val_len; /* flushing buffer to second-level tree */ - for (error=0; !error && p < ft_buf->buf; p+= val_len) + for (error=0; !error && p < (uchar*) ft_buf->buf; p+= val_len) error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR); ft_buf->buf=0; return error; @@ -3878,13 +3888,13 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, MI_KEY_PARAM s_temp; MI_INFO *info; MI_KEYDEF *keyinfo=sort_param->keyinfo; - SORT_INFO *sort_info= sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_insert_key"); - anc_buff=key_block->buff; + anc_buff= (uchar*) key_block->buff; info=sort_info->info; - lastkey=key_block->lastkey; + lastkey= (uchar*) key_block->lastkey; nod_flag= (key_block == sort_info->key_block ? 0 : info->s->base.key_reflength); @@ -3897,7 +3907,7 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, DBUG_RETURN(1); } a_length=2+nod_flag; - key_block->end_pos=anc_buff+2; + key_block->end_pos= (char*) anc_buff+2; lastkey=0; /* No previous key in block */ } else @@ -3905,18 +3915,18 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, /* Save pointer to previous block */ if (nod_flag) - _mi_kpointer(info,key_block->end_pos,prev_block); + _mi_kpointer(info,(uchar*) key_block->end_pos,prev_block); t_length=(*keyinfo->pack_key)(keyinfo,nod_flag, (uchar*) 0,lastkey,lastkey,key, &s_temp); - (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp); + (*keyinfo->store_key)(keyinfo, (uchar*) key_block->end_pos+nod_flag,&s_temp); a_length+=t_length; mi_putint(anc_buff,a_length,nod_flag); key_block->end_pos+=t_length; if (a_length <= keyinfo->block_length) { - VOID(_mi_move_key(keyinfo,key_block->lastkey,key)); + VOID(_mi_move_key(keyinfo,(uchar*) key_block->lastkey,key)); key_block->last_length=a_length-t_length; DBUG_RETURN(0); } @@ -3941,7 +3951,8 @@ static int sort_insert_key(MI_SORT_PARAM *sort_param, DBUG_DUMP("buff",(uchar*) anc_buff,mi_getint(anc_buff)); /* Write separator-key to block in next level */ - if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos)) + if (sort_insert_key(sort_param,key_block+1,(uchar*) key_block->lastkey, + filepos)) DBUG_RETURN(1); /* clear old block and write new key in it */ @@ -3957,8 +3968,8 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param) uint i; int old_file,error; uchar *key; - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; MI_INFO *info=sort_info->info; DBUG_ENTER("sort_delete_record"); @@ -4014,7 +4025,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) uint nod_flag,length; my_off_t filepos,key_file_length; SORT_KEY_BLOCKS *key_block; - SORT_INFO *sort_info= sort_param->sort_info; + MI_SORT_INFO *sort_info= sort_param->sort_info; myf myf_rw=sort_info->param->myf_rw; MI_INFO *info=sort_info->info; MI_KEYDEF *keyinfo=sort_param->keyinfo; @@ -4027,7 +4038,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) key_block->inited=0; length=mi_getint(key_block->buff); if (nod_flag) - _mi_kpointer(info,key_block->end_pos,filepos); + _mi_kpointer(info,(uchar*) key_block->end_pos,filepos); key_file_length=info->state->key_file_length; bzero((uchar*) key_block->buff+length, keyinfo->block_length-length); if ((filepos=_mi_new(info,keyinfo,DFLT_INIT_HITS)) == HA_OFFSET_ERROR) @@ -4037,7 +4048,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) if (key_file_length == info->state->key_file_length) { if (_mi_write_keypage(info, keyinfo, filepos, - DFLT_INIT_HITS, key_block->buff)) + DFLT_INIT_HITS, (uchar*) key_block->buff)) DBUG_RETURN(1); } else if (my_pwrite(info->s->kfile,(uchar*) key_block->buff, @@ -4052,7 +4063,7 @@ int flush_pending_blocks(MI_SORT_PARAM *sort_param) /* alloc space and pointers for key_blocks */ -static SORT_KEY_BLOCKS *alloc_key_blocks(MI_CHECK *param, uint blocks, +static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks, uint buffer_length) { reg1 uint i; @@ -4089,7 +4100,7 @@ int test_if_almost_full(MI_INFO *info) /* Recreate table with bigger more alloced record-data */ -int recreate_table(MI_CHECK *param, MI_INFO **org_info, char *filename) +int recreate_table(HA_CHECK *param, MI_INFO **org_info, char *filename) { int error; MI_INFO info; @@ -4262,7 +4273,7 @@ end: /* write suffix to data file if neaded */ -int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile) +int write_data_suffix(MI_SORT_INFO *sort_info, my_bool fix_datafile) { MI_INFO *info=sort_info->info; @@ -4283,7 +4294,7 @@ int write_data_suffix(SORT_INFO *sort_info, my_bool fix_datafile) /* Update state and myisamchk_time of indexfile */ -int update_state_info(MI_CHECK *param, MI_INFO *info,uint update) +int update_state_info(HA_CHECK *param, MI_INFO *info,uint update) { MYISAM_SHARE *share=info->s; @@ -4355,7 +4366,7 @@ err: param->auto_increment is bigger than the biggest key. */ -void update_auto_increment_key(MI_CHECK *param, MI_INFO *info, +void update_auto_increment_key(HA_CHECK *param, MI_INFO *info, my_bool repair_only) { uchar *record; @@ -4588,7 +4599,7 @@ my_bool mi_test_if_sort_rep(MI_INFO *info, ha_rows rows, static void -set_data_file_type(SORT_INFO *sort_info, MYISAM_SHARE *share) +set_data_file_type(MI_SORT_INFO *sort_info, MYISAM_SHARE *share) { if ((sort_info->new_data_file_type=share->data_file_type) == COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK) diff --git a/storage/myisam/mi_checksum.c b/storage/myisam/mi_checksum.c index 4e87de373bd..1aa56e571e3 100644 --- a/storage/myisam/mi_checksum.c +++ b/storage/myisam/mi_checksum.c @@ -31,9 +31,9 @@ ha_checksum mi_checksum(MI_INFO *info, const uchar *buf) case FIELD_BLOB: { length=_mi_calc_blob_length(rec->length- - mi_portable_sizeof_char_ptr, + portable_sizeof_char_ptr, buf); - memcpy((char*) &pos, buf+rec->length- mi_portable_sizeof_char_ptr, + memcpy((char*) &pos, buf+rec->length- portable_sizeof_char_ptr, sizeof(char*)); break; } diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c index c177aa8d987..0033120aba2 100644 --- a/storage/myisam/mi_create.c +++ b/storage/myisam/mi_create.c @@ -17,6 +17,7 @@ #include "ftdefs.h" #include "sp_defs.h" +#include #if defined(MSDOS) || defined(__WIN__) #ifdef __WIN__ @@ -40,11 +41,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, File dfile,file; int errpos,save_errno, create_mode= O_RDWR | O_TRUNC; myf create_flag; - uint fields,length,max_key_length,packed,pointer,real_length_diff, + uint fields,length,max_key_length,packed,pack_bytes,pointer,real_length_diff, key_length,info_length,key_segs,options,min_key_length_skip, base_pos,long_varchar_count,varchar_length, max_key_block_length,unique_key_parts,fulltext_keys,offset; - uint aligned_key_start, block_length; + uint aligned_key_start, block_length, res; ulong reclength, real_reclength,min_pack_length; char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr; ulong pack_reclength; @@ -56,7 +57,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, HA_KEYSEG *keyseg,tmp_keyseg; MI_COLUMNDEF *rec; ulong *rec_per_key_part; - my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; MI_CREATE_INFO tmp_create_info; DBUG_ENTER("mi_create"); DBUG_PRINT("enter", ("keys: %u columns: %u uniques: %u flags: %u", @@ -94,7 +95,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, ci->reloc_rows=ci->max_rows; /* Check if wrong parameter */ if (!(rec_per_key_part= - (ulong*) my_malloc((keys + uniques)*MI_MAX_KEY_SEG*sizeof(long), + (ulong*) my_malloc((keys + uniques)*HA_MAX_KEY_SEG*sizeof(long), MYF(MY_WME | MY_ZEROFILL)))) DBUG_RETURN(my_errno); @@ -116,10 +117,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, share.base.blobs++; if (pack_reclength != INT_MAX32) { - if (rec->length == 4+mi_portable_sizeof_char_ptr) + if (rec->length == 4+portable_sizeof_char_ptr) pack_reclength= INT_MAX32; else - pack_reclength+=(1 << ((rec->length-mi_portable_sizeof_char_ptr)*8)); /* Max blob length */ + pack_reclength+=(1 << ((rec->length-portable_sizeof_char_ptr)*8)); /* Max blob length */ } } else if (type == FIELD_SKIP_PRESPACE || @@ -192,11 +193,11 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, if (flags & HA_CREATE_RELIES_ON_SQL_LAYER) options|= HA_OPTION_RELIES_ON_SQL_LAYER; - packed=(packed+7)/8; + pack_bytes= (packed+7)/8; if (pack_reclength != INT_MAX32) pack_reclength+= reclength+packed + test(test_all_bits(options, HA_OPTION_CHECKSUM | HA_PACK_RECORD)); - min_pack_length+=packed; + min_pack_length+= pack_bytes; if (!ci->data_file_length && ci->max_rows) { @@ -273,7 +274,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, keyseg->type != HA_KEYTYPE_VARBINARY2) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } } keydef->keysegs+=sp_segs; @@ -282,7 +283,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, min_key_length_skip+=SPLEN*2*SPDIMS; #else my_errno= HA_ERR_UNSUPPORTED; - goto err; + goto err_no_lock; #endif /*HAVE_SPATIAL*/ } else if (keydef->flag & HA_FULLTEXT) @@ -298,7 +299,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, keyseg->type != HA_KEYTYPE_VARTEXT2) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } if (!(keyseg->flag & HA_BLOB_PART) && (keyseg->type == HA_KEYTYPE_VARTEXT1 || @@ -420,10 +421,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, } } /* if HA_FULLTEXT */ key_segs+=keydef->keysegs; - if (keydef->keysegs > MI_MAX_KEY_SEG) + if (keydef->keysegs > HA_MAX_KEY_SEG) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } /* key_segs may be 0 in the case when we only want to be able to @@ -435,7 +436,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, share.state.rec_per_key_part[key_segs-1]=1L; length+=key_length; /* Get block length for key, if defined by user */ - block_length= (keydef->block_length ? + block_length= (keydef->block_length ? my_round_up_to_next_power(keydef->block_length) : myisam_block_size); block_length= max(block_length, MI_MIN_KEY_BLOCK_LENGTH); @@ -445,10 +446,10 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, pointer,MI_MAX_KEYPTR_SIZE, block_length); if (keydef->block_length > MI_MAX_KEY_BLOCK_LENGTH || - length >= MI_MAX_KEY_BUFF) + length >= HA_MAX_KEY_BUFF) { my_errno=HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } set_if_bigger(max_key_block_length,keydef->block_length); keydef->keylength= (uint16) key_length; @@ -495,7 +496,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, "indexes and/or unique constraints.", MYF(0), name + dirname_length(name)); my_errno= HA_WRONG_CREATE_OPTION; - goto err; + goto err_no_lock; } bmove(share.state.header.file_version,(uchar*) myisam_file_magic,4); @@ -550,9 +551,9 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, share.base.pack_reclength=reclength+ test(options & HA_OPTION_CHECKSUM); share.base.max_pack_length=pack_reclength; share.base.min_pack_length=min_pack_length; - share.base.pack_bits=packed; + share.base.pack_bits= pack_bytes; share.base.fields=fields; - share.base.pack_fields=packed; + share.base.pack_fields= packed; #ifdef USE_RAID share.base.raid_type=ci->raid_type; share.base.raid_chunks=ci->raid_chunks; @@ -826,13 +827,16 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs, } errpos=0; pthread_mutex_unlock(&THR_LOCK_myisam); + res= 0; if (my_close(file,MYF(0))) - goto err; + res= my_errno; my_free((char*) rec_per_key_part,MYF(0)); - DBUG_RETURN(0); + DBUG_RETURN(res); err: pthread_mutex_unlock(&THR_LOCK_myisam); +err_no_lock: + save_errno=my_errno; switch (errpos) { case 3: diff --git a/storage/myisam/mi_dbug.c b/storage/myisam/mi_dbug.c index 07c314c43e6..0808a7e85dd 100644 --- a/storage/myisam/mi_dbug.c +++ b/storage/myisam/mi_dbug.c @@ -45,6 +45,7 @@ void _mi_print_key(FILE *stream, register HA_KEYSEG *keyseg, fprintf(stream,"NULL"); continue; } + end++; } switch (keyseg->type) { diff --git a/storage/myisam/mi_delete.c b/storage/myisam/mi_delete.c index 29a84570f1e..5d5ea8b071e 100644 --- a/storage/myisam/mi_delete.c +++ b/storage/myisam/mi_delete.c @@ -159,7 +159,7 @@ static int _mi_ck_real_delete(register MI_INFO *info, MI_KEYDEF *keyinfo, DBUG_RETURN(my_errno=HA_ERR_CRASHED); } if (!(root_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); DBUG_RETURN(my_errno=ENOMEM); @@ -221,7 +221,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, my_bool last_key; uchar *leaf_buff,*keypos; my_off_t leaf_page,next_block; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("d_search"); DBUG_DUMP("page",(uchar*) anc_buff,mi_getint(anc_buff)); @@ -306,7 +306,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { leaf_page=_mi_kpos(nod_flag,keypos); if (!(leaf_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) { DBUG_PRINT("error",("Couldn't allocate memory")); my_errno=ENOMEM; @@ -365,9 +365,7 @@ static int d_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, { /* This happens only with packed keys */ DBUG_PRINT("test",("Enlarging of key when deleting")); if (!_mi_get_last_key(info,keyinfo,anc_buff,lastkey,keypos,&length)) - { goto err; - } ret_value=_mi_insert(info,keyinfo,key,anc_buff,keypos,lastkey, (uchar*) 0,(uchar*) 0,(my_off_t) 0,(my_bool) 0); } @@ -405,7 +403,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, int ret_value,length; uint a_length,nod_flag,tmp; my_off_t next_page; - uchar keybuff[MI_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; + uchar keybuff[HA_MAX_KEY_BUFF],*endpos,*next_buff,*key_start, *prev_key; MYISAM_SHARE *share=info->s; MI_KEY_PARAM s_temp; DBUG_ENTER("del"); @@ -422,7 +420,7 @@ static int del(register MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *key, { next_page= _mi_kpos(nod_flag,endpos); if (!(next_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if (!_mi_fetch_keypage(info,keyinfo,next_page,DFLT_INIT_HITS,next_buff,0)) ret_value= -1; @@ -509,7 +507,7 @@ static int underflow(register MI_INFO *info, register MI_KEYDEF *keyinfo, uint length,anc_length,buff_length,leaf_length,p_length,s_length,nod_flag, key_reflength,key_length; my_off_t next_page; - uchar anc_key[MI_MAX_KEY_BUFF],leaf_key[MI_MAX_KEY_BUFF], + uchar anc_key[HA_MAX_KEY_BUFF],leaf_key[HA_MAX_KEY_BUFF], *buff,*endpos,*next_keypos,*anc_pos,*half_pos,*temp_pos,*prev_key, *after_key; MI_KEY_PARAM s_temp; diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c index eba868bfb82..08d2607c953 100644 --- a/storage/myisam/mi_dynrec.c +++ b/storage/myisam/mi_dynrec.c @@ -252,7 +252,7 @@ int _mi_write_blob_record(MI_INFO *info, const uchar *record) extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER+1); reclength= (info->s->base.pack_reclength + - _my_calc_total_blob_length(info,record)+ extra); + _mi_calc_total_blob_length(info,record)+ extra); #ifdef NOT_USED /* We now support big rows */ if (reclength > MI_DYN_MAX_ROW_LENGTH) { @@ -286,7 +286,7 @@ int _mi_update_blob_record(MI_INFO *info, my_off_t pos, const uchar *record) extra= (ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER)+MI_SPLIT_LENGTH+ MI_DYN_DELETE_BLOCK_HEADER); reclength= (info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,record)+ extra); + _mi_calc_total_blob_length(info,record)+ extra); #ifdef NOT_USED /* We now support big rows */ if (reclength > MI_DYN_MAX_ROW_LENGTH) { @@ -901,7 +901,7 @@ uint _mi_rec_pack(MI_INFO *info, register uchar *to, else { char *temp_pos; - size_t tmp_length=length-mi_portable_sizeof_char_ptr; + size_t tmp_length=length-portable_sizeof_char_ptr; memcpy((uchar*) to,from,tmp_length); memcpy_fixed(&temp_pos,from+tmp_length,sizeof(char*)); memcpy(to+tmp_length,temp_pos,(size_t) blob->length); @@ -1022,11 +1022,11 @@ my_bool _mi_rec_check(MI_INFO *info,const uchar *record, uchar *rec_buff, if (type == FIELD_BLOB) { uint blob_length= - _mi_calc_blob_length(length-mi_portable_sizeof_char_ptr,record); + _mi_calc_blob_length(length-portable_sizeof_char_ptr,record); if (!blob_length && !(flag & bit)) goto err; if (blob_length) - to+=length - mi_portable_sizeof_char_ptr+ blob_length; + to+=length - portable_sizeof_char_ptr+ blob_length; } else if (type == FIELD_SKIP_ZERO) { @@ -1209,7 +1209,7 @@ ulong _mi_rec_unpack(register MI_INFO *info, register uchar *to, uchar *from, } else if (type == FIELD_BLOB) { - uint size_length=rec_length- mi_portable_sizeof_char_ptr; + uint size_length=rec_length- portable_sizeof_char_ptr; ulong blob_length=_mi_calc_blob_length(size_length,from); ulong from_left= (ulong) (from_end - from); if (from_left < size_length || @@ -1259,7 +1259,7 @@ err: /* Calc length of blob. Update info in blobs->length */ -ulong _my_calc_total_blob_length(MI_INFO *info, const uchar *record) +ulong _mi_calc_total_blob_length(MI_INFO *info, const uchar *record) { ulong length; MI_BLOB *blob,*end; @@ -1293,7 +1293,7 @@ ulong _mi_calc_blob_length(uint length, const uchar *pos) } -void _my_store_blob_length(uchar *pos,uint pack_length,uint length) +void _mi_store_blob_length(uchar *pos,uint pack_length,uint length) { switch (pack_length) { case 1: @@ -1506,7 +1506,7 @@ int _mi_cmp_dynamic_record(register MI_INFO *info, register const uchar *record) if (info->s->base.blobs) { if (!(buffer=(uchar*) my_alloca(info->s->base.pack_reclength+ - _my_calc_total_blob_length(info,record)))) + _mi_calc_total_blob_length(info,record)))) DBUG_RETURN(-1); } reclength=_mi_rec_pack(info,buffer,record); diff --git a/storage/myisam/mi_key.c b/storage/myisam/mi_key.c index 230ebac3bb0..87b149ee1ea 100644 --- a/storage/myisam/mi_key.c +++ b/storage/myisam/mi_key.c @@ -425,7 +425,7 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr, /* The above changed info->lastkey2. Inform mi_rnext_same(). */ info->update&= ~HA_STATE_RNEXT_SAME; - _my_store_blob_length(record+keyseg->start, + _mi_store_blob_length(record+keyseg->start, (uint) keyseg->bit_start,length); key+=length; } diff --git a/storage/myisam/mi_log.c b/storage/myisam/mi_log.c index 91d3bb6f702..69b6746c6a2 100644 --- a/storage/myisam/mi_log.c +++ b/storage/myisam/mi_log.c @@ -133,7 +133,7 @@ void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info, if (!info->s->base.blobs) length=info->s->base.reclength; else - length=info->s->base.reclength+ _my_calc_total_blob_length(info,record); + length=info->s->base.reclength+ _mi_calc_total_blob_length(info,record); buff[0]=(char) command; mi_int2store(buff+1,info->dfile); mi_int4store(buff+3,pid); diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c index 9ff8ecdb14d..66546abb52a 100644 --- a/storage/myisam/mi_open.c +++ b/storage/myisam/mi_open.c @@ -82,8 +82,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) char *disk_cache, *disk_pos, *end_pos; MI_INFO info,*m_info,*old_info; MYISAM_SHARE share_buff,*share; - ulong rec_per_key_part[MI_MAX_POSSIBLE_KEY*MI_MAX_KEY_SEG]; - my_off_t key_root[MI_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; + ulong rec_per_key_part[HA_MAX_POSSIBLE_KEY*HA_MAX_KEY_SEG]; + my_off_t key_root[HA_MAX_POSSIBLE_KEY],key_del[MI_MAX_KEY_BLOCK_SIZE]; ulonglong max_key_file_length, max_data_file_length; DBUG_ENTER("mi_open"); @@ -104,7 +104,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) share_buff.state.rec_per_key_part=rec_per_key_part; share_buff.state.key_root=key_root; share_buff.state.key_del=key_del; - share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff)); + share_buff.key_cache= multi_key_cache_search(name_buff, strlen(name_buff), + dflt_key_cache); DBUG_EXECUTE_IF("myisam_pretend_crashed_table_on_open", if (strstr(name, "/t1")) @@ -210,7 +211,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) len,MI_BASE_INFO_SIZE)); } disk_pos= (char*) - my_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); + mi_n_base_info_read((uchar*) disk_cache + base_pos, &share->base); share->state.state_length=base_pos; if (!(open_flags & HA_OPEN_FOR_REPAIR) && @@ -235,8 +236,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) } key_parts+=fulltext_keys*FT_SEGS; - if (share->base.max_key_length > MI_MAX_KEY_BUFF || keys > MI_MAX_KEY || - key_parts >= MI_MAX_KEY * MI_MAX_KEY_SEG) + if (share->base.max_key_length > HA_MAX_KEY_BUFF || keys > MI_MAX_KEY || + key_parts >= MI_MAX_KEY * HA_MAX_KEY_SEG) { DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts)); my_errno=HA_ERR_UNSUPPORTED; @@ -452,7 +453,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) if (share->rec[i].type == (int) FIELD_BLOB) { share->blobs[j].pack_length= - share->rec[i].length-mi_portable_sizeof_char_ptr;; + share->rec[i].length-portable_sizeof_char_ptr;; share->blobs[j].offset=offset; j++; } @@ -1017,7 +1018,7 @@ uint mi_base_info_write(File file, MI_BASE_INFO *base) } -uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base) +uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base) { base->keystart = mi_sizekorr(ptr); ptr +=8; base->max_data_file_length = mi_sizekorr(ptr); ptr +=8; diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c index 1231ef026f7..2997db6e003 100644 --- a/storage/myisam/mi_packrec.c +++ b/storage/myisam/mi_packrec.c @@ -105,6 +105,7 @@ static void init_bit_buffer(MI_BIT_BUFF *bit_buff,uchar *buffer,uint length); static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count); static void fill_buffer(MI_BIT_BUFF *bit_buff); static uint max_bit(uint value); +static uint read_pack_length(uint version, const uchar *buf, ulong *length); #ifdef HAVE_MMAP static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, MI_BLOCK_INFO *info, uchar **rec_buff_p, @@ -1036,7 +1037,7 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, else { ulong length=get_bits(bit_buff,rec->space_length_bits); - uint pack_length=(uint) (end-to)-mi_portable_sizeof_char_ptr; + uint pack_length=(uint) (end-to)-portable_sizeof_char_ptr; if (bit_buff->blob_pos+length > bit_buff->blob_end) { bit_buff->error=1; @@ -1044,7 +1045,7 @@ static void uf_blob(MI_COLUMNDEF *rec, MI_BIT_BUFF *bit_buff, return; } decode_bytes(rec,bit_buff,bit_buff->blob_pos,bit_buff->blob_pos+length); - _my_store_blob_length((uchar*) to,pack_length,length); + _mi_store_blob_length((uchar*) to,pack_length,length); memcpy_fixed((char*) to+pack_length,(char*) &bit_buff->blob_pos, sizeof(char*)); bit_buff->blob_pos+=length; @@ -1625,7 +1626,7 @@ uint save_pack_length(uint version, uchar *block_buff, ulong length) } -uint read_pack_length(uint version, const uchar *buf, ulong *length) +static uint read_pack_length(uint version, const uchar *buf, ulong *length) { if (buf[0] < 254) { diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c index 932a4abd1b3..8bd122c828a 100644 --- a/storage/myisam/mi_range.c +++ b/storage/myisam/mi_range.c @@ -260,7 +260,7 @@ static uint _mi_keynr(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, uchar *keypos, uint *ret_max_key) { uint nod_flag,keynr,max_key; - uchar t_buff[MI_MAX_KEY_BUFF],*end; + uchar t_buff[HA_MAX_KEY_BUFF],*end; end= page+mi_getint(page); nod_flag=mi_test_if_nod(page); diff --git a/storage/myisam/mi_rkey.c b/storage/myisam/mi_rkey.c index 3dcaa27d3ca..5fd9ae6a6be 100644 --- a/storage/myisam/mi_rkey.c +++ b/storage/myisam/mi_rkey.c @@ -84,6 +84,8 @@ int mi_rkey(MI_INFO *info, uchar *buf, int inx, const uchar *key, { mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; + if (share->concurrent_insert) + rw_unlock(&share->key_root_lock[inx]); goto err; } break; diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c index acc8c073297..c45ed6d7d23 100644 --- a/storage/myisam/mi_search.c +++ b/storage/myisam/mi_search.c @@ -60,7 +60,7 @@ int _mi_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, int error,flag; uint nod_flag; uchar *keypos,*maxpos; - uchar lastkey[MI_MAX_KEY_BUFF],*buff; + uchar lastkey[HA_MAX_KEY_BUFF],*buff; DBUG_ENTER("_mi_search"); DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", (ulong) pos, nextflag, (ulong) info->lastpos)); @@ -242,7 +242,7 @@ int _mi_seq_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, { int flag; uint nod_flag,length,not_used[2]; - uchar t_buff[MI_MAX_KEY_BUFF],*end; + uchar t_buff[HA_MAX_KEY_BUFF],*end; DBUG_ENTER("_mi_seq_search"); LINT_INIT(flag); LINT_INIT(length); @@ -296,7 +296,7 @@ int _mi_prefix_search(MI_INFO *info, register MI_KEYDEF *keyinfo, uchar *page, int key_len_skip, seg_len_pack, key_len_left; uchar *end, *kseg, *vseg; uchar *sort_order=keyinfo->seg->charset->sort_order; - uchar tt_buff[MI_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; + uchar tt_buff[HA_MAX_KEY_BUFF+2], *t_buff=tt_buff+2; uchar *saved_from, *saved_to, *saved_vseg; uint saved_length=0, saved_prefix_len=0; uint length_pack; @@ -920,7 +920,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag, DBUG_ENTER("_mi_get_binary_pack_key"); page= *page_pos; - page_end=page+MI_MAX_KEY_BUFF+1; + page_end=page+HA_MAX_KEY_BUFF+1; start_key=key; /* @@ -1238,7 +1238,7 @@ int _mi_search_next(register MI_INFO *info, register MI_KEYDEF *keyinfo, { int error; uint nod_flag; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_search_next"); DBUG_PRINT("enter",("nextflag: %u lastpos: %lu int_keypos: %lu", nextflag, (ulong) info->lastpos, diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c index 1165ea4bb32..1ceedf7f86a 100644 --- a/storage/myisam/mi_test1.c +++ b/storage/myisam/mi_test1.c @@ -71,12 +71,12 @@ static int run_test(const char *filename) /* First define 2 columns */ recinfo[0].type=FIELD_NORMAL; recinfo[0].length=1; /* For NULL bits */ recinfo[1].type=key_field; - recinfo[1].length= (key_field == FIELD_BLOB ? 4+mi_portable_sizeof_char_ptr : + recinfo[1].length= (key_field == FIELD_BLOB ? 4+portable_sizeof_char_ptr : key_length); if (key_field == FIELD_VARCHAR) recinfo[1].length+= HA_VARCHAR_PACKLENGTH(key_length);; recinfo[2].type=extra_field; - recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + mi_portable_sizeof_char_ptr : 24); + recinfo[2].length= (extra_field == FIELD_BLOB ? 4 + portable_sizeof_char_ptr : 24); if (extra_field == FIELD_VARCHAR) recinfo[2].length+= HA_VARCHAR_PACKLENGTH(recinfo[2].length); if (opt_unique) @@ -628,7 +628,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), key_type= HA_KEYTYPE_VARTEXT1; break; case 'k': - if (key_length < 4 || key_length > MI_MAX_KEY_LENGTH) + if (key_length < 4 || key_length > HA_MAX_KEY_LENGTH) { fprintf(stderr,"Wrong key length\n"); exit(1); diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c index 96ee82e023c..b16e0c4e506 100644 --- a/storage/myisam/mi_test2.c +++ b/storage/myisam/mi_test2.c @@ -26,6 +26,7 @@ #endif #include "myisamdef.h" #include +#include #define STANDARD_LENGTH 37 #define MYISAM_KEYS 6 @@ -187,7 +188,7 @@ int main(int argc, char *argv[]) if (use_blob) { recinfo[6].type=FIELD_BLOB; - recinfo[6].length=4+mi_portable_sizeof_char_ptr; + recinfo[6].length=4+portable_sizeof_char_ptr; recinfo[6].null_bit=0; recinfo[6].null_pos=0; } @@ -602,7 +603,7 @@ int main(int argc, char *argv[]) if (mi_rsame(file,read_record2,(int) i)) goto err; if (bcmp(read_record,read_record2,reclength) != 0) { - printf("is_rsame didn't find same record\n"); + printf("mi_rsame didn't find same record\n"); goto end; } } @@ -776,8 +777,7 @@ int main(int argc, char *argv[]) { ulong blob_length,pos; uchar *ptr; - longget(blob_length,read_record+blob_pos+4); - ptr=(uchar*) blob_length; + memcpy_fixed(&ptr, read_record+blob_pos+4, sizeof(ptr)); longget(blob_length,read_record+blob_pos); for (pos=0 ; pos < blob_length ; pos++) { diff --git a/storage/myisam/mi_unique.c b/storage/myisam/mi_unique.c index e490fb683e4..02fcd9289dd 100644 --- a/storage/myisam/mi_unique.c +++ b/storage/myisam/mi_unique.c @@ -212,7 +212,7 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const uchar *a, const uchar *b, if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 || type == HA_KEYTYPE_VARTEXT2) { - if (mi_compare_text(keyseg->charset, (uchar *) pos_a, a_length, + if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length, (uchar *) pos_b, b_length, 0, 1)) return 1; } diff --git a/storage/myisam/mi_update.c b/storage/myisam/mi_update.c index 924cdfcbc21..f4bc1b60759 100644 --- a/storage/myisam/mi_update.c +++ b/storage/myisam/mi_update.c @@ -23,7 +23,7 @@ int mi_update(register MI_INFO *info, const uchar *oldrec, uchar *newrec) int flag,key_changed,save_errno; reg3 my_off_t pos; uint i; - uchar old_key[MI_MAX_KEY_BUFF],*new_key; + uchar old_key[HA_MAX_KEY_BUFF],*new_key; bool auto_key_changed=0; ulonglong changed; MYISAM_SHARE *share=info->s; diff --git a/storage/myisam/mi_write.c b/storage/myisam/mi_write.c index 8ae979f8bf6..9d82c34df3d 100644 --- a/storage/myisam/mi_write.c +++ b/storage/myisam/mi_write.c @@ -346,7 +346,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, int error,flag; uint nod_flag, search_key_length; uchar *temp_buff,*keypos; - uchar keybuff[MI_MAX_KEY_BUFF]; + uchar keybuff[HA_MAX_KEY_BUFF]; my_bool was_last_key; my_off_t next_page, dupp_key_pos; DBUG_ENTER("w_search"); @@ -354,7 +354,7 @@ static int w_search(register MI_INFO *info, register MI_KEYDEF *keyinfo, search_key_length= (comp_flag & SEARCH_FIND) ? key_length : USE_WHOLE_KEY; if (!(temp_buff= (uchar*) my_alloca((uint) keyinfo->block_length+ - MI_MAX_KEY_BUFF*2))) + HA_MAX_KEY_BUFF*2))) DBUG_RETURN(-1); if (!_mi_fetch_keypage(info,keyinfo,page,DFLT_INIT_HITS,temp_buff,0)) goto err; @@ -545,7 +545,7 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo, get_key_length(alen,a); DBUG_ASSERT(info->ft1_to_ft2==0); if (alen == blen && - mi_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) + ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0, 0)==0) { /* yup. converting */ info->ft1_to_ft2=(DYNAMIC_ARRAY *) @@ -707,7 +707,7 @@ static uchar *_mi_find_last_pos(MI_KEYDEF *keyinfo, uchar *page, { uint keys,length,last_length,key_ref_length; uchar *end,*lastpos,*prevpos; - uchar key_buff[MI_MAX_KEY_BUFF]; + uchar key_buff[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_find_last_pos"); key_ref_length=2; @@ -764,7 +764,7 @@ static int _mi_balance_page(register MI_INFO *info, MI_KEYDEF *keyinfo, length,keys; uchar *pos,*buff,*extra_buff; my_off_t next_page,new_pos; - uchar tmp_part_key[MI_MAX_KEY_BUFF]; + uchar tmp_part_key[HA_MAX_KEY_BUFF]; DBUG_ENTER("_mi_balance_page"); k_length=keyinfo->keylength; @@ -930,7 +930,7 @@ static int keys_free(uchar *key, TREE_FREE mode, bulk_insert_param *param) Probably I can use info->lastkey here, but I'm not sure, and to be safe I'd better use local lastkey. */ - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; uint keylen; MI_KEYDEF *keyinfo; diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c index 3700251ab15..2ed7db73770 100644 --- a/storage/myisam/myisamchk.c +++ b/storage/myisam/myisamchk.c @@ -16,10 +16,10 @@ /* Describe, check and repair of MyISAM tables */ #include "fulltext.h" - #include #include #include +#include #ifdef HAVE_SYS_VADVICE_H #include #endif @@ -67,9 +67,9 @@ static const char *myisam_stats_method_str="nulls_unequal"; static void get_options(int *argc,char * * *argv); static void print_version(void); static void usage(void); -static int myisamchk(MI_CHECK *param, char *filename); -static void descript(MI_CHECK *param, register MI_INFO *info, char * name); -static int mi_sort_records(MI_CHECK *param, register MI_INFO *info, +static int myisamchk(HA_CHECK *param, char *filename); +static void descript(HA_CHECK *param, register MI_INFO *info, char * name); +static int mi_sort_records(HA_CHECK *param, register MI_INFO *info, char * name, uint sort_key, my_bool write_info, my_bool update_index); static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info, @@ -77,7 +77,7 @@ static int sort_record_index(MI_SORT_PARAM *sort_param, MI_INFO *info, my_off_t page,uchar *buff,uint sortkey, File new_file, my_bool update_index); -MI_CHECK check_param; +HA_CHECK check_param; /* Main program */ @@ -695,7 +695,7 @@ get_one_option(int optid, case OPT_STATS_METHOD: { int method; - enum_mi_stats_method method_conv; + enum_handler_stats_method method_conv; LINT_INIT(method_conv); myisam_stats_method_str= argument; if ((method=find_type(argument, &myisam_stats_method_typelib, 2)) <= 0) @@ -794,7 +794,7 @@ static void get_options(register int *argc,register char ***argv) /* Check table */ -static int myisamchk(MI_CHECK *param, char * filename) +static int myisamchk(HA_CHECK *param, char * filename) { int error,lock_type,recreate; int rep_quick= param->testflag & (T_QUICK | T_FORCE_UNIQUENESS); @@ -1199,7 +1199,7 @@ end2: /* Write info about table */ -static void descript(MI_CHECK *param, register MI_INFO *info, char * name) +static void descript(HA_CHECK *param, register MI_INFO *info, char * name) { uint key,keyseg_nr,field,start; reg3 MI_KEYDEF *keyinfo; @@ -1464,7 +1464,7 @@ static void descript(MI_CHECK *param, register MI_INFO *info, char * name) /* Sort records according to one key */ -static int mi_sort_records(MI_CHECK *param, +static int mi_sort_records(HA_CHECK *param, register MI_INFO *info, char * name, uint sort_key, my_bool write_info, @@ -1478,7 +1478,7 @@ static int mi_sort_records(MI_CHECK *param, ha_rows old_record_count; MYISAM_SHARE *share=info->s; char llbuff[22],llbuff2[22]; - SORT_INFO sort_info; + MI_SORT_INFO sort_info; MI_SORT_PARAM sort_param; DBUG_ENTER("sort_records"); @@ -1653,10 +1653,10 @@ static int sort_record_index(MI_SORT_PARAM *sort_param,MI_INFO *info, uint nod_flag,used_length,key_length; uchar *temp_buff,*keypos,*endpos; my_off_t next_page,rec_pos; - uchar lastkey[MI_MAX_KEY_BUFF]; + uchar lastkey[HA_MAX_KEY_BUFF]; char llbuff[22]; - SORT_INFO *sort_info= sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info= sort_param->sort_info; + HA_CHECK *param=sort_info->param; DBUG_ENTER("sort_record_index"); nod_flag=mi_test_if_nod(buff); @@ -1744,7 +1744,7 @@ err: static int not_killed= 0; -volatile int *killed_ptr(MI_CHECK *param __attribute__((unused))) +volatile int *killed_ptr(HA_CHECK *param __attribute__((unused))) { return ¬_killed; /* always NULL */ } @@ -1752,7 +1752,7 @@ volatile int *killed_ptr(MI_CHECK *param __attribute__((unused))) /* print warnings and errors */ /* VARARGS */ -void mi_check_print_info(MI_CHECK *param __attribute__((unused)), +void mi_check_print_info(HA_CHECK *param __attribute__((unused)), const char *fmt,...) { va_list args; @@ -1765,7 +1765,7 @@ void mi_check_print_info(MI_CHECK *param __attribute__((unused)), /* VARARGS */ -void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) +void mi_check_print_warning(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("mi_check_print_warning"); @@ -1790,7 +1790,7 @@ void mi_check_print_warning(MI_CHECK *param, const char *fmt,...) /* VARARGS */ -void mi_check_print_error(MI_CHECK *param, const char *fmt,...) +void mi_check_print_error(HA_CHECK *param, const char *fmt,...) { va_list args; DBUG_ENTER("mi_check_print_error"); diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h index f68d66d4cbb..0daa6a7fc72 100644 --- a/storage/myisam/myisamdef.h +++ b/storage/myisam/myisamdef.h @@ -15,8 +15,8 @@ /* This file is included by all internal myisam files */ -#include "myisam.h" /* Structs & some defines */ -#include "myisampack.h" /* packing of keys */ +#include "myisam.h" /* Structs & some defines */ +#include "myisampack.h" /* packing of keys */ #include #ifdef THREAD #include @@ -26,15 +26,16 @@ #endif #if defined(my_write) && !defined(MAP_TO_USE_RAID) -#undef my_write /* undef map from my_nosys; We need test-if-disk full */ +/* undef map from my_nosys; We need test-if-disk full */ +#undef my_write #endif typedef struct st_mi_status_info { - ha_rows records; /* Rows in table */ - ha_rows del; /* Removed rows */ - my_off_t empty; /* lost space in datafile */ - my_off_t key_empty; /* lost space in indexfile */ + ha_rows records; /* Rows in table */ + ha_rows del; /* Removed rows */ + my_off_t empty; /* lost space in datafile */ + my_off_t key_empty; /* lost space in indexfile */ my_off_t key_file_length; my_off_t data_file_length; ha_checksum checksum; @@ -42,347 +43,292 @@ typedef struct st_mi_status_info typedef struct st_mi_state_info { - struct { /* Fileheader */ + struct + { /* Fileheader */ uchar file_version[4]; uchar options[2]; uchar header_length[2]; uchar state_info_length[2]; uchar base_info_length[2]; uchar base_pos[2]; - uchar key_parts[2]; /* Key parts */ - uchar unique_key_parts[2]; /* Key parts + unique parts */ - uchar keys; /* number of keys in file */ - uchar uniques; /* number of UNIQUE definitions */ - uchar language; /* Language for indexes */ - uchar max_block_size_index; /* max keyblock size */ + uchar key_parts[2]; /* Key parts */ + uchar unique_key_parts[2]; /* Key parts + unique parts */ + uchar keys; /* number of keys in file */ + uchar uniques; /* number of UNIQUE definitions */ + uchar language; /* Language for indexes */ + uchar max_block_size_index; /* max keyblock size */ uchar fulltext_keys; uchar not_used; /* To align to 8 */ } header; MI_STATUS_INFO state; - ha_rows split; /* number of split blocks */ - my_off_t dellink; /* Link to next removed block */ + ha_rows split; /* number of split blocks */ + my_off_t dellink; /* Link to next removed block */ ulonglong auto_increment; - ulong process; /* process that updated table last */ - ulong unique; /* Unique number for this process */ - ulong update_count; /* Updated for each write lock */ + ulong process; /* process that updated table last */ + ulong unique; /* Unique number for this process */ + ulong update_count; /* Updated for each write lock */ ulong status; ulong *rec_per_key_part; - my_off_t *key_root; /* Start of key trees */ - my_off_t *key_del; /* delete links for trees */ - my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ - - ulong sec_index_changed; /* Updated when new sec_index */ - ulong sec_index_used; /* which extra index are in use */ - ulonglong key_map; /* Which keys are in use */ ha_checksum checksum; /* Table checksum */ - ulong version; /* timestamp of create */ - time_t create_time; /* Time when created database */ - time_t recover_time; /* Time for last recover */ - time_t check_time; /* Time for last check */ - uint sortkey; /* sorted by this key (not used) */ + my_off_t *key_root; /* Start of key trees */ + my_off_t *key_del; /* delete links for trees */ + my_off_t rec_per_key_rows; /* Rows when calculating rec_per_key */ + + ulong sec_index_changed; /* Updated when new sec_index */ + ulong sec_index_used; /* which extra index are in use */ + ulonglong key_map; /* Which keys are in use */ + ulong version; /* timestamp of create */ + time_t create_time; /* Time when created database */ + time_t recover_time; /* Time for last recover */ + time_t check_time; /* Time for last check */ + uint sortkey; /* sorted by this key (not used) */ uint open_count; - uint8 changed; /* Changed since myisamchk */ + uint8 changed; /* Changed since myisamchk */ /* the following isn't saved on disk */ - uint state_diff_length; /* Should be 0 */ - uint state_length; /* Length of state header in file */ + uint state_diff_length; /* Should be 0 */ + uint state_length; /* Length of state header in file */ ulong *key_info; } MI_STATE_INFO; -#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) -#define MI_STATE_KEY_SIZE 8 +#define MI_STATE_INFO_SIZE (24+14*8+7*4+2*2+8) +#define MI_STATE_KEY_SIZE 8 #define MI_STATE_KEYBLOCK_SIZE 8 -#define MI_STATE_KEYSEG_SIZE 4 -#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*MI_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE) -#define MI_KEYDEF_SIZE (2+ 5*2) -#define MI_UNIQUEDEF_SIZE (2+1+1) -#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) -#define MI_COLUMNDEF_SIZE (2*3+1) -#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) -#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ +#define MI_STATE_KEYSEG_SIZE 4 +#define MI_STATE_EXTRA_SIZE ((MI_MAX_KEY+MI_MAX_KEY_BLOCK_SIZE)*MI_STATE_KEY_SIZE + MI_MAX_KEY*HA_MAX_KEY_SEG*MI_STATE_KEYSEG_SIZE) +#define MI_KEYDEF_SIZE (2+ 5*2) +#define MI_UNIQUEDEF_SIZE (2+1+1) +#define HA_KEYSEG_SIZE (6+ 2*2 + 4*2) +#define MI_COLUMNDEF_SIZE (2*3+1) +#define MI_BASE_INFO_SIZE (5*8 + 8*4 + 4 + 4*2 + 16) +#define MI_INDEX_BLOCK_MARGIN 16 /* Safety margin for .MYI tables */ typedef struct st_mi_base_info { - my_off_t keystart; /* Start of keys */ + my_off_t keystart; /* Start of keys */ my_off_t max_data_file_length; my_off_t max_key_file_length; my_off_t margin_key_file_length; - ha_rows records,reloc; /* Create information */ - ulong mean_row_length; /* Create information */ - ulong reclength; /* length of unpacked record */ - ulong pack_reclength; /* Length of full packed rec. */ + ha_rows records, reloc; /* Create information */ + ulong mean_row_length; /* Create information */ + ulong reclength; /* length of unpacked record */ + ulong pack_reclength; /* Length of full packed rec. */ ulong min_pack_length; - ulong max_pack_length; /* Max possibly length of packed rec.*/ + ulong max_pack_length; /* Max possibly length of packed rec.*/ ulong min_block_length; - ulong fields, /* fields in table */ - pack_fields; /* packed fields in table */ - uint rec_reflength; /* = 2-8 */ - uint key_reflength; /* = 2-8 */ - uint keys; /* same as in state.header */ - uint auto_key; /* Which key-1 is a auto key */ - uint blobs; /* Number of blobs */ - uint pack_bits; /* Length of packed bits */ - uint max_key_block_length; /* Max block length */ - uint max_key_length; /* Max key length */ + ulong fields, /* fields in table */ + pack_fields; /* packed fields in table */ + uint rec_reflength; /* = 2-8 */ + uint key_reflength; /* = 2-8 */ + uint keys; /* same as in state.header */ + uint auto_key; /* Which key-1 is a auto key */ + uint blobs; /* Number of blobs */ + uint pack_bits; /* Length of packed bits */ + uint max_key_block_length; /* Max block length */ + uint max_key_length; /* Max key length */ /* Extra allocation when using dynamic record format */ uint extra_alloc_bytes; uint extra_alloc_procent; /* Info about raid */ - uint raid_type,raid_chunks; + uint raid_type, raid_chunks; ulong raid_chunksize; /* The following are from the header */ - uint key_parts,all_key_parts; + uint key_parts, all_key_parts; } MI_BASE_INFO; - /* Structs used intern in database */ + /* Structs used intern in database */ -typedef struct st_mi_blob /* Info of record */ +typedef struct st_mi_blob /* Info of record */ { - ulong offset; /* Offset to blob in record */ - uint pack_length; /* Type of packed length */ - ulong length; /* Calc:ed for each record */ + ulong offset; /* Offset to blob in record */ + uint pack_length; /* Type of packed length */ + ulong length; /* Calc:ed for each record */ } MI_BLOB; -typedef struct st_mi_isam_pack { +typedef struct st_mi_isam_pack +{ ulong header_length; uint ref_length; uchar version; } MI_PACK; -#define MAX_NONMAPPED_INSERTS 1000 +#define MAX_NONMAPPED_INSERTS 1000 -typedef struct st_mi_isam_share { /* Shared between opens */ +typedef struct st_mi_isam_share +{ /* Shared between opens */ MI_STATE_INFO state; MI_BASE_INFO base; - MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ - MI_KEYDEF *keyinfo; /* Key definitions */ - MI_UNIQUEDEF *uniqueinfo; /* unique definitions */ - HA_KEYSEG *keyparts; /* key part info */ - MI_COLUMNDEF *rec; /* Pointer to field information */ - MI_PACK pack; /* Data about packed records */ - MI_BLOB *blobs; /* Pointer to blobs */ - char *unique_file_name; /* realpath() of index file */ - char *data_file_name, /* Resolved path names from symlinks */ - *index_file_name; - uchar *file_map; /* mem-map of file if possible */ - KEY_CACHE *key_cache; /* ref to the current key cache */ + MI_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */ + MI_KEYDEF *keyinfo; /* Key definitions */ + MI_UNIQUEDEF *uniqueinfo; /* unique definitions */ + HA_KEYSEG *keyparts; /* key part info */ + MI_COLUMNDEF *rec; /* Pointer to field information */ + MI_PACK pack; /* Data about packed records */ + MI_BLOB *blobs; /* Pointer to blobs */ + char *unique_file_name; /* realpath() of index file */ + char *data_file_name, /* Resolved path names from symlinks */ + *index_file_name; + uchar *file_map; /* mem-map of file if possible */ + KEY_CACHE *key_cache; /* ref to the current key cache */ MI_DECODE_TREE *decode_trees; uint16 *decode_tables; - int (*read_record)(struct st_myisam_info*, my_off_t, uchar*); - int (*write_record)(struct st_myisam_info*, const uchar*); - int (*update_record)(struct st_myisam_info*, my_off_t, const uchar*); - int (*delete_record)(struct st_myisam_info*); - int (*read_rnd)(struct st_myisam_info*, uchar*, my_off_t, my_bool); - int (*compare_record)(struct st_myisam_info*, const uchar *); /* Function to use for a row checksum. */ - ha_checksum (*calc_checksum)(struct st_myisam_info*, const uchar *); - int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *, - const uchar *record, my_off_t pos); - uint (*file_read)(MI_INFO *, uchar *, uint, my_off_t, myf); - uint (*file_write)(MI_INFO *, uchar *, uint, my_off_t, myf); + int(*read_record) (struct st_myisam_info *, my_off_t, uchar*); + int(*write_record) (struct st_myisam_info *, const uchar*); + int(*update_record) (struct st_myisam_info *, my_off_t, const uchar*); + int(*delete_record) (struct st_myisam_info *); + int(*read_rnd) (struct st_myisam_info *, uchar*, my_off_t, my_bool); + int(*compare_record) (struct st_myisam_info *, const uchar*); + ha_checksum(*calc_checksum) (struct st_myisam_info *, const uchar*); + int(*compare_unique) (struct st_myisam_info *, MI_UNIQUEDEF *, + const uchar *record, my_off_t pos); + uint(*file_read) (MI_INFO *, uchar *, uint, my_off_t, myf); + uint(*file_write) (MI_INFO *, uchar *, uint, my_off_t, myf); invalidator_by_filename invalidator; /* query cache invalidator */ - ulong this_process; /* processid */ - ulong last_process; /* For table-change-check */ - ulong last_version; /* Version on start */ - ulong options; /* Options used */ - ulong min_pack_length; /* Theese are used by packed data */ + ulong this_process; /* processid */ + ulong last_process; /* For table-change-check */ + ulong last_version; /* Version on start */ + ulong options; /* Options used */ + ulong min_pack_length; /* Theese are used by packed data */ ulong max_pack_length; ulong state_diff_length; - uint rec_reflength; /* rec_reflength in use now */ - uint unique_name_length; + uint rec_reflength; /* rec_reflength in use now */ + uint unique_name_length; uint32 ftparsers; /* Number of distinct ftparsers + 1 */ - File kfile; /* Shared keyfile */ - File data_file; /* Shared data file */ - int mode; /* mode of file on open */ - uint reopen; /* How many times reopened */ - uint w_locks,r_locks,tot_locks; /* Number of read/write locks */ - uint blocksize; /* blocksize of keyfile */ + File kfile; /* Shared keyfile */ + File data_file; /* Shared data file */ + int mode; /* mode of file on open */ + uint reopen; /* How many times reopened */ + uint w_locks, r_locks, tot_locks; /* Number of read/write locks */ + uint blocksize; /* blocksize of keyfile */ myf write_flag; enum data_file_type data_file_type; /* Below flag is needed to make log tables work with concurrent insert */ my_bool is_log_table; - my_bool changed, /* If changed since lock */ - global_changed, /* If changed since open */ - not_flushed, - temporary,delay_key_write, - concurrent_insert; + my_bool changed, /* If changed since lock */ + global_changed, /* If changed since open */ + not_flushed, temporary, delay_key_write, concurrent_insert; #ifdef THREAD THR_LOCK lock; - pthread_mutex_t intern_lock; /* Locking for use with _locking */ + pthread_mutex_t intern_lock; /* Locking for use with _locking */ rw_lock_t *key_root_lock; #endif my_off_t mmaped_length; - uint nonmmaped_inserts; /* counter of writing in non-mmaped - area */ + /* counter of writing in non-mmaped area */ + uint nonmmaped_inserts; rw_lock_t mmap_lock; } MYISAM_SHARE; -typedef uint mi_bit_type; - -typedef struct st_mi_bit_buff { /* Used for packing of record */ - mi_bit_type current_byte; - uint bits; - uchar *pos,*end,*blob_pos,*blob_end; - uint error; -} MI_BIT_BUFF; - -struct st_myisam_info { - MYISAM_SHARE *s; /* Shared between open:s */ - MI_STATUS_INFO *state,save_state; - MI_BLOB *blobs; /* Pointer to blobs */ - MI_BIT_BUFF bit_buff; +struct st_myisam_info +{ + MYISAM_SHARE *s; /* Shared between open:s */ + MI_STATUS_INFO *state, save_state; + MI_BLOB *blobs; /* Pointer to blobs */ + MI_BIT_BUFF bit_buff; /* accumulate indexfile changes between write's */ - TREE *bulk_insert; + TREE *bulk_insert; DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */ MEM_ROOT ft_memroot; /* used by the parser */ - MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ - char *filename; /* parameter to open filename */ - uchar *buff, /* Temp area for key */ - *lastkey,*lastkey2; /* Last used search key */ - uchar *first_mbr_key; /* Searhed spatial key */ - uchar *rec_buff; /* Tempbuff for recordpack */ - uchar *int_keypos, /* Save position for next/previous */ - *int_maxpos; /* -""- */ - uint int_nod_flag; /* -""- */ - uint32 int_keytree_version; /* -""- */ - int (*read_record)(struct st_myisam_info*, my_off_t, uchar*); + MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */ + char *filename; /* parameter to open filename */ + uchar *buff, /* Temp area for key */ + *lastkey, *lastkey2; /* Last used search key */ + uchar *first_mbr_key; /* Searhed spatial key */ + uchar *rec_buff; /* Tempbuff for recordpack */ + uchar *int_keypos, /* Save position for next/previous */ + *int_maxpos; /* -""- */ + uint int_nod_flag; /* -""- */ + uint32 int_keytree_version; /* -""- */ + int(*read_record) (struct st_myisam_info *, my_off_t, uchar *); invalidator_by_filename invalidator; /* query cache invalidator */ - ulong this_unique; /* uniq filenumber or thread */ - ulong last_unique; /* last unique number */ - ulong this_loop; /* counter for this open */ - ulong last_loop; /* last used counter */ - my_off_t lastpos, /* Last record position */ - nextpos; /* Position to next record */ + ulong this_unique; /* uniq filenumber or thread */ + ulong last_unique; /* last unique number */ + ulong this_loop; /* counter for this open */ + ulong last_loop; /* last used counter */ + my_off_t lastpos, /* Last record position */ + nextpos; /* Position to next record */ my_off_t save_lastpos; - my_off_t pos; /* Intern variable */ - my_off_t last_keypage; /* Last key page read */ - my_off_t last_search_keypage; /* Last keypage when searching */ + my_off_t pos; /* Intern variable */ + my_off_t last_keypage; /* Last key page read */ + my_off_t last_search_keypage; /* Last keypage when searching */ my_off_t dupp_key_pos; ha_checksum checksum; /* Temp storage for row checksum */ - /* QQ: the folloing two xxx_length fields should be removed, - as they are not compatible with parallel repair */ - ulong packed_length,blob_length; /* Length of found, packed record */ - int dfile; /* The datafile */ - uint opt_flag; /* Optim. for space/speed */ - uint update; /* If file changed since open */ - int lastinx; /* Last used index */ - uint lastkey_length; /* Length of key in lastkey */ - uint last_rkey_length; /* Last length in mi_rkey() */ + /* + QQ: the folloing two xxx_length fields should be removed, + as they are not compatible with parallel repair + */ + ulong packed_length, blob_length; /* Length of found, packed record */ + int dfile; /* The datafile */ + uint opt_flag; /* Optim. for space/speed */ + uint update; /* If file changed since open */ + int lastinx; /* Last used index */ + uint lastkey_length; /* Length of key in lastkey */ + uint last_rkey_length; /* Last length in mi_rkey() */ enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */ - uint save_lastkey_length; - uint pack_key_length; /* For MYISAMMRG */ + uint save_lastkey_length; + uint pack_key_length; /* For MYISAMMRG */ uint16 last_used_keyseg; /* For MyISAMMRG */ - int errkey; /* Got last error on this key */ - int lock_type; /* How database was locked */ - int tmp_lock_type; /* When locked by readinfo */ - uint data_changed; /* Somebody has changed data */ - uint save_update; /* When using KEY_READ */ - int save_lastinx; - LIST open_list; - IO_CACHE rec_cache; /* When cacheing records */ - uint preload_buff_size; /* When preloading indexes */ - myf lock_wait; /* is 0 or MY_DONT_WAIT */ - my_bool was_locked; /* Was locked in panic */ - my_bool append_insert_at_end; /* Set if concurrent insert */ + int errkey; /* Got last error on this key */ + int lock_type; /* How database was locked */ + int tmp_lock_type; /* When locked by readinfo */ + uint data_changed; /* Somebody has changed data */ + uint save_update; /* When using KEY_READ */ + int save_lastinx; + LIST open_list; + IO_CACHE rec_cache; /* When cacheing records */ + uint preload_buff_size; /* When preloading indexes */ + myf lock_wait; /* is 0 or MY_DONT_WAIT */ + my_bool was_locked; /* Was locked in panic */ + my_bool append_insert_at_end; /* Set if concurrent insert */ my_bool quick_mode; - my_bool page_changed; /* If info->buff can't be used for rnext */ - my_bool buff_used; /* If info->buff has to be reread for rnext */ - my_bool once_flags; /* For MYISAMMRG */ + /* If info->buff can't be used for rnext */ + my_bool page_changed; + /* If info->buff has to be reread for rnext */ + my_bool buff_used; + my_bool once_flags; /* For MYISAMMRG */ #ifdef __WIN__ my_bool owned_by_merge; /* This MyISAM table is part of a merge union */ #endif #ifdef THREAD THR_LOCK_DATA lock; #endif - uchar *rtree_recursion_state; /* For RTREE */ - int rtree_recursion_depth; + uchar *rtree_recursion_state; /* For RTREE */ + int rtree_recursion_depth; }; -typedef struct st_buffpek { - my_off_t file_pos; /* Where we are in the sort file */ - uchar *base,*key; /* Key pointers */ - ha_rows count; /* Number of rows in table */ - ulong mem_count; /* numbers of keys in memory */ - ulong max_keys; /* Max keys in buffert */ -} BUFFPEK; - -typedef struct st_mi_sort_param -{ - pthread_t thr; - IO_CACHE read_cache, tempfile, tempfile_for_exceptions; - DYNAMIC_ARRAY buffpek; - MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ - - /* - The next two are used to collect statistics, see update_key_parts for - description. - */ - ulonglong unique[MI_MAX_KEY_SEG+1]; - ulonglong notnull[MI_MAX_KEY_SEG+1]; - - my_off_t pos,max_pos,filepos,start_recpos; - uint key, key_length,real_key_length,sortbuff_size; - uint maxbuffers, keys, find_length, sort_keys_length; - my_bool fix_datafile, master; - my_bool calc_checksum; /* calculate table checksum */ - MI_KEYDEF *keyinfo; - HA_KEYSEG *seg; - SORT_INFO *sort_info; - uchar **sort_keys; - uchar *rec_buff; - void *wordlist, *wordptr; - MEM_ROOT wordroot; - uchar *record; - MY_TMPDIR *tmpdir; - int (*key_cmp)(struct st_mi_sort_param *, const void *, const void *); - int (*key_read)(struct st_mi_sort_param *,void *); - int (*key_write)(struct st_mi_sort_param *, const void *); - void (*lock_in_memory)(MI_CHECK *); - NEAR int (*write_keys)(struct st_mi_sort_param *, register uchar **, - uint , struct st_buffpek *, IO_CACHE *); - NEAR uint (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint); - NEAR int (*write_key)(struct st_mi_sort_param *, IO_CACHE *,char *, - uint, uint); -} MI_SORT_PARAM; - - /* Some defines used by isam-funktions */ - -#define USE_WHOLE_KEY MI_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ -#define F_EXTRA_LCK -1 - - /* bits in opt_flag */ -#define MEMMAP_USED 32 +#define USE_WHOLE_KEY HA_MAX_KEY_BUFF*2 /* Use whole key in _mi_search() */ +#define F_EXTRA_LCK -1 +/* bits in opt_flag */ +#define MEMMAP_USED 32 #define REMEMBER_OLD_POS 64 -#define WRITEINFO_UPDATE_KEYFILE 1 -#define WRITEINFO_NO_UNLOCK 2 +#define WRITEINFO_UPDATE_KEYFILE 1 +#define WRITEINFO_NO_UNLOCK 2 - /* once_flags */ +/* once_flags */ #define USE_PACKED_KEYS 1 #define RRND_PRESERVE_LASTINX 2 - /* bits in state.changed */ - -#define STATE_CHANGED 1 -#define STATE_CRASHED 2 +/* bits in state.changed */ +#define STATE_CHANGED 1 +#define STATE_CRASHED 2 #define STATE_CRASHED_ON_REPAIR 4 -#define STATE_NOT_ANALYZED 8 +#define STATE_NOT_ANALYZED 8 #define STATE_NOT_OPTIMIZED_KEYS 16 -#define STATE_NOT_SORTED_PAGES 32 +#define STATE_NOT_SORTED_PAGES 32 - /* options to mi_read_cache */ +/* options to mi_read_cache */ +#define READING_NEXT 1 +#define READING_HEADER 2 -#define READING_NEXT 1 -#define READING_HEADER 2 - -#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767) +#define mi_getint(x) ((uint) mi_uint2korr(x) & 32767) #define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ - mi_int2store(x,boh); } + mi_int2store(x,boh); } #define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) #define mi_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \ DBUG_PRINT("error", ("Marked table crashed")); \ @@ -400,13 +346,6 @@ typedef struct st_mi_sort_param /* Functions to store length of space packed keys, VARCHAR or BLOB keys */ -#define store_key_length_inc(key,length) \ -{ if ((length) < 255) \ - { *(key)++=(length); } \ - else \ - { *(key)=255; mi_int2store((key)+1,(length)); (key)+=3; } \ -} - #define store_key_length(key,length) \ { if ((length) < 255) \ { *(key)=(length); } \ @@ -430,39 +369,39 @@ typedef struct st_mi_sort_param #define get_pack_length(length) ((length) >= 255 ? 3 : 1) -#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ -#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */ -#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2) -#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */ +#define MI_MIN_BLOCK_LENGTH 20 /* Because of delete-link */ +#define MI_EXTEND_BLOCK_LENGTH 20 /* Don't use to small record-blocks */ +#define MI_SPLIT_LENGTH ((MI_EXTEND_BLOCK_LENGTH+4)*2) +#define MI_MAX_DYN_BLOCK_HEADER 20 /* Max prefix of record-block */ #define MI_BLOCK_INFO_HEADER_LENGTH 20 -#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ -#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) -#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH) -#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */ -#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ -#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1))) +#define MI_DYN_DELETE_BLOCK_HEADER 20 /* length of delete-block-header */ +#define MI_DYN_MAX_BLOCK_LENGTH ((1L << 24)-4L) +#define MI_DYN_MAX_ROW_LENGTH (MI_DYN_MAX_BLOCK_LENGTH - MI_SPLIT_LENGTH) +#define MI_DYN_ALIGN_SIZE 4 /* Align blocks on this */ +#define MI_MAX_DYN_HEADER_BYTE 13 /* max header byte for dynamic rows */ +#define MI_MAX_BLOCK_LENGTH ((((ulong) 1 << 24)-1) & (~ (ulong) (MI_DYN_ALIGN_SIZE-1))) #define MI_REC_BUFF_OFFSET ALIGN_SIZE(MI_DYN_DELETE_BLOCK_HEADER+sizeof(uint32)) -#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ +#define MEMMAP_EXTRA_MARGIN 7 /* Write this as a suffix for file */ -#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ -#define PACK_TYPE_SPACE_FIELDS 2 -#define PACK_TYPE_ZERO_FILL 4 -#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ +#define PACK_TYPE_SELECTED 1 /* Bits in field->pack_type */ +#define PACK_TYPE_SPACE_FIELDS 2 +#define PACK_TYPE_ZERO_FILL 4 +#define MI_FOUND_WRONG_KEY 32738 /* Impossible value from ha_key_cmp */ -#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH) +#define MI_MAX_KEY_BLOCK_SIZE (MI_MAX_KEY_BLOCK_LENGTH/MI_MIN_KEY_BLOCK_LENGTH) #define MI_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/(block_size)+1)*(block_size)) -#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ -#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ +#define MI_MAX_KEYPTR_SIZE 5 /* For calculating block lengths */ +#define MI_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */ -#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ +#define MI_MIN_SIZE_BULK_INSERT_TREE 16384 /* this is per key */ #define MI_MIN_ROWS_TO_USE_BULK_INSERT 100 #define MI_MIN_ROWS_TO_DISABLE_INDEXES 100 #define MI_MIN_ROWS_TO_USE_WRITE_CACHE 10 /* The UNIQUE check is done with a hashed long key */ -#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT +#define MI_UNIQUE_HASH_TYPE HA_KEYTYPE_ULONG_INT #define mi_unique_store(A,B) mi_int4store((A),(B)) #ifdef THREAD @@ -474,175 +413,181 @@ extern pthread_mutex_t THR_LOCK_myisam; #define rw_unlock(A) {} #endif - /* Some extern variables */ +/* Some extern variables */ extern LIST *myisam_open_list; -extern uchar NEAR myisam_file_magic[],NEAR myisam_pack_file_magic[]; -extern uint NEAR myisam_read_vec[],NEAR myisam_readnext_vec[]; +extern uchar NEAR myisam_file_magic[], NEAR myisam_pack_file_magic[]; +extern uint NEAR myisam_read_vec[], NEAR myisam_readnext_vec[]; extern uint myisam_quick_table_bits; extern File myisam_log_file; extern ulong myisam_pid; - /* This is used by _mi_calc_xxx_key_length och _mi_store_key */ +/* This is used by _mi_calc_xxx_key_length och _mi_store_key */ typedef struct st_mi_s_param { - uint ref_length,key_length, - n_ref_length, - n_length, - totlength, - part_of_prev_key,prev_length,pack_marker; - uchar *key, *prev_key,*next_key_pos; - bool store_not_null; + uint ref_length, key_length, + n_ref_length, + n_length, totlength, part_of_prev_key, prev_length, pack_marker; + uchar *key, *prev_key, *next_key_pos; + bool store_not_null; } MI_KEY_PARAM; - /* Prototypes for intern functions */ +/* Prototypes for intern functions */ -extern int _mi_read_dynamic_record(MI_INFO *info,my_off_t filepos,uchar *buf); -extern int _mi_write_dynamic_record(MI_INFO*, const uchar*); -extern int _mi_update_dynamic_record(MI_INFO*, my_off_t, const uchar*); +extern int _mi_read_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *buf); +extern int _mi_write_dynamic_record(MI_INFO *, const uchar *); +extern int _mi_update_dynamic_record(MI_INFO *, my_off_t, const uchar *); extern int _mi_delete_dynamic_record(MI_INFO *info); -extern int _mi_cmp_dynamic_record(MI_INFO *info,const uchar *record); -extern int _mi_read_rnd_dynamic_record(MI_INFO *, uchar *,my_off_t, my_bool); -extern int _mi_write_blob_record(MI_INFO*, const uchar*); -extern int _mi_update_blob_record(MI_INFO*, my_off_t, const uchar*); -extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos,uchar *buf); -extern int _mi_write_static_record(MI_INFO*, const uchar*); -extern int _mi_update_static_record(MI_INFO*, my_off_t, const uchar*); +extern int _mi_cmp_dynamic_record(MI_INFO *info, const uchar *record); +extern int _mi_read_rnd_dynamic_record(MI_INFO *, uchar *, my_off_t, my_bool); +extern int _mi_write_blob_record(MI_INFO *, const uchar *); +extern int _mi_update_blob_record(MI_INFO *, my_off_t, const uchar *); +extern int _mi_read_static_record(MI_INFO *info, my_off_t filepos, uchar *buf); +extern int _mi_write_static_record(MI_INFO *, const uchar *); +extern int _mi_update_static_record(MI_INFO *, my_off_t, const uchar *); extern int _mi_delete_static_record(MI_INFO *info); -extern int _mi_cmp_static_record(MI_INFO *info,const uchar *record); -extern int _mi_read_rnd_static_record(MI_INFO*, uchar *,my_off_t, my_bool); -extern int _mi_ck_write(MI_INFO *info,uint keynr,uchar *key,uint length); +extern int _mi_cmp_static_record(MI_INFO *info, const uchar *record); +extern int _mi_read_rnd_static_record(MI_INFO *, uchar *, my_off_t, my_bool); +extern int _mi_ck_write(MI_INFO *info, uint keynr, uchar *key, uint length); extern int _mi_ck_real_write_btree(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, uint key_length, my_off_t *root, uint comp_flag); -extern int _mi_enlarge_root(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, my_off_t *root); -extern int _mi_insert(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uchar *anc_buff,uchar *key_pos,uchar *key_buff, - uchar *father_buff, uchar *father_keypos, - my_off_t father_page, my_bool insert_last); -extern int _mi_split_page(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uchar *buff,uchar *key_buff, my_bool insert_last); -extern uchar *_mi_find_half_pos(uint nod_flag,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint *return_key_length, - uchar **after_key); -extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *key_buff, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos, uchar *org_key, - uchar *prev_key, - uchar *key, MI_KEY_PARAM *s_temp); -extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo,uint nod_flag, - uchar *key_pos,uchar *org_key, - uchar *prev_key, - uchar *key, MI_KEY_PARAM *s_temp); -void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); -void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +extern int _mi_enlarge_root(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + my_off_t *root); +extern int _mi_insert(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uchar *anc_buff, uchar *key_pos, uchar *key_buff, + uchar *father_buff, uchar *father_keypos, + my_off_t father_page, my_bool insert_last); +extern int _mi_split_page(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uchar *buff, uchar *key_buff, my_bool insert_last); +extern uchar *_mi_find_half_pos(uint nod_flag, MI_KEYDEF *keyinfo, + uchar *page, uchar *key, + uint *return_key_length, uchar ** after_key); +extern int _mi_calc_static_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_var_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *key_buff, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_var_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *prev_key, uchar *key, + MI_KEY_PARAM *s_temp); +extern int _mi_calc_bin_pack_key_length(MI_KEYDEF *keyinfo, uint nod_flag, + uchar *key_pos, uchar *org_key, + uchar *prev_key, uchar *key, + MI_KEY_PARAM *s_temp); +void _mi_store_static_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); +void _mi_store_var_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); #ifdef NOT_USED -void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +void _mi_store_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); #endif -void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, - MI_KEY_PARAM *s_temp); +void _mi_store_bin_pack_key(MI_KEYDEF *keyinfo, uchar *key_pos, + MI_KEY_PARAM *s_temp); -extern int _mi_ck_delete(MI_INFO *info,uint keynr,uchar *key,uint key_length); -extern int _mi_readinfo(MI_INFO *info,int lock_flag,int check_keybuffer); -extern int _mi_writeinfo(MI_INFO *info,uint options); +extern int _mi_ck_delete(MI_INFO *info, uint keynr, uchar *key, + uint key_length); +extern int _mi_readinfo(MI_INFO *info, int lock_flag, int check_keybuffer); +extern int _mi_writeinfo(MI_INFO *info, uint options); extern int _mi_test_if_changed(MI_INFO *info); extern int _mi_mark_file_changed(MI_INFO *info); extern int _mi_decrement_open_count(MI_INFO *info); -extern int _mi_check_index(MI_INFO *info,int inx); -extern int _mi_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key,uint key_len, - uint nextflag,my_off_t pos); -extern int _mi_bin_search(struct st_myisam_info *info,MI_KEYDEF *keyinfo, - uchar *page,uchar *key,uint key_len,uint comp_flag, - uchar * *ret_pos,uchar *buff, my_bool *was_last_key); -extern int _mi_seq_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint key_len,uint comp_flag, - uchar **ret_pos,uchar *buff, my_bool *was_last_key); -extern int _mi_prefix_search(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *page, - uchar *key,uint key_len,uint comp_flag, - uchar **ret_pos,uchar *buff, my_bool *was_last_key); -extern my_off_t _mi_kpos(uint nod_flag,uchar *after_key); -extern void _mi_kpointer(MI_INFO *info,uchar *buff,my_off_t pos); -extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag,uchar *after_key); +extern int _mi_check_index(MI_INFO *info, int inx); +extern int _mi_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_len, uint nextflag, my_off_t pos); +extern int _mi_bin_search(struct st_myisam_info *info, MI_KEYDEF *keyinfo, + uchar *page, uchar *key, uint key_len, + uint comp_flag, uchar **ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _mi_seq_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, + uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern int _mi_prefix_search(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, + uchar *key, uint key_len, uint comp_flag, + uchar ** ret_pos, uchar *buff, + my_bool *was_last_key); +extern my_off_t _mi_kpos(uint nod_flag, uchar *after_key); +extern void _mi_kpointer(MI_INFO *info, uchar *buff, my_off_t pos); +extern my_off_t _mi_dpos(MI_INFO *info, uint nod_flag, uchar *after_key); extern my_off_t _mi_rec_pos(MYISAM_SHARE *info, uchar *ptr); -extern void _mi_dpointer(MI_INFO *info, uchar *buff,my_off_t pos); -extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a,uchar *b, - uint key_length,uint nextflag,uint *diff_length); -extern uint _mi_get_static_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page, - uchar *key); -extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo,uint nod_flag,uchar * *page, - uchar *key); +extern void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos); +extern int ha_key_cmp(HA_KEYSEG *keyseg, uchar *a, uchar *b, + uint key_length, uint nextflag, uint *diff_length); +extern uint _mi_get_static_key(MI_KEYDEF *keyinfo, uint nod_flag, + uchar **page, uchar *key); +extern uint _mi_get_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, uchar **page, + uchar *key); extern uint _mi_get_binary_pack_key(MI_KEYDEF *keyinfo, uint nod_flag, - uchar **page_pos, uchar *key); -extern uchar *_mi_get_last_key(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *keypos, - uchar *lastkey,uchar *endpos, - uint *return_key_length); + uchar ** page_pos, uchar *key); +extern uchar *_mi_get_last_key(MI_INFO *info, MI_KEYDEF *keyinfo, + uchar *keypos, uchar *lastkey, uchar *endpos, + uint *return_key_length); extern uchar *_mi_get_key(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *page, - uchar *key, uchar *keypos, uint *return_key_length); -extern uint _mi_keylength(MI_KEYDEF *keyinfo,uchar *key); + uchar *key, uchar *keypos, + uint *return_key_length); +extern uint _mi_keylength(MI_KEYDEF *keyinfo, uchar *key); extern uint _mi_keylength_part(MI_KEYDEF *keyinfo, register uchar *key, - HA_KEYSEG *end); -extern uchar *_mi_move_key(MI_KEYDEF *keyinfo,uchar *to,uchar *from); -extern int _mi_search_next(MI_INFO *info,MI_KEYDEF *keyinfo,uchar *key, - uint key_length,uint nextflag,my_off_t pos); -extern int _mi_search_first(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); -extern int _mi_search_last(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos); -extern uchar *_mi_fetch_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - int level,uchar *buff,int return_buffer); -extern int _mi_write_keypage(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t page, - int level, uchar *buff); -extern int _mi_dispose(MI_INFO *info,MI_KEYDEF *keyinfo,my_off_t pos, - int level); -extern my_off_t _mi_new(MI_INFO *info,MI_KEYDEF *keyinfo,int level); -extern uint _mi_make_key(MI_INFO *info,uint keynr,uchar *key, - const uchar *record,my_off_t filepos); -extern uint _mi_pack_key(register MI_INFO *info, uint keynr, uchar *key, - uchar *old, key_part_map keypart_map, - HA_KEYSEG **last_used_keyseg); -extern int _mi_read_key_record(MI_INFO *info,my_off_t filepos,uchar *buf); -extern int _mi_read_cache(IO_CACHE *info,uchar *buff,my_off_t pos, - uint length,int re_read_if_possibly); -extern ulonglong retrieve_auto_increment(MI_INFO *info,const uchar *record); + HA_KEYSEG *end); +extern uchar *_mi_move_key(MI_KEYDEF *keyinfo, uchar *to, uchar *from); +extern int _mi_search_next(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, + uint key_length, uint nextflag, my_off_t pos); +extern int _mi_search_first(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos); +extern int _mi_search_last(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos); +extern uchar *_mi_fetch_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, + my_off_t page, int level, uchar *buff, + int return_buffer); +extern int _mi_write_keypage(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t page, + int level, uchar *buff); +extern int _mi_dispose(MI_INFO *info, MI_KEYDEF *keyinfo, my_off_t pos, + int level); +extern my_off_t _mi_new(MI_INFO *info, MI_KEYDEF *keyinfo, int level); +extern uint _mi_make_key(MI_INFO *info, uint keynr, uchar *key, + const uchar *record, my_off_t filepos); +extern uint _mi_pack_key(MI_INFO *info, uint keynr, uchar *key, uchar *old, + uint key_length, HA_KEYSEG ** last_used_keyseg); +extern int _mi_read_key_record(MI_INFO *info, my_off_t filepos, uchar *buf); +extern int _mi_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos, + uint length, int re_read_if_possibly); +extern ulonglong retrieve_auto_increment(MI_INFO *info, const uchar *record); -extern uchar *mi_alloc_rec_buff(MI_INFO *,ulong, uchar**); +extern uchar *mi_alloc_rec_buff(MI_INFO *, ulong, uchar **); #define mi_get_rec_buff_ptr(info,buf) \ ((((info)->s->options & HA_OPTION_PACK_RECORD) && (buf)) ? \ (buf) - MI_REC_BUFF_OFFSET : (buf)) #define mi_get_rec_buff_len(info,buf) \ (*((uint32 *)(mi_get_rec_buff_ptr(info,buf)))) -extern ulong _mi_rec_unpack(MI_INFO *info,uchar *to,uchar *from, - ulong reclength); -extern my_bool _mi_rec_check(MI_INFO *info,const uchar *record, uchar *packpos, +extern ulong _mi_rec_unpack(MI_INFO *info, uchar *to, uchar *from, + ulong reclength); +extern my_bool _mi_rec_check(MI_INFO *info, const char *record, uchar *packpos, ulong packed_length, my_bool with_checkum); -extern int _mi_write_part_record(MI_INFO *info,my_off_t filepos,ulong length, - my_off_t next_filepos,uchar **record, - ulong *reclength,int *flag); -extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key, - uint length); -extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys); -extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,uchar *buf); -extern int _mi_read_rnd_pack_record(MI_INFO*, uchar *,my_off_t, my_bool); +extern int _mi_write_part_record(MI_INFO *info, my_off_t filepos, ulong length, + my_off_t next_filepos, uchar ** record, + ulong *reclength, int *flag); +extern void _mi_print_key(FILE *stream, HA_KEYSEG *keyseg, const uchar *key, + uint length); +extern my_bool _mi_read_pack_info(MI_INFO *info, pbool fix_keys); +extern int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, uchar *buf); +extern int _mi_read_rnd_pack_record(MI_INFO *, uchar *, my_off_t, my_bool); extern int _mi_pack_rec_unpack(MI_INFO *info, MI_BIT_BUFF *bit_buff, uchar *to, uchar *from, ulong reclength); -extern ulonglong mi_safe_mul(ulonglong a,ulonglong b); +extern ulonglong mi_safe_mul(ulonglong a, ulonglong b); extern int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf, - const uchar *oldrec, const uchar *newrec, my_off_t pos); + const uchar *oldrec, const uchar *newrec, + my_off_t pos); struct st_sort_info; -typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */ +typedef struct st_mi_block_info /* Parameter to _mi_get_block_info */ +{ uchar header[MI_BLOCK_INFO_HEADER_LENGTH]; ulong rec_len; ulong data_len; @@ -655,35 +600,37 @@ typedef struct st_mi_block_info { /* Parameter to _mi_get_block_info */ uint offset; } MI_BLOCK_INFO; - /* bits in return from _mi_get_block_info */ + /* bits in return from _mi_get_block_info */ -#define BLOCK_FIRST 1 -#define BLOCK_LAST 2 -#define BLOCK_DELETED 4 -#define BLOCK_ERROR 8 /* Wrong data */ -#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ -#define BLOCK_FATAL_ERROR 32 /* hardware-error */ +#define BLOCK_FIRST 1 +#define BLOCK_LAST 2 +#define BLOCK_DELETED 4 +#define BLOCK_ERROR 8 /* Wrong data */ +#define BLOCK_SYNC_ERROR 16 /* Right data at wrong place */ +#define BLOCK_FATAL_ERROR 32 /* hardware-error */ -#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ -#define MAXERR 20 -#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ -#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE -#define INDEX_TMP_EXT ".TMM" -#define DATA_TMP_EXT ".TMD" +#define NEED_MEM ((uint) 10*4*(IO_SIZE+32)+32) /* Nead for recursion */ +#define MAXERR 20 +#define BUFFERS_WHEN_SORTING 16 /* Alloc for sort-key-tree */ +#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE +#define INDEX_TMP_EXT ".TMM" +#define DATA_TMP_EXT ".TMD" -#define UPDATE_TIME 1 -#define UPDATE_STAT 2 -#define UPDATE_SORT 4 -#define UPDATE_AUTO_INC 8 -#define UPDATE_OPEN_COUNT 16 +#define UPDATE_TIME 1 +#define UPDATE_STAT 2 +#define UPDATE_SORT 4 +#define UPDATE_AUTO_INC 8 +#define UPDATE_OPEN_COUNT 16 -#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) -#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) -#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) -#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) +#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) +#define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) +#define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) +#define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) -enum myisam_log_commands { - MI_LOG_OPEN,MI_LOG_WRITE,MI_LOG_UPDATE,MI_LOG_DELETE,MI_LOG_CLOSE,MI_LOG_EXTRA,MI_LOG_LOCK,MI_LOG_DELETE_ALL +enum myisam_log_commands +{ + MI_LOG_OPEN, MI_LOG_WRITE, MI_LOG_UPDATE, MI_LOG_DELETE, MI_LOG_CLOSE, + MI_LOG_EXTRA, MI_LOG_LOCK, MI_LOG_DELETE_ALL }; #define myisam_log(a,b,c,d) if (myisam_log_file >= 0) _myisam_log(a,b,c,d) @@ -693,36 +640,34 @@ enum myisam_log_commands { #define fast_mi_writeinfo(INFO) if (!(INFO)->s->tot_locks) (void) _mi_writeinfo((INFO),0) #define fast_mi_readinfo(INFO) ((INFO)->lock_type == F_UNLCK) && _mi_readinfo((INFO),F_RDLCK,1) -#ifdef __cplusplus +#ifdef __cplusplus extern "C" { #endif - -extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t); -extern uint _mi_rec_pack(MI_INFO *info,uchar *to,const uchar *from); + extern uint _mi_get_block_info(MI_BLOCK_INFO *, File, my_off_t); +extern uint _mi_rec_pack(MI_INFO *info, uchar *to, const uchar *from); extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, MI_BLOCK_INFO *info, uchar **rec_buff_p, File file, my_off_t filepos); -extern void _my_store_blob_length(uchar *pos,uint pack_length,uint length); -extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info, - const uchar *buffert,uint length); +extern void _mi_store_blob_length(uchar *pos, uint pack_length, uint length); +extern void _myisam_log(enum myisam_log_commands command, MI_INFO *info, + const uchar *buffert, uint length); extern void _myisam_log_command(enum myisam_log_commands command, - MI_INFO *info, const uchar *buffert, - uint length, int result); -extern void _myisam_log_record(enum myisam_log_commands command,MI_INFO *info, - const uchar *record,my_off_t filepos, - int result); + MI_INFO *info, const uchar *buffert, + uint length, int result); +extern void _myisam_log_record(enum myisam_log_commands command, MI_INFO *info, + const uchar *record, my_off_t filepos, + int result); extern void mi_report_error(int errcode, const char *file_name); extern my_bool _mi_memmap_file(MI_INFO *info); extern void _mi_unmap_file(MI_INFO *info); extern uint save_pack_length(uint version, uchar *block_buff, ulong length); -extern uint read_pack_length(uint version, const uchar *buf, ulong *length); extern uint calc_pack_length(uint version, ulong length); extern uint mi_mmap_pread(MI_INFO *info, uchar *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_mmap_pwrite(MI_INFO *info, uchar *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_nommap_pread(MI_INFO *info, uchar *Buffer, - uint Count, my_off_t offset, myf MyFlags); + uint Count, my_off_t offset, myf MyFlags); extern uint mi_nommap_pwrite(MI_INFO *info, uchar *Buffer, uint Count, my_off_t offset, myf MyFlags); @@ -730,7 +675,7 @@ uint mi_state_info_write(File file, MI_STATE_INFO *state, uint pWrite); uchar *mi_state_info_read(uchar *ptr, MI_STATE_INFO *state); uint mi_state_info_read_dsk(File file, MI_STATE_INFO *state, my_bool pRead); uint mi_base_info_write(File file, MI_BASE_INFO *base); -uchar *my_n_base_info_read(uchar *ptr, MI_BASE_INFO *base); +uchar *mi_n_base_info_read(uchar *ptr, MI_BASE_INFO *base); int mi_keyseg_write(File file, const HA_KEYSEG *keyseg); char *mi_keyseg_read(char *ptr, HA_KEYSEG *keyseg); uint mi_keydef_write(File file, MI_KEYDEF *keydef); @@ -742,23 +687,23 @@ char *mi_recinfo_read(char *ptr, MI_COLUMNDEF *recinfo); extern int mi_disable_indexes(MI_INFO *info); extern int mi_enable_indexes(MI_INFO *info); extern int mi_indexes_are_disabled(MI_INFO *info); -ulong _my_calc_total_blob_length(MI_INFO *info, const uchar *record); +ulong _mi_calc_total_blob_length(MI_INFO *info, const uchar *record); ha_checksum mi_checksum(MI_INFO *info, const uchar *buf); ha_checksum mi_static_checksum(MI_INFO *info, const uchar *buf); my_bool mi_check_unique(MI_INFO *info, MI_UNIQUEDEF *def, uchar *record, - ha_checksum unique_hash, my_off_t pos); + ha_checksum unique_hash, my_off_t pos); ha_checksum mi_unique_hash(MI_UNIQUEDEF *def, const uchar *buf); int _mi_cmp_static_unique(MI_INFO *info, MI_UNIQUEDEF *def, - const uchar *record, my_off_t pos); + const uchar *record, my_off_t pos); int _mi_cmp_dynamic_unique(MI_INFO *info, MI_UNIQUEDEF *def, - const uchar *record, my_off_t pos); + const uchar *record, my_off_t pos); int mi_unique_comp(MI_UNIQUEDEF *def, const uchar *a, const uchar *b, - my_bool null_are_equal); -void mi_get_status(void* param, int concurrent_insert); -void mi_update_status(void* param); -void mi_restore_status(void* param); -void mi_copy_status(void* to,void *from); -my_bool mi_check_status(void* param); + my_bool null_are_equal); +void mi_get_status(void *param, int concurrent_insert); +void mi_update_status(void *param); +void mi_restore_status(void *param); +void mi_copy_status(void *to, void *from); +my_bool mi_check_status(void *param); void mi_disable_non_unique_index(MI_INFO *info, ha_rows rows); extern MI_INFO *test_if_reopen(char *filename); @@ -770,22 +715,14 @@ my_bool mi_dynmap_file(MI_INFO *info, my_off_t size); void mi_remap_file(MI_INFO *info, my_off_t size); /* Functions needed by mi_check */ -volatile int *killed_ptr(MI_CHECK *param); -void mi_check_print_error _VARARGS((MI_CHECK *param, const char *fmt,...)); -void mi_check_print_warning _VARARGS((MI_CHECK *param, const char *fmt,...)); -void mi_check_print_info _VARARGS((MI_CHECK *param, const char *fmt,...)); -int flush_pending_blocks(MI_SORT_PARAM *param); -int sort_ft_buf_flush(MI_SORT_PARAM *sort_param); -int thr_write_keys(MI_SORT_PARAM *sort_param); +volatile int *killed_ptr(HA_CHECK *param); +void mi_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void mi_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); +void mi_check_print_info _VARARGS((HA_CHECK *param, const char *fmt, ...)); #ifdef THREAD pthread_handler_t thr_find_all_keys(void *arg); #endif -int flush_blocks(MI_CHECK *param, KEY_CACHE *key_cache, File file); - -int sort_write_record(MI_SORT_PARAM *sort_param); -int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, ulong); - +int flush_blocks(HA_CHECK *param, KEY_CACHE *key_cache, File file); #ifdef __cplusplus } #endif - diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c index 1cf4ad730cb..678aeaa6efd 100644 --- a/storage/myisam/myisamlog.c +++ b/storage/myisam/myisamlog.c @@ -807,7 +807,7 @@ static int find_record_with_key(struct file_info *file_info, uchar *record) { uint key; MI_INFO *info=file_info->isam; - uchar tmp_key[MI_MAX_KEY_BUFF]; + uchar tmp_key[HA_MAX_KEY_BUFF]; for (key=0 ; key < info->s->base.keys ; key++) { diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c index 5f7c4ee8f2e..52c245dda18 100644 --- a/storage/myisam/myisampack.c +++ b/storage/myisam/myisampack.c @@ -305,7 +305,7 @@ static void usage(void) puts("and you are welcome to modify and redistribute it under the GPL license\n"); puts("Pack a MyISAM-table to take much less space."); - puts("Keys are not updated, you must run myisamchk -rq on the datafile"); + puts("Keys are not updated, you must run myisamchk -rq on the index (.MYI) file"); puts("afterwards to update the keys."); puts("You should give the .MYI file as the filename argument."); @@ -1008,7 +1008,7 @@ static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts) /* Calculate pos, end_pos, and max_length for variable length fields. */ if (count->field_type == FIELD_BLOB) { - uint field_length=count->field_length -mi_portable_sizeof_char_ptr; + uint field_length=count->field_length -portable_sizeof_char_ptr; ulong blob_length= _mi_calc_blob_length(field_length, start_pos); memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*)); end_pos=pos+blob_length; @@ -2650,7 +2650,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) case FIELD_BLOB: { ulong blob_length=_mi_calc_blob_length(field_length- - mi_portable_sizeof_char_ptr, + portable_sizeof_char_ptr, start_pos); /* Empty blobs are encoded with a single 1 bit. */ if (!blob_length) @@ -2667,7 +2667,7 @@ static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u", blob_length, count->length_bits)); write_bits(blob_length,count->length_bits); - memcpy_fixed(&blob,end_pos-mi_portable_sizeof_char_ptr, + memcpy_fixed(&blob,end_pos-portable_sizeof_char_ptr, sizeof(char*)); blob_end=blob+blob_length; /* Encode the blob bytes. */ diff --git a/storage/myisam/rt_index.c b/storage/myisam/rt_index.c index 63ed60586d6..35a70b8c2bf 100644 --- a/storage/myisam/rt_index.c +++ b/storage/myisam/rt_index.c @@ -542,7 +542,7 @@ static int rtree_insert_req(MI_INFO *info, MI_KEYDEF *keyinfo, uchar *key, DBUG_ENTER("rtree_insert_req"); if (!(page_buf = (uchar*)my_alloca((uint)keyinfo->block_length + - MI_MAX_KEY_BUFF))) + HA_MAX_KEY_BUFF))) { my_errno = HA_ERR_OUT_OF_MEM; DBUG_RETURN(-1); /* purecov: inspected */ @@ -658,7 +658,7 @@ static int rtree_insert_level(MI_INFO *info, uint keynr, uchar *key, DBUG_PRINT("rtree", ("root was split, grow a new root")); if (!(new_root_buf = (uchar*)my_alloca((uint)keyinfo->block_length + - MI_MAX_KEY_BUFF))) + HA_MAX_KEY_BUFF))) { my_errno = HA_ERR_OUT_OF_MEM; DBUG_RETURN(-1); /* purecov: inspected */ diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c index bf3281e7d32..2e2684230e7 100644 --- a/storage/myisam/sort.c +++ b/storage/myisam/sort.c @@ -15,7 +15,7 @@ /* Creates a index for a database by reading keys, sorting them and outputing - them in sorted order through SORT_INFO functions. + them in sorted order through MI_SORT_INFO functions. */ #include "fulltext.h" @@ -487,8 +487,8 @@ ok: int thr_write_keys(MI_SORT_PARAM *sort_param) { - SORT_INFO *sort_info=sort_param->sort_info; - MI_CHECK *param=sort_info->param; + MI_SORT_INFO *sort_info=sort_param->sort_info; + HA_CHECK *param=sort_info->param; ulong length, keys; ulong *rec_per_key_part=param->rec_per_key_part; int got_error=sort_info->got_error; @@ -831,7 +831,7 @@ static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, register uint count; uint16 length_of_key = 0; uint idx; - uchar *buffp; + byte *buffp; if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count))) { @@ -918,7 +918,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, for (buffpek= Fb ; buffpek <= Tb ; buffpek++) { count+= buffpek->count; - buffpek->base= strpos; + buffpek->base= (byte*) strpos; buffpek->max_keys=maxcount; strpos+= (uint) (error=(int) info->read_to_buffer(from_file,buffpek, sort_length)); @@ -956,7 +956,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, { if (!(error=(int) info->read_to_buffer(from_file,buffpek,sort_length))) { - uchar *base=buffpek->base; + byte *base= buffpek->base; uint max_keys=buffpek->max_keys; VOID(queue_remove(&queue,0)); @@ -988,7 +988,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, } } buffpek=(BUFFPEK*) queue_top(&queue); - buffpek->base=(uchar *) sort_keys; + buffpek->base= (byte*) sort_keys; buffpek->max_keys=keys; do { @@ -1003,7 +1003,7 @@ merge_buffers(MI_SORT_PARAM *info, uint keys, IO_CACHE *from_file, else { register uchar *end; - strpos= buffpek->key; + strpos= (uchar*) buffpek->key; for (end=strpos+buffpek->mem_count*sort_length; strpos != end ; strpos+=sort_length) diff --git a/storage/myisam/sp_test.c b/storage/myisam/sp_test.c index 96ba05e8a74..c1ba99a8d97 100644 --- a/storage/myisam/sp_test.c +++ b/storage/myisam/sp_test.c @@ -79,7 +79,7 @@ int run_test(const char *filename) /* Define spatial column */ recinfo[1].type=FIELD_BLOB; - recinfo[1].length=4 + mi_portable_sizeof_char_ptr; + recinfo[1].length=4 + portable_sizeof_char_ptr; diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc index c03c1e28014..551f12e4a8c 100644 --- a/storage/myisammrg/ha_myisammrg.cc +++ b/storage/myisammrg/ha_myisammrg.cc @@ -48,10 +48,12 @@ static const char *ha_myisammrg_exts[] = { }; extern int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out, MI_COLUMNDEF **recinfo_out, uint *records_out); -extern int check_definition(MI_KEYDEF *t1_keyinfo, MI_COLUMNDEF *t1_recinfo, - uint t1_keys, uint t1_recs, - MI_KEYDEF *t2_keyinfo, MI_COLUMNDEF *t2_recinfo, - uint t2_keys, uint t2_recs, bool strict); +extern int myisam_check_definition(MI_KEYDEF *t1_keyinfo, + MI_COLUMNDEF *t1_recinfo, + uint t1_keys, uint t1_recs, + MI_KEYDEF *t2_keyinfo, + MI_COLUMNDEF *t2_recinfo, + uint t2_keys, uint t2_recs, bool strict); static void split_file_name(const char *file_name, LEX_STRING *db, LEX_STRING *name); @@ -135,10 +137,10 @@ int ha_myisammrg::open(const char *name, int mode, uint test_if_locked) } for (u_table= file->open_tables; u_table < file->end_table; u_table++) { - if (check_definition(keyinfo, recinfo, keys, recs, - u_table->table->s->keyinfo, u_table->table->s->rec, - u_table->table->s->base.keys, - u_table->table->s->base.fields, false)) + if (myisam_check_definition(keyinfo, recinfo, keys, recs, + u_table->table->s->keyinfo, u_table->table->s->rec, + u_table->table->s->base.keys, + u_table->table->s->base.fields, false)) { error= HA_ERR_WRONG_MRG_TABLE_DEF; if (test_if_locked & HA_OPEN_FOR_REPAIR) diff --git a/storage/myisammrg/ha_myisammrg.h b/storage/myisammrg/ha_myisammrg.h index b88f4d52743..d642f45f35d 100644 --- a/storage/myisammrg/ha_myisammrg.h +++ b/storage/myisammrg/ha_myisammrg.h @@ -47,8 +47,8 @@ class ha_myisammrg: public handler HA_READ_ORDER | HA_KEYREAD_ONLY); } uint max_supported_keys() const { return MI_MAX_KEY; } - uint max_supported_key_length() const { return MI_MAX_KEY_LENGTH; } - uint max_supported_key_part_length() const { return MI_MAX_KEY_LENGTH; } + uint max_supported_key_length() const { return HA_MAX_KEY_LENGTH; } + uint max_supported_key_part_length() const { return HA_MAX_KEY_LENGTH; } double scan_time() { return ulonglong2double(stats.data_file_length) / IO_SIZE + file->tables; } diff --git a/support-files/magic b/support-files/magic index 9844142ba93..b3f3b3ea29d 100644 --- a/support-files/magic +++ b/support-files/magic @@ -4,12 +4,23 @@ # 0 beshort 0xfe01 MySQL table definition file >2 byte x Version %d -0 belong&0xffffff00 0xfefe0300 MySQL MISAM index file +0 belong&0xffffff00 0xfefe0700 MySQL MyISAM index file >3 byte x Version %d -0 belong&0xffffff00 0xfefe0700 MySQL MISAM compressed data file +0 belong&0xffffff00 0xfefe0800 MySQL MyISAM compressed data file +>3 byte x Version %d +0 belong&0xffffff00 0xfefe0900 MySQL Maria index file +>3 byte x Version %d +0 belong&0xffffff00 0xfefe0A00 MySQL Maria compressed data file >3 byte x Version %d 0 belong&0xffffff00 0xfefe0500 MySQL ISAM index file >3 byte x Version %d 0 belong&0xffffff00 0xfefe0600 MySQL ISAM compressed data file >3 byte x Version %d 0 string \376bin MySQL replication log +0 belong&0xffffff00 0xfefe0b00 +>4 string MARIALOG MySQL Maria transaction log file +>>3 byte x Version %d +0 belong&0xffffff00 0xfefe0c00 +>4 string MACF MySQL Maria control file +>>3 byte x Version %d + diff --git a/unittest/Makefile.am b/unittest/Makefile.am index 65fa615fb98..8684fd3fc7d 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -13,7 +13,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -SUBDIRS = mytap . mysys examples +SUBDIRS = mytap mysys examples EXTRA_DIST = unit.pl CLEANFILES = unit diff --git a/unittest/mysys/Makefile.am b/unittest/mysys/Makefile.am index be91ef31c9d..36ee285201e 100644 --- a/unittest/mysys/Makefile.am +++ b/unittest/mysys/Makefile.am @@ -13,15 +13,15 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -AM_CPPFLAGS = @ZLIB_INCLUDES@ -I$(top_builddir)/include -AM_CPPFLAGS += -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap +INCLUDES = @ZLIB_INCLUDES@ -I$(top_builddir)/include \ + -I$(top_srcdir)/include -I$(top_srcdir)/unittest/mytap + +noinst_PROGRAMS = bitmap-t base64-t my_atomic-t LDADD = $(top_builddir)/unittest/mytap/libmytap.a \ $(top_builddir)/mysys/libmysys.a \ $(top_builddir)/dbug/libdbug.a \ $(top_builddir)/strings/libmystrings.a -noinst_PROGRAMS = bitmap-t base64-t my_atomic-t - # Don't update the files from bitkeeper %::SCCS/s.% diff --git a/unittest/mysys/my_atomic-t.c b/unittest/mysys/my_atomic-t.c index 8280aff5422..b700d7167ac 100644 --- a/unittest/mysys/my_atomic-t.c +++ b/unittest/mysys/my_atomic-t.c @@ -13,27 +13,31 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include #include + +#include #include #include +#include -int32 a32,b32,c32; +volatile uint32 a32,b32; +volatile int32 c32, N; my_atomic_rwlock_t rwl; +LF_ALLOCATOR lf_allocator; +LF_HASH lf_hash; -pthread_attr_t thr_attr; -pthread_mutex_t mutex; -pthread_cond_t cond; -int N; +pthread_attr_t attr; +size_t stacksize= 0; +#define STACK_SIZE (((int)stacksize-2048)*STACK_DIRECTION) /* add and sub a random number in a loop. Must get 0 at the end */ pthread_handler_t test_atomic_add_handler(void *arg) { - int m=*(int *)arg; + int m= (*(int *)arg)/2; int32 x; - for (x=((int)((long)(&m))); m ; m--) + for (x= ((int)(intptr)(&m)); m ; m--) { - x=x*m+0x87654321; + x= (x*m+0x87654321) & INT_MAX32; my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, x); my_atomic_rwlock_wrunlock(&rwl); @@ -42,10 +46,6 @@ pthread_handler_t test_atomic_add_handler(void *arg) my_atomic_add32(&a32, -x); my_atomic_rwlock_wrunlock(&rwl); } - pthread_mutex_lock(&mutex); - N--; - if (!N) pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); return 0; } @@ -57,30 +57,24 @@ pthread_handler_t test_atomic_add_handler(void *arg) 5. subtract result from a32 must get 0 in a32 at the end */ -pthread_handler_t test_atomic_swap_handler(void *arg) +pthread_handler_t test_atomic_fas_handler(void *arg) { - int m=*(int *)arg; - int32 x; + int m= *(int *)arg; + uint32 x= my_atomic_add32(&b32, 1); - my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_add32(&b32, 1); - my_atomic_rwlock_wrunlock(&rwl); - - my_atomic_rwlock_wrlock(&rwl); my_atomic_add32(&a32, x); - my_atomic_rwlock_wrunlock(&rwl); for (; m ; m--) { my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_swap32(&c32, x); + x= my_atomic_fas32(&c32, x); my_atomic_rwlock_wrunlock(&rwl); } if (!x) { my_atomic_rwlock_wrlock(&rwl); - x=my_atomic_swap32(&c32, x); + x= my_atomic_fas32(&c32, x); my_atomic_rwlock_wrunlock(&rwl); } @@ -88,110 +82,226 @@ pthread_handler_t test_atomic_swap_handler(void *arg) my_atomic_add32(&a32, -x); my_atomic_rwlock_wrunlock(&rwl); - pthread_mutex_lock(&mutex); - N--; - if (!N) pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); return 0; } /* same as test_atomic_add_handler, but my_atomic_add32 is emulated with - (slower) my_atomic_cas32 + my_atomic_cas32 - notice that the slowdown is proportional to the + number of CPUs */ pthread_handler_t test_atomic_cas_handler(void *arg) { - int m=*(int *)arg, ok; - int32 x,y; - for (x=((int)((long)(&m))); m ; m--) + int m= (*(int *)arg)/2, ok= 0; + int32 x, y; + for (x= ((int)(intptr)(&m)); m ; m--) { my_atomic_rwlock_wrlock(&rwl); - y=my_atomic_load32(&a32); + y= my_atomic_load32(&a32); my_atomic_rwlock_wrunlock(&rwl); - - x=x*m+0x87654321; + x= (x*m+0x87654321) & INT_MAX32; do { my_atomic_rwlock_wrlock(&rwl); - ok=my_atomic_cas32(&a32, &y, y+x); + ok= my_atomic_cas32(&a32, &y, (uint32)y+x); my_atomic_rwlock_wrunlock(&rwl); - } while (!ok); + } while (!ok) ; do { my_atomic_rwlock_wrlock(&rwl); - ok=my_atomic_cas32(&a32, &y, y-x); + ok= my_atomic_cas32(&a32, &y, y-x); my_atomic_rwlock_wrunlock(&rwl); - } while (!ok); + } while (!ok) ; } - pthread_mutex_lock(&mutex); - N--; - if (!N) pthread_cond_signal(&cond); - pthread_mutex_unlock(&mutex); + return 0; +} + +/* + pin allocator - alloc and release an element in a loop +*/ +pthread_handler_t test_lf_pinbox(void *arg) +{ + int m= *(int *)arg; + int32 x= 0; + LF_PINS *pins; + + pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE); + + for (x= ((int)(intptr)(&m)); m ; m--) + { + lf_pinbox_put_pins(pins); + pins= lf_pinbox_get_pins(&lf_allocator.pinbox, &m + STACK_SIZE); + } + lf_pinbox_put_pins(pins); + return 0; +} + +typedef union { + int32 data; + void *not_used; +} TLA; + +pthread_handler_t test_lf_alloc(void *arg) +{ + int m= (*(int *)arg)/2; + int32 x,y= 0; + LF_PINS *pins; + + pins= lf_alloc_get_pins(&lf_allocator, &m + STACK_SIZE); + + for (x= ((int)(intptr)(&m)); m ; m--) + { + TLA *node1, *node2; + x= (x*m+0x87654321) & INT_MAX32; + node1= (TLA *)lf_alloc_new(pins); + node1->data= x; + y+= node1->data; + node1->data= 0; + node2= (TLA *)lf_alloc_new(pins); + node2->data= x; + y-= node2->data; + node2->data= 0; + lf_alloc_free(pins, node1); + lf_alloc_free(pins, node2); + } + lf_alloc_put_pins(pins); + my_atomic_rwlock_wrlock(&rwl); + my_atomic_add32(&a32, y); + + if (my_atomic_add32(&N, -1) == 1) + { + diag("%d mallocs, %d pins in stack", + lf_allocator.mallocs, lf_allocator.pinbox.pins_in_array); +#ifdef MY_LF_EXTRA_DEBUG + a32|= lf_allocator.mallocs - lf_alloc_pool_count(&lf_allocator); +#endif + } + my_atomic_rwlock_wrunlock(&rwl); + return 0; +} + +#define N_TLH 1000 +pthread_handler_t test_lf_hash(void *arg) +{ + int m= (*(int *)arg)/(2*N_TLH); + int32 x,y,z,sum= 0, ins= 0; + LF_PINS *pins; + + pins= lf_hash_get_pins(&lf_hash, &m + STACK_SIZE); + + for (x= ((int)(intptr)(&m)); m ; m--) + { + int i; + y= x; + for (i= 0; i < N_TLH; i++) + { + x= (x*(m+i)+0x87654321) & INT_MAX32; + z= (x<0) ? -x : x; + if (lf_hash_insert(&lf_hash, pins, &z)) + { + sum+= z; + ins++; + } + } + for (i= 0; i < N_TLH; i++) + { + y= (y*(m+i)+0x87654321) & INT_MAX32; + z= (y<0) ? -y : y; + if (lf_hash_delete(&lf_hash, pins, (uchar *)&z, sizeof(z))) + sum-= z; + } + } + lf_hash_put_pins(pins); + my_atomic_rwlock_wrlock(&rwl); + my_atomic_add32(&a32, sum); + my_atomic_add32(&b32, ins); + + if (my_atomic_add32(&N, -1) == 1) + { + diag("%d mallocs, %d pins in stack, %d hash size, %d inserts", + lf_hash.alloc.mallocs, lf_hash.alloc.pinbox.pins_in_array, + lf_hash.size, b32); + a32|= lf_hash.count; + } + my_atomic_rwlock_wrunlock(&rwl); return 0; } void test_atomic(const char *test, pthread_handler handler, int n, int m) { - pthread_t t; - ulonglong now=my_getsystime(); + pthread_t *threads; + ulonglong now= my_getsystime(); + int i; a32= 0; b32= 0; c32= 0; - diag("Testing %s with %d threads, %d iterations... ", test, n, m); - for (N=n ; n ; n--) + threads= (pthread_t *)my_malloc(sizeof(void *)*n, MYF(0)); + if (!threads) { - if (pthread_create(&t, &thr_attr, handler, &m) != 0) - { - diag("Could not create thread"); - a32= 1; - goto err; - } + diag("Out of memory"); + abort(); } - pthread_mutex_lock(&mutex); - while (N) - pthread_cond_wait(&cond, &mutex); - pthread_mutex_unlock(&mutex); - now=my_getsystime()-now; -err: - ok(a32 == 0, "tested %s in %g secs", test, ((double)now)/1e7); + diag("Testing %s with %d threads, %d iterations... ", test, n, m); + N= n; + for (i= 0 ; i < n ; i++) + { + if (pthread_create(threads+i, 0, handler, &m) != 0) + { + diag("Could not create thread"); + abort(); + } + } + for (i= 0 ; i < n ; i++) + pthread_join(threads[i], 0); + now= my_getsystime()-now; + ok(a32 == 0, "tested %s in %g secs (%d)", test, ((double)now)/1e7, a32); + my_free((void *)threads, MYF(0)); } + int main() { int err; - diag("N CPUs: %d", my_getncpus()); + my_init(); + + diag("N CPUs: %d, atomic ops: %s", my_getncpus(), MY_ATOMIC_MODE); err= my_atomic_initialize(); - plan(4); + plan(7); ok(err == 0, "my_atomic_initialize() returned %d", err); - pthread_attr_init(&thr_attr); - pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); - pthread_mutex_init(&mutex, 0); - pthread_cond_init(&cond, 0); my_atomic_rwlock_init(&rwl); + lf_alloc_init(&lf_allocator, sizeof(TLA), offsetof(TLA, not_used)); + lf_hash_init(&lf_hash, sizeof(int), LF_HASH_UNIQUE, 0, sizeof(int), 0, + &my_charset_bin); -#ifdef HPUX11 -#define CYCLES 1000 + pthread_attr_init(&attr); +#ifdef HAVE_PTHREAD_ATTR_GETSTACKSIZE + pthread_attr_getstacksize(&attr, &stacksize); + if (stacksize == 0) +#endif + stacksize= PTHREAD_STACK_MIN; + +#ifdef MY_ATOMIC_MODE_RWLOCKS +#define CYCLES 3000 #else -#define CYCLES 10000 +#define CYCLES 300000 #endif #define THREADS 100 - test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS, CYCLES); - test_atomic("my_atomic_swap32", test_atomic_swap_handler, THREADS, CYCLES); - test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS, CYCLES); - /* - workaround until we know why it crashes randomly on some machine - (BUG#22320). - */ - sleep(2); - pthread_mutex_destroy(&mutex); - pthread_cond_destroy(&cond); - pthread_attr_destroy(&thr_attr); + test_atomic("my_atomic_add32", test_atomic_add_handler, THREADS,CYCLES); + test_atomic("my_atomic_fas32", test_atomic_fas_handler, THREADS,CYCLES); + test_atomic("my_atomic_cas32", test_atomic_cas_handler, THREADS,CYCLES); + test_atomic("lf_pinbox", test_lf_pinbox, THREADS,CYCLES); + test_atomic("lf_alloc", test_lf_alloc, THREADS,CYCLES); + test_atomic("lf_hash", test_lf_hash, THREADS,CYCLES/10); + + lf_hash_destroy(&lf_hash); + lf_alloc_destroy(&lf_allocator); my_atomic_rwlock_destroy(&rwl); + my_end(0); return exit_status(); } diff --git a/unittest/mytap/tap.c b/unittest/mytap/tap.c index 4e053e3e745..f9396adbd69 100644 --- a/unittest/mytap/tap.c +++ b/unittest/mytap/tap.c @@ -166,9 +166,17 @@ static signal_entry install_signal[]= { #endif }; +int skip_big_tests= 0; + void plan(int const count) { + char *config= getenv("MYTAP_CONFIG"); + + if (config) + skip_big_tests= strcmp(config, "big"); + + setvbuf(tapout, 0, _IONBF, 0); /* provide output at once */ /* Install signal handler */ diff --git a/unittest/mytap/tap.h b/unittest/mytap/tap.h index 31ec47d1ef2..f92fad1101f 100644 --- a/unittest/mytap/tap.h +++ b/unittest/mytap/tap.h @@ -61,6 +61,24 @@ typedef struct TEST_DATA { extern "C" { #endif +/** + Defines whether "big" tests should be skipped. + + This variable is set by plan() function unless MYTAP_CONFIG environment + variable is set to the string "big". It is supposed to be used as + + @code + if (skip_big_tests) { + skip(1, "Big test skipped"); + } else { + ok(life_universe_and_everything() == 42, "The answer is CORRECT"); + } + @endcode + + @see SKIP_BIG_TESTS +*/ +extern int skip_big_tests; + /** @defgroup MyTAP_API MyTAP API @@ -81,7 +99,12 @@ extern "C" { that generate a core, so if you want to override these signals, do it after you have called the plan() function. - @param count The planned number of tests to run. + It will also set skip_big_tests variable if MYTAP_CONFIG environment + variable is defined. + + @see skip_big_tests + + @param count The planned number of tests to run. */ void plan(int count); @@ -160,6 +183,24 @@ void skip(int how_many, char const *reason, ...) if (SKIP_IF_TRUE) skip((COUNT),(REASON)); else +/** + Helper macro to skip a group of "big" tests. It is used in the following + manner: + + @code + SKIP_BIG_TESTS(1) + { + ok(life_universe_and_everything() == 42, "The answer is CORRECT"); + } + @endcode + + @see skip_big_tests + */ + +#define SKIP_BIG_TESTS(COUNT) \ + if (skip_big_tests) skip((COUNT), "big test"); else + + /** Print a diagnostics message. diff --git a/unittest/unit.pl b/unittest/unit.pl index 9d328985012..b83132581f9 100644 --- a/unittest/unit.pl +++ b/unittest/unit.pl @@ -14,8 +14,9 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -use Test::Harness qw(&runtests $verbose); +use Test::Harness; use File::Find; +use Getopt::Long; use strict; @@ -35,6 +36,15 @@ unit - Run unit tests in directory =cut +my $big=1; + +my $result = GetOptions ( + "big!" => \$big, + "verbose!" => \$Test::Harness::verbose, +); + +$ENV{'MYTAP_CONFIG'} = $big ? "big" : ""; + my $cmd = shift; if (defined $cmd && exists $dispatch{$cmd}) {