Implement Performance Schema in InnoDB.

This commit is contained in:
jyang 2010-03-18 07:56:27 +00:00
parent 08993371d5
commit 01ac1273ba
410 changed files with 224227 additions and 0 deletions

267
perfschema/CMakeLists.txt Normal file
View file

@ -0,0 +1,267 @@
# Copyright (C) 2009 Oracle/Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# This is the CMakeLists for InnoDB Plugin
INCLUDE(CheckFunctionExists)
INCLUDE(CheckCSourceCompiles)
INCLUDE(CheckCSourceRuns)
# OS tests: select the InnoDB platform macros (UNIV_*) for the target system.
# Only UNIX-like targets are handled here.
IF(UNIX)
  IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    ADD_DEFINITIONS(-DUNIV_LINUX -D_GNU_SOURCE=1)
  ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^HP")
    # The pattern is anchored on purpose: MATCHES takes a regular expression,
    # and the unanchored "HP*" ("H" followed by zero or more "P") would match
    # every system name that merely contains an "H".
    ADD_DEFINITIONS(-DUNIV_HPUX -DUNIV_MUST_NOT_INLINE)
  ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX")
    # Spelled UNIV_MUST_NOT_INLINE like every other branch of this block; the
    # former "UNIX_MUST_NOT_INLINE" spelling did not match the macro name used
    # elsewhere in this file.
    ADD_DEFINITIONS(-DUNIV_AIX -DUNIV_MUST_NOT_INLINE)
  ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
    ADD_DEFINITIONS(-DUNIV_SOLARIS)
  ELSE()
    # Any other UNIX flavour: only request non-inlined builds.
    ADD_DEFINITIONS(-DUNIV_MUST_NOT_INLINE)
  ENDIF()
ENDIF()
# GCC-style atomic builtin probes (skipped entirely for MSVC).
# Each probe is a small C program that must compile AND run successfully; the
# probes are therefore not executed when cross-compiling. In that case the
# HAVE_* variables keep whatever value they already have.
IF(NOT MSVC)

  # Probe 1: __sync_bool_compare_and_swap(), __sync_add_and_fetch() and
  # __sync_lock_test_and_set() must exist and return the expected results.
  # Sets HAVE_IB_GCC_ATOMIC_BUILTINS on success.
  IF(NOT CMAKE_CROSSCOMPILING)
    CHECK_C_SOURCE_RUNS(
"
int main()
{
long x;
long y;
long res;
char c;
x = 10;
y = 123;
res = __sync_bool_compare_and_swap(&x, x, y);
if (!res || x != y) {
return(1);
}
x = 10;
y = 123;
res = __sync_bool_compare_and_swap(&x, x + 1, y);
if (res || x != 10) {
return(1);
}
x = 10;
y = 123;
res = __sync_add_and_fetch(&x, y);
if (res != 123 + 10 || x != 123 + 10) {
return(1);
}
c = 10;
res = __sync_lock_test_and_set(&c, 123);
if (res != 10 || c != 123) {
return(1);
}
return(0);
}"
      HAVE_IB_GCC_ATOMIC_BUILTINS
    )
  ENDIF()

  IF(HAVE_IB_GCC_ATOMIC_BUILTINS)
    ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1)
  ENDIF()

  # Probe 2: __sync_bool_compare_and_swap() must accept pthread_t operands.
  # Sets HAVE_IB_ATOMIC_PTHREAD_T_GCC on success.
  IF(NOT CMAKE_CROSSCOMPILING)
    CHECK_C_SOURCE_RUNS(
"
#include <pthread.h>
#include <string.h>
int main(int argc, char** argv) {
pthread_t x1;
pthread_t x2;
pthread_t x3;
memset(&x1, 0x0, sizeof(x1));
memset(&x2, 0x0, sizeof(x2));
memset(&x3, 0x0, sizeof(x3));
__sync_bool_compare_and_swap(&x1, x2, x3);
return(0);
}"
      HAVE_IB_ATOMIC_PTHREAD_T_GCC)
  ENDIF()

  IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC)
    ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1)
  ENDIF()

ENDIF()
# Solaris atomics: only probed when the target system is SunOS.
IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")

  # All four libc atomic functions must be present before
  # HAVE_IB_SOLARIS_ATOMICS is switched on.
  CHECK_FUNCTION_EXISTS(atomic_cas_ulong HAVE_ATOMIC_CAS_ULONG)
  CHECK_FUNCTION_EXISTS(atomic_cas_32 HAVE_ATOMIC_CAS_32)
  CHECK_FUNCTION_EXISTS(atomic_cas_64 HAVE_ATOMIC_CAS_64)
  CHECK_FUNCTION_EXISTS(atomic_add_long HAVE_ATOMIC_ADD_LONG)

  IF(HAVE_ATOMIC_CAS_ULONG
     AND HAVE_ATOMIC_CAS_32
     AND HAVE_ATOMIC_CAS_64
     AND HAVE_ATOMIC_ADD_LONG)
    SET(HAVE_IB_SOLARIS_ATOMICS 1)
  ENDIF()

  IF(HAVE_IB_SOLARIS_ATOMICS)
    ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1)
  ENDIF()

  # Decide whether HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS gets defined: the probe
  # below must compile with pthread_t operands passed to atomic_cas_32() or
  # atomic_cas_64(). It is skipped when cross-compiling.
  # NOTE(review): the probe includes only <pthread.h> and <string.h>, so the
  # atomic_cas_*() calls rely on an implicit declaration - confirm intended.
  IF(NOT CMAKE_CROSSCOMPILING)
    CHECK_C_SOURCE_COMPILES(
" #include <pthread.h>
#include <string.h>
int main(int argc, char** argv) {
pthread_t x1;
pthread_t x2;
pthread_t x3;
memset(&x1, 0x0, sizeof(x1));
memset(&x2, 0x0, sizeof(x2));
memset(&x3, 0x0, sizeof(x3));
if (sizeof(pthread_t) == 4) {
atomic_cas_32(&x1, x2, x3);
} else if (sizeof(pthread_t) == 8) {
atomic_cas_64(&x1, x2, x3);
} else {
return(1);
}
return(0);
}
" HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
  ENDIF()

  IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
    ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1)
  ENDIF()

ENDIF()
# Determine sizeof(pthread_t) and export it as -DSIZEOF_PTHREAD_T=<n>.
# The sources need it to choose between atomic_cas_32() and atomic_cas_64().
IF(UNIX)
  # CHECK_TYPE_SIZE is provided by the CheckTypeSize module. Include it here
  # so this file does not depend on a parent CMakeLists having loaded it.
  INCLUDE(CheckTypeSize)
  # pthread_t is only declared once <pthread.h> has been included by the probe.
  SET(CMAKE_EXTRA_INCLUDE_FILES pthread.h)
  CHECK_TYPE_SIZE(pthread_t SIZEOF_PTHREAD_T)
  # Clear the extra header again so later type-size checks are not affected.
  SET(CMAKE_EXTRA_INCLUDE_FILES)
ENDIF()
# Only emit the definition when a size is available (the check ran and
# succeeded, or the value was supplied from outside).
IF(SIZEOF_PTHREAD_T)
  ADD_DEFINITIONS(-DSIZEOF_PTHREAD_T=${SIZEOF_PTHREAD_T})
ENDIF()
# MSVC builds: no probing, the Windows atomics and the pause instruction
# macros are always defined.
IF(MSVC)
  ADD_DEFINITIONS(
    -DHAVE_WINDOWS_ATOMICS
    -DHAVE_IB_PAUSE_INSTRUCTION
  )
ENDIF()
# Header search path: the InnoDB "include" and "handler" directories, both
# located under storage/innobase of the top-level source tree.
INCLUDE_DIRECTORIES(
  ${CMAKE_SOURCE_DIR}/storage/innobase/include
  ${CMAKE_SOURCE_DIR}/storage/innobase/handler
)
# Sun Studio workaround: Sun Studio 12 crashes with the -xO2 flag but not
# with the higher optimization level -xO3, so rem/rem0rec.c is compiled with
# -xO3. Applied when the C compiler is SunPro, the release flags contain
# "O2", and the build type is not "Debug".
IF(CMAKE_C_COMPILER_ID MATCHES "SunPro"
   AND CMAKE_C_FLAGS_RELEASE MATCHES "O2"
   AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
  SET_SOURCE_FILES_PROPERTIES(
    ${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0rec.c
    PROPERTIES COMPILE_FLAGS -xO3)
ENDIF()
# 64-bit Windows workaround: compile the innodb/mem/* files without
# optimization (-Od) because of a 64-bit compiler error.
# See MySQL Bug #19424, #36366, #34297.
IF(MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
  SET_SOURCE_FILES_PROPERTIES(
    mem/mem0mem.c
    mem/mem0pool.c
    PROPERTIES COMPILE_FLAGS -Od)
ENDIF()
# Complete list of InnoDB source files, one per line, grouped by subdirectory.
# The order of the entries is kept exactly as it was.
SET(INNOBASE_SOURCES
  btr/btr0btr.c
  btr/btr0cur.c
  btr/btr0pcur.c
  btr/btr0sea.c
  buf/buf0buddy.c
  buf/buf0buf.c
  buf/buf0flu.c
  buf/buf0lru.c
  buf/buf0rea.c
  data/data0data.c
  data/data0type.c
  dict/dict0boot.c
  dict/dict0crea.c
  dict/dict0dict.c
  dict/dict0load.c
  dict/dict0mem.c
  dyn/dyn0dyn.c
  eval/eval0eval.c
  eval/eval0proc.c
  fil/fil0fil.c
  fsp/fsp0fsp.c
  fut/fut0fut.c
  fut/fut0lst.c
  ha/ha0ha.c
  ha/hash0hash.c
  ha/ha0storage.c
  ibuf/ibuf0ibuf.c
  pars/lexyy.c
  pars/pars0grm.c
  pars/pars0opt.c
  pars/pars0pars.c
  pars/pars0sym.c
  lock/lock0lock.c
  lock/lock0iter.c
  log/log0log.c
  log/log0recv.c
  mach/mach0data.c
  mem/mem0mem.c
  mem/mem0pool.c
  mtr/mtr0log.c
  mtr/mtr0mtr.c
  os/os0file.c
  os/os0proc.c
  os/os0sync.c
  os/os0thread.c
  page/page0cur.c
  page/page0page.c
  page/page0zip.c
  que/que0que.c
  handler/ha_innodb.cc
  handler/handler0alter.cc
  handler/i_s.cc
  handler/mysql_addons.cc
  read/read0read.c
  rem/rem0cmp.c
  rem/rem0rec.c
  row/row0ext.c
  row/row0ins.c
  row/row0merge.c
  row/row0mysql.c
  row/row0purge.c
  row/row0row.c
  row/row0sel.c
  row/row0uins.c
  row/row0umod.c
  row/row0undo.c
  row/row0upd.c
  row/row0vers.c
  srv/srv0que.c
  srv/srv0srv.c
  srv/srv0start.c
  sync/sync0arr.c
  sync/sync0rw.c
  sync/sync0sync.c
  thr/thr0loc.c
  trx/trx0i_s.c
  trx/trx0purge.c
  trx/trx0rec.c
  trx/trx0roll.c
  trx/trx0rseg.c
  trx/trx0sys.c
  trx/trx0trx.c
  trx/trx0undo.c
  usr/usr0sess.c
  ut/ut0byte.c
  ut/ut0dbg.c
  ut/ut0list.c
  ut/ut0mem.c
  ut/ut0rbt.c
  ut/ut0rnd.c
  ut/ut0ut.c
  ut/ut0vec.c
  ut/ut0wqueue.c
)
# Legacy option: WITH_INNODB is still honoured by mapping it onto
# WITH_INNOBASE_STORAGE_ENGINE.
IF(WITH_INNODB)
  SET(WITH_INNOBASE_STORAGE_ENGINE TRUE)
ENDIF()
# The plugin's CMakeLists.txt still needs to work with previous versions of
# MySQL, so the way the target is declared depends on what the surrounding
# source tree provides.
IF(EXISTS ${SOURCE_DIR}/storage/mysql_storage_engine.cmake)

  # Old plugin support (Windows only): the helper script declares the target,
  # then the DLL is forced to be named ha_innodb.
  INCLUDE(${SOURCE_DIR}/storage/mysql_storage_engine.cmake)
  MYSQL_STORAGE_ENGINE(INNOBASE)
  # LIB_LOCATION is only usable when the ha_innobase target was created, so
  # the rename is applied only in that case.
  GET_TARGET_PROPERTY(LIB_LOCATION ha_innobase LOCATION)
  IF(LIB_LOCATION)
    SET_TARGET_PROPERTIES(ha_innobase
      PROPERTIES OUTPUT_NAME ha_innodb)
  ENDIF()

ELSEIF(MYSQL_VERSION_ID LESS "50137")

  # Windows only, no plugin support: build a static library instead.
  IF(NOT SOURCE_SUBLIBS)
    ADD_DEFINITIONS(-DMYSQL_SERVER)
    ADD_LIBRARY(innobase STATIC ${INNOBASE_SOURCES})
    # mysqld_error.h is required; it is built as part of GenError.
    ADD_DEPENDENCIES(innobase GenError)
  ENDIF()

ELSE()

  # New plugin support, cross-platform; the base name of the shared module
  # is "ha_innodb".
  MYSQL_ADD_PLUGIN(innobase
    ${INNOBASE_SOURCES}
    STORAGE_ENGINE
    MODULE_OUTPUT_NAME ha_innodb
    LINK_LIBRARIES ${ZLIB_LIBRARY})

ENDIF()

351
perfschema/COPYING Normal file
View file

@ -0,0 +1,351 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
========
The licenses for most software are designed to take away your freedom
to share and change it. By contrast, the GNU General Public License is
intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not price.
Our General Public Licenses are designed to make sure that you have
the freedom to distribute copies of free software (and charge for this
service if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid anyone
to deny you these rights or to ask you to surrender the rights. These
restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether gratis
or for a fee, you must give the recipients all the rights that you
have. You must make sure that they, too, receive or can get the source
code. And you must show them these terms so they know their rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software patents.
We wish to avoid the danger that redistributors of a free program will
individually obtain patent licenses, in effect making the program
proprietary. To prevent this, we have made it clear that any patent
must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains a
notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program",
below, refers to any such program or work, and a "work based on
the Program" means either the Program or any derivative work under
copyright law: that is to say, a work containing the Program or a
portion of it, either verbatim or with modifications and/or
translated into another language. (Hereinafter, translation is
included without limitation in the term "modification".) Each
licensee is addressed as "you".
Activities other than copying, distribution and modification are
not covered by this License; they are outside its scope. The act
of running the Program is not restricted, and the output from the
Program is covered only if its contents constitute a work based on
the Program (independent of having been made by running the
Program). Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any
warranty; and give any other recipients of the Program a copy of
this License along with the Program.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange
for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a. You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b. You must cause any work that you distribute or publish, that
in whole or in part contains or is derived from the Program
or any part thereof, to be licensed as a whole at no charge
to all third parties under the terms of this License.
c. If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display
an announcement including an appropriate copyright notice and
a notice that there is no warranty (or else, saying that you
provide a warranty) and that users may redistribute the
program under these conditions, and telling the user how to
view a copy of this License. (Exception: if the Program
itself is interactive but does not normally print such an
announcement, your work based on the Program is not required
to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the
Program, and can be reasonably considered independent and separate
works in themselves, then this License, and its terms, do not
apply to those sections when you distribute them as separate
works. But when you distribute the same sections as part of a
whole which is a work based on the Program, the distribution of
the whole must be on the terms of this License, whose permissions
for other licensees extend to the entire whole, and thus to each
and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or
contest your rights to work written entirely by you; rather, the
intent is to exercise the right to control the distribution of
derivative or collective works based on the Program.
In addition, mere aggregation of another work not based on the
Program with the Program (or with a work based on the Program) on
a volume of a storage or distribution medium does not bring the
other work under the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms
of Sections 1 and 2 above provided that you also do one of the
following:
a. Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of
Sections 1 and 2 above on a medium customarily used for
software interchange; or,
b. Accompany it with a written offer, valid for at least three
years, to give any third-party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange; or,
c. Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with
such an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete
source code means all the source code for all modules it contains,
plus any associated interface definition files, plus the scripts
used to control compilation and installation of the executable.
However, as a special exception, the source code distributed need
not include anything that is normally distributed (in either
source or binary form) with the major components (compiler,
kernel, and so on) of the operating system on which the executable
runs, unless that component itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this
License. However, parties who have received copies, or rights,
from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify
or distribute the Program or its derivative works. These actions
are prohibited by law if you do not accept this License.
Therefore, by modifying or distributing the Program (or any work
based on the Program), you indicate your acceptance of this
License to do so, and all its terms and conditions for copying,
distributing or modifying the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program
subject to these terms and conditions. You may not impose any
further restrictions on the recipients' exercise of the rights
granted herein. You are not responsible for enforcing compliance
by third parties to this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent
issues), conditions are imposed on you (whether by court order,
agreement or otherwise) that contradict the conditions of this
License, they do not excuse you from the conditions of this
License. If you cannot distribute so as to satisfy simultaneously
your obligations under this License and any other pertinent
obligations, then as a consequence you may not distribute the
Program at all. For example, if a patent license would not permit
royalty-free redistribution of the Program by all those who
receive copies directly or indirectly through you, then the only
way you could satisfy both it and this License would be to refrain
entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable
under any particular circumstance, the balance of the section is
intended to apply and the section as a whole is intended to apply
in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of
any such claims; this section has the sole purpose of protecting
the integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is
willing to distribute software through any other system and a
licensee cannot impose that choice.
This section is intended to make thoroughly clear what is believed
to be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces,
the original copyright holder who places the Program under this
License may add an explicit geographical distribution limitation
excluding those countries, so that distribution is permitted only
in or among countries not thus excluded. In such case, this
License incorporates the limitation as if written in the body of
this License.
9. The Free Software Foundation may publish revised and/or new
versions of the General Public License from time to time. Such
new versions will be similar in spirit to the present version, but
may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies a version number of this License which applies
to it and "any later version", you have the option of following
the terms and conditions either of that version or of any later
version published by the Free Software Foundation. If the Program
does not specify a version number of this License, you may choose
any version ever published by the Free Software Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the
author to ask for permission. For software which is copyrighted
by the Free Software Foundation, write to the Free Software
Foundation; we sometimes make exceptions for this. Our decision
will be guided by the two goals of preserving the free status of
all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
SERVICING, REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
=============================================
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these
terms.
To do so, attach the following notices to the program. It is safest to
attach them to the start of each source file to most effectively convey
the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
Copyright (C) YYYY NAME OF AUTHOR
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the
appropriate parts of the General Public License. Of course, the
commands you use may be called something other than `show w' and `show
c'; they could even be mouse-clicks or menu items--whatever suits your
program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
SIGNATURE OF TY COON, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library,
you may consider it more useful to permit linking proprietary
applications with the library. If this is what you want to do, use the
GNU Library General Public License instead of this License.

30
perfschema/COPYING.Google Normal file
View file

@ -0,0 +1,30 @@
Portions of this software contain modifications contributed by Google, Inc.
These contributions are used with the following license:
Copyright (c) 2008, Google Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* Neither the name of the Google Inc. nor the names of its
contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,30 @@
Portions of this software contain modifications contributed by Percona, Inc.
These contributions are used with the following license:
Copyright (c) 2008, 2009, Percona Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* Neither the name of the Percona Inc. nor the names of its
contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,31 @@
Portions of this software contain modifications contributed by
Sun Microsystems, Inc. These contributions are used with the following
license:
Copyright (c) 2009, Sun Microsystems, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials
provided with the distribution.
* Neither the name of Sun Microsystems, Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

1643
perfschema/ChangeLog Normal file

File diff suppressed because it is too large Load diff

1419
perfschema/Doxyfile Normal file

File diff suppressed because it is too large Load diff

343
perfschema/Makefile.am Normal file
View file

@ -0,0 +1,343 @@
# Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Process this file with automake to create Makefile.in
# Directory variables, each derived from a standard autoconf/automake
# directory variable.
MYSQLDATAdir= $(localstatedir)
MYSQLSHAREdir= $(pkgdatadir)
MYSQLBASEdir= $(prefix)
MYSQLLIBdir= $(pkglibdir)
# Installation directory used by the pkgplugin_LTLIBRARIES primary below
# and passed as -rpath in ha_innodb_la_LDFLAGS.
pkgplugindir= $(pkglibdir)/plugin
# Header search path: the server-wide include directories (source and build
# tree), the regex and sql directories of the server tree, this directory and
# its own include/ subdirectory, plus whatever configure substitutes for zlib.
INCLUDES= -I$(top_srcdir)/include -I$(top_builddir)/include \
-I$(top_srcdir)/regex \
-I$(srcdir)/include \
-I$(top_srcdir)/sql \
-I$(srcdir) @ZLIB_INCLUDES@
# Preprocessor definitions, substituted by configure.
DEFS= @DEFS@
# Headers (.h), inline-function files (.ic) and univ.i that belong to the
# sources but carry the noinst_ prefix, i.e. they are not installed.
# NOTE(review): the last entry, mem/mem0dbg.c, is a .c file; it does not
# appear in libinnobase_a_SOURCES, so presumably it is #included by another
# source file rather than compiled on its own -- confirm before moving it.
noinst_HEADERS= \
handler/ha_innodb.h \
handler/i_s.h \
include/btr0btr.h \
include/btr0btr.ic \
include/btr0cur.h \
include/btr0cur.ic \
include/btr0pcur.h \
include/btr0pcur.ic \
include/btr0sea.h \
include/btr0sea.ic \
include/btr0types.h \
include/buf0buddy.h \
include/buf0buddy.ic \
include/buf0buf.h \
include/buf0buf.ic \
include/buf0flu.h \
include/buf0flu.ic \
include/buf0lru.h \
include/buf0lru.ic \
include/buf0rea.h \
include/buf0types.h \
include/data0data.h \
include/data0data.ic \
include/data0type.h \
include/data0type.ic \
include/data0types.h \
include/db0err.h \
include/dict0boot.h \
include/dict0boot.ic \
include/dict0crea.h \
include/dict0crea.ic \
include/dict0dict.h \
include/dict0dict.ic \
include/dict0load.h \
include/dict0load.ic \
include/dict0mem.h \
include/dict0mem.ic \
include/dict0types.h \
include/dyn0dyn.h \
include/dyn0dyn.ic \
include/eval0eval.h \
include/eval0eval.ic \
include/eval0proc.h \
include/eval0proc.ic \
include/fil0fil.h \
include/fsp0fsp.h \
include/fsp0fsp.ic \
include/fsp0types.h \
include/fut0fut.h \
include/fut0fut.ic \
include/fut0lst.h \
include/fut0lst.ic \
include/ha0ha.h \
include/ha0ha.ic \
include/ha0storage.h \
include/ha0storage.ic \
include/ha_prototypes.h \
include/handler0alter.h \
include/hash0hash.h \
include/hash0hash.ic \
include/ibuf0ibuf.h \
include/ibuf0ibuf.ic \
include/ibuf0types.h \
include/lock0iter.h \
include/lock0lock.h \
include/lock0lock.ic \
include/lock0priv.h \
include/lock0priv.ic \
include/lock0types.h \
include/log0log.h \
include/log0log.ic \
include/log0recv.h \
include/log0recv.ic \
include/mach0data.h \
include/mach0data.ic \
include/mem0dbg.h \
include/mem0dbg.ic \
include/mem0mem.h \
include/mem0mem.ic \
include/mem0pool.h \
include/mem0pool.ic \
include/mtr0log.h \
include/mtr0log.ic \
include/mtr0mtr.h \
include/mtr0mtr.ic \
include/mtr0types.h \
include/mysql_addons.h \
include/os0file.h \
include/os0proc.h \
include/os0proc.ic \
include/os0sync.h \
include/os0sync.ic \
include/os0thread.h \
include/os0thread.ic \
include/page0cur.h \
include/page0cur.ic \
include/page0page.h \
include/page0page.ic \
include/page0types.h \
include/page0zip.h \
include/page0zip.ic \
include/pars0grm.h \
include/pars0opt.h \
include/pars0opt.ic \
include/pars0pars.h \
include/pars0pars.ic \
include/pars0sym.h \
include/pars0sym.ic \
include/pars0types.h \
include/que0que.h \
include/que0que.ic \
include/que0types.h \
include/read0read.h \
include/read0read.ic \
include/read0types.h \
include/rem0cmp.h \
include/rem0cmp.ic \
include/rem0rec.h \
include/rem0rec.ic \
include/rem0types.h \
include/row0ext.h \
include/row0ext.ic \
include/row0ins.h \
include/row0ins.ic \
include/row0merge.h \
include/row0mysql.h \
include/row0mysql.ic \
include/row0purge.h \
include/row0purge.ic \
include/row0row.h \
include/row0row.ic \
include/row0sel.h \
include/row0sel.ic \
include/row0types.h \
include/row0uins.h \
include/row0uins.ic \
include/row0umod.h \
include/row0umod.ic \
include/row0undo.h \
include/row0undo.ic \
include/row0upd.h \
include/row0upd.ic \
include/row0vers.h \
include/row0vers.ic \
include/srv0que.h \
include/srv0srv.h \
include/srv0srv.ic \
include/srv0start.h \
include/sync0arr.h \
include/sync0arr.ic \
include/sync0rw.h \
include/sync0rw.ic \
include/sync0sync.h \
include/sync0sync.ic \
include/sync0types.h \
include/thr0loc.h \
include/thr0loc.ic \
include/trx0i_s.h \
include/trx0purge.h \
include/trx0purge.ic \
include/trx0rec.h \
include/trx0rec.ic \
include/trx0roll.h \
include/trx0roll.ic \
include/trx0rseg.h \
include/trx0rseg.ic \
include/trx0sys.h \
include/trx0sys.ic \
include/trx0trx.h \
include/trx0trx.ic \
include/trx0types.h \
include/trx0undo.h \
include/trx0undo.ic \
include/trx0xa.h \
include/univ.i \
include/usr0sess.h \
include/usr0sess.ic \
include/usr0types.h \
include/ut0auxconf.h \
include/ut0byte.h \
include/ut0byte.ic \
include/ut0dbg.h \
include/ut0list.h \
include/ut0list.ic \
include/ut0lst.h \
include/ut0mem.h \
include/ut0mem.ic \
include/ut0rbt.h \
include/ut0rnd.h \
include/ut0rnd.ic \
include/ut0sort.h \
include/ut0ut.h \
include/ut0ut.ic \
include/ut0vec.h \
include/ut0vec.ic \
include/ut0wqueue.h \
mem/mem0dbg.c
# Static flavour of the storage engine. libinnobase.a is declared as an
# optional (EXTRA_) library; what is actually built as a non-installed library
# is whatever configure substitutes for @plugin_innobase_static_target@
# (presumably either nothing or libinnobase.a -- confirm in plug.in).
EXTRA_LIBRARIES= libinnobase.a
noinst_LIBRARIES= @plugin_innobase_static_target@
# Complete list of C and C++ sources of the engine, one subdirectory per
# module. The same list is reused for the shared plugin through
# ha_innodb_la_SOURCES.
libinnobase_a_SOURCES= \
btr/btr0btr.c \
btr/btr0cur.c \
btr/btr0pcur.c \
btr/btr0sea.c \
buf/buf0buddy.c \
buf/buf0buf.c \
buf/buf0flu.c \
buf/buf0lru.c \
buf/buf0rea.c \
data/data0data.c \
data/data0type.c \
dict/dict0boot.c \
dict/dict0crea.c \
dict/dict0dict.c \
dict/dict0load.c \
dict/dict0mem.c \
dyn/dyn0dyn.c \
eval/eval0eval.c \
eval/eval0proc.c \
fil/fil0fil.c \
fsp/fsp0fsp.c \
fut/fut0fut.c \
fut/fut0lst.c \
ha/ha0ha.c \
ha/ha0storage.c \
ha/hash0hash.c \
handler/ha_innodb.cc \
handler/handler0alter.cc \
handler/i_s.cc \
handler/mysql_addons.cc \
ibuf/ibuf0ibuf.c \
lock/lock0iter.c \
lock/lock0lock.c \
log/log0log.c \
log/log0recv.c \
mach/mach0data.c \
mem/mem0mem.c \
mem/mem0pool.c \
mtr/mtr0log.c \
mtr/mtr0mtr.c \
os/os0file.c \
os/os0proc.c \
os/os0sync.c \
os/os0thread.c \
page/page0cur.c \
page/page0page.c \
page/page0zip.c \
pars/lexyy.c \
pars/pars0grm.c \
pars/pars0opt.c \
pars/pars0pars.c \
pars/pars0sym.c \
que/que0que.c \
read/read0read.c \
rem/rem0cmp.c \
rem/rem0rec.c \
row/row0ext.c \
row/row0ins.c \
row/row0merge.c \
row/row0mysql.c \
row/row0purge.c \
row/row0row.c \
row/row0sel.c \
row/row0uins.c \
row/row0umod.c \
row/row0undo.c \
row/row0upd.c \
row/row0vers.c \
srv/srv0que.c \
srv/srv0srv.c \
srv/srv0start.c \
sync/sync0arr.c \
sync/sync0rw.c \
sync/sync0sync.c \
thr/thr0loc.c \
trx/trx0i_s.c \
trx/trx0purge.c \
trx/trx0rec.c \
trx/trx0roll.c \
trx/trx0rseg.c \
trx/trx0sys.c \
trx/trx0trx.c \
trx/trx0undo.c \
usr/usr0sess.c \
ut/ut0byte.c \
ut/ut0dbg.c \
ut/ut0list.c \
ut/ut0mem.c \
ut/ut0rbt.c \
ut/ut0rnd.c \
ut/ut0ut.c \
ut/ut0vec.c \
ut/ut0wqueue.c
# Per-target flags of the static library: the C++ handler sources are compiled
# with the same $(AM_CFLAGS) as the C sources.
libinnobase_a_CXXFLAGS= $(AM_CFLAGS)
libinnobase_a_CFLAGS= $(AM_CFLAGS)
# Dynamically loadable flavour of the storage engine. ha_innodb.la is declared
# as an optional (EXTRA_) libtool library; what is actually built and installed
# into $(pkgplugindir) is whatever configure substitutes for
# @plugin_innobase_shared_target@.
EXTRA_LTLIBRARIES= ha_innodb.la
pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@
# -module asks libtool for a loadable module; -rpath names the install dir.
ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir)
# The plugin uses the static library's flags plus $(INNODB_DYNAMIC_CFLAGS).
ha_innodb_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
ha_innodb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS)
# Built from exactly the same source list as the static library.
ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES)
# Extra files shipped in the distribution tarball: the CMake build file, the
# plugin configure fragment, and the parser/lexer inputs with the scripts that
# regenerate them.
EXTRA_DIST= CMakeLists.txt plug.in \
pars/make_bison.sh pars/make_flex.sh \
pars/pars0grm.y pars/pars0lex.l
# Don't update the files from bitkeeper: a pattern rule without a recipe
# cancels make's built-in rule that would check files out of SCCS/s.<file>.
%::SCCS/s.%

3730
perfschema/btr/btr0btr.c Normal file

File diff suppressed because it is too large Load diff

4969
perfschema/btr/btr0cur.c Normal file

File diff suppressed because it is too large Load diff

591
perfschema/btr/btr0pcur.c Normal file
View file

@ -0,0 +1,591 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file btr/btr0pcur.c
The index tree persistent cursor
Created 2/23/1996 Heikki Tuuri
*******************************************************/
#include "btr0pcur.h"
#ifdef UNIV_NONINL
#include "btr0pcur.ic"
#endif
#include "ut0byte.h"
#include "rem0cmp.h"
#include "trx0trx.h"
/**************************************************************//**
Creates a persistent cursor object in dynamically allocated memory and
brings it to its initial state with btr_pcur_init().
@return own: persistent cursor */
UNIV_INTERN
btr_pcur_t*
btr_pcur_create_for_mysql(void)
/*============================*/
{
	btr_pcur_t*	new_cursor = mem_alloc(sizeof(btr_pcur_t));

	/* No index is attached to a fresh cursor; clear the field before
	the generic initialization runs. */
	new_cursor->btr_cur.index = NULL;

	btr_pcur_init(new_cursor);

	return(new_cursor);
}
/**************************************************************//**
Releases a persistent cursor object: frees the stored record buffer, if
there is one, resets the cursor fields to their unpositioned values and
finally frees the cursor itself. */
UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
	btr_pcur_t*	cursor)	/*!< in, own: persistent cursor */
{
	/* Release the copy of the stored record prefix first. */
	if (cursor->old_rec_buf) {
		mem_free(cursor->old_rec_buf);

		cursor->old_rec_buf = NULL;
	}

	/* Forget the stored position ... */
	cursor->old_rec = NULL;
	cursor->old_n_fields = 0;
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	/* ... and the current one. */
	cursor->btr_cur.page_cur.rec = NULL;
	cursor->latch_mode = BTR_NO_LATCHES;
	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;

	mem_free(cursor);
}
/**************************************************************//**
Stores the position of the cursor. An initial segment of the record the
cursor is positioned on, before, or after is copied to the cursor data
structure; if the cursor is before the first or after the last record in an
EMPTY tree, only a flag is set instead. NOTE that the page where the cursor
is positioned must not be empty if the index tree is not totally empty! */
UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
	mtr_t*		mtr)	/*!< in: mtr */
{
	buf_block_t*	cur_block;
	dict_index_t*	cur_index;
	rec_t*		cur_rec;
	page_t*		cur_page;
	ulint		rec_offs;

	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	cur_block = btr_pcur_get_block(cursor);
	cur_index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));

	cur_rec = page_cur_get_rec(btr_pcur_get_page_cur(cursor));
	cur_page = page_align(cur_rec);
	rec_offs = page_offset(cur_rec);

	/* The caller must hold an S or X fix on the block in this mtr. */
	ut_ad(mtr_memo_contains(mtr, cur_block, MTR_MEMO_PAGE_S_FIX)
	      || mtr_memo_contains(mtr, cur_block, MTR_MEMO_PAGE_X_FIX));
	ut_a(cursor->latch_mode != BTR_NO_LATCHES);

	if (UNIV_UNLIKELY(page_get_n_recs(cur_page) == 0)) {
		/* A page without records is only allowed in an empty index
		tree, so the page may have no siblings. In this case the
		modify_clock is not stored; restoring the position always
		does a search. */

		ut_a(btr_page_get_next(cur_page, mtr) == FIL_NULL);
		ut_a(btr_page_get_prev(cur_page, mtr) == FIL_NULL);

		cursor->old_stored = BTR_PCUR_OLD_STORED;

		cursor->rel_pos = page_rec_is_supremum_low(rec_offs)
			? BTR_PCUR_AFTER_LAST_IN_TREE
			: BTR_PCUR_BEFORE_FIRST_IN_TREE;

		return;
	}

	/* Pick the user record whose prefix gets stored, and remember how
	the cursor relates to it. */
	if (page_rec_is_supremum_low(rec_offs)) {
		/* On the supremum: store the last user record. */
		cur_rec = page_rec_get_prev(cur_rec);
		cursor->rel_pos = BTR_PCUR_AFTER;
	} else if (page_rec_is_infimum_low(rec_offs)) {
		/* On the infimum: store the first user record. */
		cur_rec = page_rec_get_next(cur_rec);
		cursor->rel_pos = BTR_PCUR_BEFORE;
	} else {
		cursor->rel_pos = BTR_PCUR_ON;
	}

	cursor->old_stored = BTR_PCUR_OLD_STORED;

	cursor->old_rec = dict_index_copy_rec_order_prefix(
		cur_index, cur_rec, &cursor->old_n_fields,
		&cursor->old_rec_buf, &cursor->buf_size);

	/* Remember the block and its modify clock at the time of storing. */
	cursor->block_when_stored = cur_block;
	cursor->modify_clock = buf_block_get_modify_clock(cur_block);
}
/**************************************************************//**
Copies the stored position of a pcur to another pcur. The whole cursor
struct is copied; the receiving cursor gets its own copy of the stored
record buffer, so the two cursors never share old_rec_buf. Copying a cursor
onto itself is a no-op. */
UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
	btr_pcur_t*	pcur_receive,	/*!< in/out: pcur which will receive
					the position info */
	btr_pcur_t*	pcur_donate)	/*!< in: pcur from which the info is
					copied */
{
	if (pcur_receive == pcur_donate) {
		/* Without this guard the buffer would be freed and then
		read again as the copy source below. */

		return;
	}

	/* The receiver's old buffer is about to be overwritten by the
	struct copy; release it first so that it is not leaked. */
	if (pcur_receive->old_rec_buf) {
		mem_free(pcur_receive->old_rec_buf);
	}

	ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t));

	if (pcur_donate->old_rec_buf) {
		/* Give the receiver a private copy of the buffer. */
		pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size);

		ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
			  pcur_donate->buf_size);

		/* old_rec points into the buffer: keep the same offset
		within the new buffer. */
		pcur_receive->old_rec = pcur_receive->old_rec_buf
			+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
	}

	/* NOTE(review): presumably redundant after the struct copy above;
	kept as in the original. */
	pcur_receive->old_n_fields = pcur_donate->old_n_fields;
}
/**************************************************************//**
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
to the last record LESS OR EQUAL to the stored record;
(2) cursor was positioned on a page infimum record: restores the position to
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
restores to before first or after the last in the tree.
The function aborts (ut_error) if no position has been stored in the cursor
or if the cursor has never been positioned.
@return TRUE if the cursor position was stored when it was on a user
record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
UNIV_INTERN
ibool
btr_pcur_restore_position_func(
/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
{
dict_index_t* index;
dtuple_t* tuple;
ulint mode;
ulint old_mode;
mem_heap_t* heap;
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
/* Sanity check: a position must have been stored and the cursor must be,
or must have been, positioned. Otherwise dump the cursor bytes and, if
known, the transaction to stderr, and abort. */
if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
|| UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
&& cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
putc('\n', stderr);
if (cursor->trx_if_known) {
trx_print(stderr, cursor->trx_if_known, 0);
}
ut_error;
}
if (UNIV_UNLIKELY
(cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
|| cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
/* In these cases we do not try an optimistic restoration,
but always do a search */
/* The first argument selects the side of the index: TRUE when the
position was stored as 'before first in tree'. */
btr_cur_open_at_index_side(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
cursor->block_when_stored = btr_pcur_get_block(cursor);
return(FALSE);
}
/* From here on the stored position refers to a record prefix, which
btr_pcur_store_position() saved in old_rec / old_n_fields. */
ut_a(cursor->old_rec);
ut_a(cursor->old_n_fields);
if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
|| UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
/* Try optimistic restoration */
/* The block and the modify clock remembered when the position was stored
are handed to buf_page_optimistic_get(); presumably it succeeds only if
the block has not been modified since then -- confirm in buf0buf.c. */
if (UNIV_LIKELY(buf_page_optimistic_get(
latch_mode,
cursor->block_when_stored,
cursor->modify_clock,
file, line, mtr))) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
buf_block_dbg_add_level(btr_pcur_get_block(cursor),
SYNC_TREE_NODE);
if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
const rec_t* rec;
const ulint* offsets1;
const ulint* offsets2;
#endif /* UNIV_DEBUG */
cursor->latch_mode = latch_mode;
#ifdef UNIV_DEBUG
/* Debug check: the record under the cursor must compare equal to the
stored record prefix on the first old_n_fields fields. */
rec = btr_pcur_get_rec(cursor);
heap = mem_heap_create(256);
offsets1 = rec_get_offsets(
cursor->old_rec, index, NULL,
cursor->old_n_fields, &heap);
offsets2 = rec_get_offsets(
rec, index, NULL,
cursor->old_n_fields, &heap);
ut_ad(!cmp_rec_rec(cursor->old_rec,
rec, offsets1, offsets2,
index));
mem_heap_free(heap);
#endif /* UNIV_DEBUG */
return(TRUE);
}
/* The position was stored relative to the page infimum or supremum
(rel_pos is not BTR_PCUR_ON): the cursor is positioned, but not on the
stored user record. Note that cursor->latch_mode is not updated on this
path. */
return(FALSE);
}
}
/* If optimistic restoration did not succeed, open the cursor anew */
/* The heap holds the search tuple built from the stored record prefix, and
later possibly the offsets computed for the final comparison; it is freed
on both exit paths below. */
heap = mem_heap_create(256);
tuple = dict_index_build_data_tuple(index, cursor->old_rec,
cursor->old_n_fields, heap);
/* Save the old search mode of the cursor */
old_mode = cursor->search_mode;
/* Choose the search mode that matches cases (1)-(3) of the function
comment: ON -> less or equal, AFTER -> greater, BEFORE -> less. */
if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
mode = PAGE_CUR_LE;
} else if (cursor->rel_pos == BTR_PCUR_AFTER) {
mode = PAGE_CUR_G;
} else {
ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
mode = PAGE_CUR_L;
}
btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
cursor, 0, file, line, mtr);
/* Restore the old search mode */
cursor->search_mode = old_mode;
/* Success means: the position was stored ON a user record, the cursor is
again on a user record, and that record compares equal to the tuple built
from the stored prefix. */
if (cursor->rel_pos == BTR_PCUR_ON
&& btr_pcur_is_on_user_rec(cursor)
&& 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
rec_get_offsets(
btr_pcur_get_rec(cursor), index,
NULL, ULINT_UNDEFINED, &heap))) {
/* We have to store the NEW value for the modify clock, since
the cursor can now be on a different page! But we can retain
the value of old_rec */
cursor->block_when_stored = btr_pcur_get_block(cursor);
cursor->modify_clock = buf_block_get_modify_clock(
cursor->block_when_stored);
cursor->old_stored = BTR_PCUR_OLD_STORED;
mem_heap_free(heap);
return(TRUE);
}
mem_heap_free(heap);
/* We have to store new position information, modify_clock etc.,
to the cursor because it can now be on a different page, the record
under it may have been removed, etc. */
btr_pcur_store_position(cursor, mtr);
return(FALSE);
}
/**************************************************************//**
Releases the page latch and bufferfix reserved by a positioned cursor that
holds a leaf latch (the cursor latch mode must not be BTR_NO_LATCHES), and
marks the cursor as latchless and formerly positioned.
NOTE! If the cursor holds the latch in a modifying mode such as
BTR_MODIFY_LEAF, there should not exist changes made by the current
mini-transaction to the data protected by the cursor latch, as then the
latch must not be released until mtr_commit. */
UNIV_INTERN
void
btr_pcur_release_leaf(
/*==================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	btr_leaf_page_release(btr_pcur_get_block(cursor),
			      cursor->latch_mode, mtr);

	/* The cursor no longer holds any latch; its position is only
	remembered from now on. */
	cursor->latch_mode = BTR_NO_LATCHES;
	cursor->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/*********************************************************//**
Moves the persistent cursor to the first record on the next page: latches
the next page in the cursor latch mode, releases the latch on the current
page and bufferunfixes it, and places the cursor before the first record of
the next page. Note that there must not be modifications on the current
page, as then the x-latch can be released only in mtr_commit. */
UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor; must be on the
				last record of the current page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	buf_block_t*	cur_block;
	buf_block_t*	next_block;
	page_t*		page;
	page_t*		next_page;
	ulint		next_page_no;
	ulint		space;
	ulint		zip_size;

	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
	ut_ad(btr_pcur_is_after_last_on_page(cursor));

	/* Moving the cursor invalidates any stored position. */
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	page = btr_pcur_get_page(cursor);
	next_page_no = btr_page_get_next(page, mtr);

	cur_block = btr_pcur_get_block(cursor);
	space = buf_block_get_space(cur_block);
	zip_size = buf_block_get_zip_size(cur_block);

	ut_ad(next_page_no != FIL_NULL);

	/* Latch the successor before letting go of the current page. */
	next_block = btr_block_get(space, zip_size, next_page_no,
				   cursor->latch_mode, mtr);
	next_page = buf_block_get_frame(next_block);

#ifdef UNIV_BTR_DEBUG
	/* Both pages must use the same row format, and the successor
	must point back to the current page. */
	ut_a(page_is_comp(next_page) == page_is_comp(page));
	ut_a(btr_page_get_prev(next_page, mtr)
	     == buf_block_get_page_no(cur_block));
#endif /* UNIV_BTR_DEBUG */

	next_block->check_index_page_at_flush = TRUE;

	btr_leaf_page_release(cur_block, cursor->latch_mode, mtr);

	page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));

	page_check_dir(next_page);
}
/*********************************************************//**
Moves the persistent cursor backward if it is on the first record of the
page. Commits mtr and starts it again. To prevent a possible deadlock, the
operation first stores the position of the cursor, commits mtr, then
acquires the necessary latches (the page and its left sibling) and restores
the cursor position before returning. The alphabetical position of the
cursor is guaranteed to be sensible on return, but it may happen that the
cursor is not positioned on the last record of any page, because the
structure of the tree may have changed during the time when the cursor had
no latches. */
UNIV_INTERN
void
btr_pcur_move_backward_from_page(
/*=============================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor, must be on the first
				record of the current page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ulint		orig_latch_mode;
	ulint		prev_latch_mode;
	ulint		prev_page_no;
	ulint		space;
	page_t*		page;

	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
	ut_ad(btr_pcur_is_before_first_on_page(cursor));
	ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));

	orig_latch_mode = cursor->latch_mode;

	/* Select the latch mode that also latches the previous page. */
	if (orig_latch_mode == BTR_SEARCH_LEAF) {

		prev_latch_mode = BTR_SEARCH_PREV;

	} else if (orig_latch_mode == BTR_MODIFY_LEAF) {

		prev_latch_mode = BTR_MODIFY_PREV;
	} else {
		prev_latch_mode = 0; /* To eliminate compiler warning */
		ut_error;
	}

	/* Give up all latches, then reposition with the wider latch mode. */
	btr_pcur_store_position(cursor, mtr);

	mtr_commit(mtr);

	mtr_start(mtr);

	btr_pcur_restore_position(prev_latch_mode, cursor, mtr);

	page = btr_pcur_get_page(cursor);

	prev_page_no = btr_page_get_prev(page, mtr);

	/* NOTE(review): the value of space is never read in this function. */
	space = buf_block_get_space(btr_pcur_get_block(cursor));

	if (prev_page_no != FIL_NULL) {
		buf_block_t*	prev_block
			= btr_pcur_get_btr_cur(cursor)->left_block;

		if (btr_pcur_is_before_first_on_page(cursor)) {
			/* Still on the infimum: step over to the end of
			the previous page and release the current one. */

			btr_leaf_page_release(btr_pcur_get_block(cursor),
					      orig_latch_mode, mtr);

			page_cur_set_after_last(
				prev_block, btr_pcur_get_page_cur(cursor));
		} else {
			/* The repositioned cursor did not end on an infimum
			record on a page. Cursor repositioning acquired a
			latch also on the previous page, but we do not need
			the latch: release it. */

			btr_leaf_page_release(prev_block,
					      orig_latch_mode, mtr);
		}
	}

	cursor->latch_mode = orig_latch_mode;
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*********************************************************//**
Steps the persistent cursor back by one record in the tree. If no records
are left, the cursor stays 'before first in tree'.
@return TRUE if the cursor was not before first in tree */
UNIV_INTERN
ibool
btr_pcur_move_to_prev(
/*==================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
				function may release the page latch */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	/* Moving the cursor invalidates any stored position. */
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	if (!btr_pcur_is_before_first_on_page(cursor)) {
		/* Common case: the predecessor is on the same page. */
		btr_pcur_move_to_prev_on_page(cursor);

		return(TRUE);
	}

	if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
		/* Nothing precedes the cursor. */

		return(FALSE);
	}

	/* The predecessor lives on the previous page. */
	btr_pcur_move_backward_from_page(cursor, mtr);

	return(TRUE);
}
/**************************************************************//**
Opens a persistent cursor and, if mode is PAGE_CUR_G or PAGE_CUR_GE, makes
sure it ends up on the first user record satisfying the search condition;
if no such user record exists, the cursor is moved forward with
btr_pcur_move_to_next_user_rec(). The modes PAGE_CUR_L and PAGE_CUR_LE are
not implemented yet: the function aborts with ut_error for them. The
latching mode must be BTR_SEARCH_LEAF or BTR_MODIFY_LEAF. */
UNIV_INTERN
void
btr_pcur_open_on_user_rec_func(
/*===========================*/
	dict_index_t*	index,		/*!< in: index */
	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
					BTR_MODIFY_LEAF */
	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
					cursor */
	const char*	file,		/*!< in: file name */
	ulint		line,		/*!< in: line where called */
	mtr_t*		mtr)		/*!< in: mtr */
{
	btr_pcur_open_func(index, tuple, mode, latch_mode, cursor,
			   file, line, mtr);

	if (mode != PAGE_CUR_GE && mode != PAGE_CUR_G) {
		ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));

		/* Not implemented yet */

		ut_error;
	} else if (btr_pcur_is_after_last_on_page(cursor)) {
		/* The search ended on the page supremum: advance to the
		next user record. */

		btr_pcur_move_to_next_user_rec(cursor, mtr);
	}
}

1889
perfschema/btr/btr0sea.c Normal file

File diff suppressed because it is too large Load diff

696
perfschema/buf/buf0buddy.c Normal file
View file

@ -0,0 +1,696 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file buf/buf0buddy.c
Binary buddy allocator for compressed pages
Created December 2006 by Marko Makela
*******************************************************/
#define THIS_MODULE
#include "buf0buddy.h"
#ifdef UNIV_NONINL
# include "buf0buddy.ic"
#endif
#undef THIS_MODULE
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0flu.h"
#include "page0zip.h"
/* Statistic counters */
#ifdef UNIV_DEBUG
/** Number of frames allocated from the buffer pool to the buddy system.
Incremented in buf_buddy_block_register() and decremented in
buf_buddy_block_free(). Exists in UNIV_DEBUG builds only.
Protected by buf_pool_mutex. */
static ulint buf_buddy_n_frames;
#endif /* UNIV_DEBUG */
/** Statistics of the buddy system, indexed by block size.
The array has BUF_BUDDY_SIZES + 1 elements.
Protected by buf_pool_mutex. */
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
/**********************************************************************//**
Get the address of the buddy of a compressed page frame. The buddy is the
neighbouring block of the same size: it lies below the page if the bit
'size' is set in the page address, and above the page otherwise.
@return the buddy relative of page */
UNIV_INLINE
byte*
buf_buddy_get(
/*==========*/
	byte*	page,	/*!< in: compressed page */
	ulint	size)	/*!< in: page size in bytes */
{
	ut_ad(ut_is_2pow(size));
	ut_ad(size >= BUF_BUDDY_LOW);
	ut_ad(size < BUF_BUDDY_HIGH);
	ut_ad(!ut_align_offset(page, size));

	return((((ulint) page) & size) ? page - size : page + size);
}
/**********************************************************************//**
Put a block at the head of the buddy free list buf_pool->zip_free[i]. */
UNIV_INLINE
void
buf_buddy_add_to_free(
/*==================*/
	buf_page_t*	bpage,	/*!< in,own: block to be freed */
	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	/* The current list head gets touched by the insertion below, so
	it is declared valid for the duration of the list operation. */
	buf_page_t*	old_head = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (old_head) {
		UNIV_MEM_VALID(old_head, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	ut_ad(buf_pool_mutex_own());
	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
	ut_ad(buf_pool->zip_free[i].start != bpage);

	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	/* Both the previous head and the new one are free memory again. */
	if (old_head) {
		UNIV_MEM_FREE(old_head, BUF_BUDDY_LOW << i);
	}

	UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
Take a block off the buddy free list buf_pool->zip_free[i]. */
UNIV_INLINE
void
buf_buddy_remove_from_free(
/*=======================*/
	buf_page_t*	bpage,	/*!< in: block to be removed */
	ulint		i)	/*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
	/* The list neighbours get touched by the removal below, so they
	are declared valid for the duration of the list operation. */
	buf_page_t*	before = UT_LIST_GET_PREV(list, bpage);
	buf_page_t*	after = UT_LIST_GET_NEXT(list, bpage);

	if (before) {
		UNIV_MEM_VALID(before, BUF_BUDDY_LOW << i);
	}

	if (after) {
		UNIV_MEM_VALID(after, BUF_BUDDY_LOW << i);
	}

	ut_ad(!before || buf_page_get_state(before) == BUF_BLOCK_ZIP_FREE);
	ut_ad(!after || buf_page_get_state(after) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG_VALGRIND */

	ut_ad(buf_pool_mutex_own());
	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

	UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);

#ifdef UNIV_DEBUG_VALGRIND
	/* The neighbours stay on the free list: they are free memory. */
	if (before) {
		UNIV_MEM_FREE(before, BUF_BUDDY_LOW << i);
	}

	if (after) {
		UNIV_MEM_FREE(after, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
Try to allocate a block from buf_pool->zip_free[]. If the free list of
size class i is empty, a block of the next larger size class is allocated
recursively and split in two: the half at the higher address is put on the
free list of size class i and the half at the lower address is returned.
@return allocated block, or NULL if buf_pool->zip_free[] was empty */
static
void*
buf_buddy_alloc_zip(
/*================*/
	ulint	i)	/*!< in: index of buf_pool->zip_free[] */
{
	buf_page_t*	bpage;

	ut_ad(buf_pool_mutex_own());
	ut_a(i < BUF_BUDDY_SIZES);

#ifndef UNIV_DEBUG_VALGRIND
	/* Valgrind would complain about accessing free memory. */
	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
			      ut_ad(buf_page_get_state(ut_list_node_313)
				    == BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */

	bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	if (bpage) {
		/* A free block of the requested size exists: take it. */
		UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);

		buf_buddy_remove_from_free(bpage, i);
	} else if (i + 1 < BUF_BUDDY_SIZES) {
		/* Attempt to split. */
		bpage = buf_buddy_alloc_zip(i + 1);

		if (bpage) {
			/* The second half of the larger block becomes a
			free block of size class i. */
			buf_page_t*	buddy = (buf_page_t*)
				(((char*) bpage) + (BUF_BUDDY_LOW << i));

			ut_ad(!buf_pool_contains_zip(buddy));
			ut_d(memset(buddy, i, BUF_BUDDY_LOW << i));
			buddy->state = BUF_BLOCK_ZIP_FREE;
			buf_buddy_add_to_free(buddy, i);
		}
	}

#ifdef UNIV_DEBUG
	/* Fill the returned block with a recognizable pattern. */
	if (bpage) {
		memset(bpage, ~i, BUF_BUDDY_LOW << i);
	}
#endif /* UNIV_DEBUG */

	/* A block of size class i is BUF_BUDDY_LOW << i bytes long.
	BUF_BUDDY_SIZES is the number of size classes, not a size in
	bytes, so it must not be used to compute the length here. */
	UNIV_MEM_ALLOC(bpage, BUF_BUDDY_LOW << i);

	return(bpage);
}
/**********************************************************************//**
Deallocate a buffer frame of UNIV_PAGE_SIZE. The block descriptor that owns
the frame is looked up in buf_pool->zip_hash, removed from that hash table,
and handed to buf_LRU_block_free_non_file_page(). The caller must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex. */
static
void
buf_buddy_block_free(
/*=================*/
void* buf) /*!< in: buffer frame to deallocate */
{
/* Hash value computed from the frame address. */
const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
buf_page_t* bpage;
buf_block_t* block;
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
/* Find the descriptor whose frame is buf. The assertion argument is applied
to every descriptor visited in the hash chain. */
HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
&& bpage->in_zip_hash && !bpage->in_page_hash),
((buf_block_t*) bpage)->frame == buf);
/* The frame must have been registered with the buddy allocator. */
ut_a(bpage);
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
ut_ad(!bpage->in_page_hash);
ut_ad(bpage->in_zip_hash);
ut_d(bpage->in_zip_hash = FALSE);
HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
/* Debug builds zero-fill the frame; the frame contents are then declared
invalid (presumably for memory checkers such as Valgrind -- confirm). */
ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
/* The descriptor found in zip_hash is used as a buf_block_t. */
block = (buf_block_t*) bpage;
mutex_enter(&block->mutex);
buf_LRU_block_free_non_file_page(block);
mutex_exit(&block->mutex);
/* Debug-only bookkeeping of frames owned by the buddy system. */
ut_ad(buf_buddy_n_frames > 0);
ut_d(buf_buddy_n_frames--);
}
/**********************************************************************//**
Hand a buffer block over to the buddy allocator: the block changes state
from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY and its page descriptor is
inserted into buf_pool->zip_hash. The caller must hold buf_pool_mutex and
must not hold buf_pool_zip_mutex. */
static
void
buf_buddy_block_register(
/*=====================*/
	buf_block_t*	block)	/*!< in: buffer frame to allocate */
{
	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
	buf_page_t*	bpage = &block->page;

	ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);

	buf_block_set_state(block, BUF_BLOCK_MEMORY);

	/* The block must own a page-aligned frame. */
	ut_a(block->frame);
	ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));

	/* The descriptor may not be in either hash table yet. */
	ut_ad(!bpage->in_page_hash);
	ut_ad(!bpage->in_zip_hash);
	ut_d(bpage->in_zip_hash = TRUE);

	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);

	/* Debug-only bookkeeping of frames owned by the buddy system. */
	ut_d(buf_buddy_n_frames++);
}
/**********************************************************************//**
Carve a block of size class i out of a bigger free object of size class j.
The object is halved repeatedly; each time the half at the higher address
is put on the free list of its size class, until the part that remains at
the start of the object has the requested size.
@return allocated block */
static
void*
buf_buddy_alloc_from(
/*=================*/
	void*		buf,	/*!< in: a block that is free to use */
	ulint		i,	/*!< in: index of buf_pool->zip_free[] */
	ulint		j)	/*!< in: size of buf as an index
				of buf_pool->zip_free[] */
{
	/* Size of the still undivided part of buf, in bytes. */
	ulint	offs = BUF_BUDDY_LOW << j;

	ut_ad(j <= BUF_BUDDY_SIZES);
	ut_ad(j >= i);
	ut_ad(!ut_align_offset(buf, offs));

	/* Add the unused parts of the block to the free lists. */
	for (; j > i; ) {
		buf_page_t*	free_half;

		/* Step down one size class; the second half of the
		remaining part becomes a free block of that class. */
		--j;
		offs = offs >> 1;

		free_half = (buf_page_t*) ((byte*) buf + offs);
		ut_d(memset(free_half, j, BUF_BUDDY_LOW << j));
		free_half->state = BUF_BLOCK_ZIP_FREE;

#ifndef UNIV_DEBUG_VALGRIND
		/* Valgrind would complain about accessing free memory. */
		ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
				      ut_ad(buf_page_get_state(
						    ut_list_node_313)
					    == BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */

		buf_buddy_add_to_free(free_half, j);
	}

	return(buf);
}
/**********************************************************************//**
Allocate a block. Sources are tried in this order: the buddy free lists
(only when i < BUF_BUDDY_SIZES), the buf_pool->free list, and finally,
if lru != NULL, a block obtained with buf_LRU_get_free_block().
The calling thread must hold buf_pool_mutex and must not hold
buf_pool_zip_mutex or any block->mutex. The buf_pool_mutex may only be
released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
	ulint	i,	/*!< in: index of buf_pool->zip_free[],
			or BUF_BUDDY_SIZES */
	ibool*	lru)	/*!< in: pointer to a variable that will be assigned
			TRUE if storage was allocated from the LRU list
			and buf_pool_mutex was temporarily released,
			or NULL if the LRU list should not be used */
{
	void*		chunk	= NULL;
	buf_block_t*	frame_block;

	ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));

	if (i < BUF_BUDDY_SIZES) {
		/* First choice: the buddy system itself. */
		chunk = buf_buddy_alloc_zip(i);
	}

	if (!chunk) {
		/* Second choice: a block from the buf_pool->free list. */
		frame_block = buf_LRU_get_free_only();

		if (!frame_block) {
			if (!lru) {
				/* The caller forbids use of the LRU list. */
				return(NULL);
			}

			/* Last resort: replace an uncompressed page in
			the buffer pool. buf_pool_mutex is released while
			doing so, and the caller is told via *lru. */
			buf_pool_mutex_exit();
			frame_block = buf_LRU_get_free_block(0);
			*lru = TRUE;
			buf_pool_mutex_enter();
		}

		/* Register the whole frame with the allocator and carve
		the requested size class out of it. */
		buf_buddy_block_register(frame_block);
		chunk = buf_buddy_alloc_from(frame_block->frame, i,
					     BUF_BUDDY_SIZES);
	}

	buf_buddy_stat[i].used++;

	return(chunk);
}
/**********************************************************************//**
Try to relocate the control block of a compressed page. Only a block in
state BUF_BLOCK_ZIP_PAGE that buf_page_can_relocate() accepts is moved;
the move is done while holding buf_pool_zip_mutex.
@return TRUE if relocated */
static
ibool
buf_buddy_relocate_block(
/*=====================*/
	buf_page_t*	bpage,	/*!< in: block to relocate */
	buf_page_t*	dpage)	/*!< in: free block to relocate to */
{
	ibool		moved	= FALSE;
	buf_page_t*	prev;

	ut_ad(buf_pool_mutex_own());

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		/* The only state that may be relocated. */
		break;
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
		/* Cannot relocate dirty pages. */
		return(FALSE);
	}

	mutex_enter(&buf_pool_zip_mutex);

	if (buf_page_can_relocate(bpage)) {
		buf_relocate(bpage, dpage);
		ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);

		/* Re-link dpage at its position in buf_pool->zip_clean:
		remember its predecessor, unlink it, then insert it again
		after that predecessor, or at the head if there is none. */
		prev = UT_LIST_GET_PREV(list, dpage);
		UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);

		if (prev == NULL) {
			UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
		} else {
			UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean,
					     prev, dpage);
		}

		UNIV_MEM_INVALID(bpage, sizeof *bpage);
		moved = TRUE;
	}

	mutex_exit(&buf_pool_zip_mutex);

	return(moved);
}
/**********************************************************************//**
Try to relocate a block: move the contents of src into the free block dst
of the same size class. On success the relocation counters of
buf_buddy_stat[i] are updated.
@return TRUE if relocated */
static
ibool
buf_buddy_relocate(
/*===============*/
	void*	src,	/*!< in: block to relocate */
	void*	dst,	/*!< in: free block to relocate to */
	ulint	i)	/*!< in: index of buf_pool->zip_free[] */
{
	const ulint	n_bytes		= BUF_BUDDY_LOW << i;
	ullint		start_usec	= ut_time_us(NULL);
	ibool		moved		= FALSE;
	buf_page_t*	zpage;
	mutex_t*	page_mutex;

	ut_ad(buf_pool_mutex_own());
	ut_ad(!mutex_own(&buf_pool_zip_mutex));
	ut_ad(!ut_align_offset(src, n_bytes));
	ut_ad(!ut_align_offset(dst, n_bytes));
	UNIV_MEM_ASSERT_W(dst, n_bytes);

	/* All memory from buf_buddy_alloc() is assumed to hold either
	compressed pages or buf_page_t objects covering compressed pages.

	We peek inside the allocated object: anything of PAGE_ZIP_MIN_SIZE
	or larger is taken to be a compressed page carrying a valid
	space_id and page_no in its page header (if those fields are
	invalid, the block simply cannot be relocated), and anything in
	the size class of sizeof(buf_page_t) is taken to be a properly
	initialized buf_page_t object. */

	if (n_bytes >= PAGE_ZIP_MIN_SIZE) {
		/* A compressed page.

		src may have been split into smaller blocks, some of
		them free, so the header reads below may touch free
		memory. That memory is "owned" by the buddy allocator
		(and was allocated from the buffer pool), so this is
		harmless; it can only trigger bogus Valgrind memcheck
		warnings in UNIV_DEBUG_VALGRIND builds. */
		const byte*	hdr = (const byte*) src;

		zpage = buf_page_hash_get(
			mach_read_from_4(
				hdr + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
			mach_read_from_4(hdr + FIL_PAGE_OFFSET));

		if (!zpage || zpage->zip.data != src) {
			/* Probably freshly allocated by
			buf_LRU_get_free_block() but not yet added to
			buf_pool->page_hash: it cannot be relocated. */
			return(FALSE);
		}

		ut_ad(!buf_pool_watch_is(zpage));

		if (page_zip_get_size(&zpage->zip) != n_bytes) {
			/* Different size: every block covered by src
			would have to be relocated. Give up for the sake
			of simplicity. */
			ut_ad(page_zip_get_size(&zpage->zip) < n_bytes);
			return(FALSE);
		}

		/* The block must have been allocated, but it may
		contain uninitialized data. */
		UNIV_MEM_ASSERT_W(src, n_bytes);

		page_mutex = buf_page_get_mutex(zpage);
		mutex_enter(page_mutex);

		if (buf_page_can_relocate(zpage)) {
			/* Copy the compressed page and repoint the
			descriptor at the new location. */
			ut_a(zpage->zip.data == src);
			memcpy(dst, src, n_bytes);
			zpage->zip.data = dst;
			moved = TRUE;
		}

		mutex_exit(page_mutex);
	} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
		/* This must be a buf_page_t object. */
		UNIV_MEM_ASSERT_RW(src, n_bytes);

		if (buf_buddy_relocate_block(src, dst)) {
			moved = TRUE;
		}
	}

	if (!moved) {
		return(FALSE);
	}

	/* The old location no longer holds valid data; account for the
	relocation and the time it took. */
	UNIV_MEM_INVALID(src, n_bytes);
	buf_buddy_stat[i].relocated++;
	buf_buddy_stat[i].relocated_usec += ut_time_us(NULL) - start_usec;

	return(TRUE);
}
/**********************************************************************//**
Deallocate a block. The usage counter of size class i is decremented, and
the block is then merged with its buddy into the next bigger size class
for as long as the buddy is free or can be made free by relocating it.
A block that grows to slot BUF_BUDDY_SIZES is passed to
buf_buddy_block_free(); a block that cannot be merged any further is
added to buf_pool->zip_free[i]. */
UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint i) /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
{
buf_page_t* bpage;
buf_page_t* buddy;
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES);
ut_ad(buf_buddy_stat[i].used > 0);
buf_buddy_stat[i].used--;
/* Reached once on entry and again after every successful merge, with
buf and i describing the (possibly enlarged) block to be freed. */
recombine:
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
if (i == BUF_BUDDY_SIZES) {
/* Slot BUF_BUDDY_SIZES is not a zip_free[] list: release the block
through buf_buddy_block_free() instead. */
buf_buddy_block_free(buf);
return;
}
ut_ad(i < BUF_BUDDY_SIZES);
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
ut_ad(!buf_pool_contains_zip(buf));
/* Try to combine adjacent blocks. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
if (buddy->state != BUF_BLOCK_ZIP_FREE) {
goto buddy_nonfree;
}
/* The field buddy->state can only be trusted for free blocks.
If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */
/* Scan the free list of this size class for the buddy. */
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
if (bpage == buddy) {
/* Also entered by goto from the second relocation attempt below,
where bpage is a free block still linked in zip_free[i]. */
buddy_free:
/* The buddy is free: recombine */
buf_buddy_remove_from_free(bpage, i);
/* Also entered by goto from the first relocation attempt below,
where the free block has already been unlinked from zip_free[i]. */
buddy_free2:
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
ut_ad(!buf_pool_contains_zip(buddy));
/* Move to the next bigger size class; the merged block starts at
the lower of the two buddy addresses. */
i++;
buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
goto recombine;
}
ut_a(bpage != buf);
{
/* Fetch the successor before the node is marked as freed again. */
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
bpage = next;
}
}
#ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree:
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
/* The buddy is not free. Is there a free block of this size? */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
/* Remove the block from the free list, because a successful
buf_buddy_relocate() will overwrite bpage->list. */
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
buf_buddy_remove_from_free(bpage, i);
/* Try to relocate the buddy of buf to the free block. */
if (buf_buddy_relocate(buddy, bpage, i)) {
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free2;
}
/* Relocation failed: put the free block back on its list. */
buf_buddy_add_to_free(bpage, i);
/* Try to relocate the buddy of the free block to buf. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
/* The buddy must not be (completely) free, because we
always recombine adjacent free blocks.
(Parts of the buddy can be free in
buf_pool->zip_free[j] with j < i.) */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(
ut_list_node_313)
== BUF_BLOCK_ZIP_FREE
&& ut_list_node_313 != buddy)));
#endif /* !UNIV_DEBUG_VALGRIND */
if (buf_buddy_relocate(buddy, buf, i)) {
/* buf now holds the relocated data; continue with the free block
bpage and its vacated buddy as the pair to merge. */
buf = bpage;
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free;
}
}
/* Free the block to the buddy list. */
bpage = buf;
#ifdef UNIV_DEBUG
if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
/* This area has most likely been allocated for at
least one compressed-only block descriptor. Check
that there are no live objects in the area. This is
not a complete check: it may yield false positives as
well as false negatives. Also, due to buddy blocks
being recombined, it is possible (although unlikely)
that this branch is never reached. */
char* c;
# ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing
uninitialized memory. Besides, Valgrind performs a
more exhaustive check, at every memory access. */
const buf_page_t* b = buf;
const buf_page_t* const b_end = (buf_page_t*)
((char*) b + (BUF_BUDDY_LOW << i));
for (; b < b_end; b++) {
/* Avoid false positives (and cause false
negatives) by checking for b->space < 1000. */
if ((b->state == BUF_BLOCK_ZIP_PAGE
|| b->state == BUF_BLOCK_ZIP_DIRTY)
&& b->space > 0 && b->space < 1000) {
fprintf(stderr,
"buddy dirty %p %u (%u,%u) %p,%lu\n",
(void*) b,
b->state, b->space, b->offset,
buf, i);
}
}
# endif /* !UNIV_DEBUG_VALGRIND */
/* Scramble the block. This should make any pointers
invalid and trigger a segmentation violation. Because
the scrambling can be reversed, it may be possible to
track down the object pointing to the freed data by
dereferencing the unscrambled bpage->LRU or
bpage->list pointers. */
for (c = (char*) buf + (BUF_BUDDY_LOW << i);
c-- > (char*) buf; ) {
*c = ~*c ^ i;
}
} else {
/* Fill large blocks with a constant pattern. */
memset(bpage, i, BUF_BUDDY_LOW << i);
}
#endif /* UNIV_DEBUG */
/* Mark the block free and link it into the free list of its size
class. */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_add_to_free(bpage, i);
}

4346
perfschema/buf/buf0buf.c Normal file

File diff suppressed because it is too large Load diff

1824
perfschema/buf/buf0flu.c Normal file

File diff suppressed because it is too large Load diff

2135
perfschema/buf/buf0lru.c Normal file

File diff suppressed because it is too large Load diff

656
perfschema/buf/buf0rea.c Normal file
View file

@ -0,0 +1,656 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file buf/buf0rea.c
The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "buf0rea.h"
#include "fil0fil.h"
#include "mtr0mtr.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "ibuf0ibuf.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
#include "srv0start.h"
#include "srv0srv.h"
/** The linear read-ahead area size */
#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA
/** If the number of pending reads exceeds buf_pool->curr_size divided by the
number below, read-ahead is not done: this is to prevent flooding the buffer
pool with i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
/********************************************************************//**
Low-level function which reads a page from a file into the buffer
buf_pool if it is not already there, in which case it does nothing.
The page is set up for the read by buf_page_init_for_read() and the read
is then posted with fil_io(); for a synchronous read the i/o is completed
here with buf_page_io_complete(), otherwise completion is left to an
i/o-handler thread.
@return 1 if a read request was queued, 0 if the page already resided
in buf_pool, or if the page is in the doublewrite buffer blocks in
which case it is never read into the pool, or if the tablespace does
not exist or is being dropped */
static
ulint
buf_read_page_low(
/*==============*/
	ulint*	err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
			trying to read from a non-existent tablespace, or a
			tablespace which is just now being dropped */
	ibool	sync,	/*!< in: TRUE if synchronous aio is desired */
	ulint	mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
			at read-ahead functions) */
	ulint	space,	/*!< in: space id */
	ulint	zip_size,/*!< in: compressed page size, or 0 */
	ibool	unzip,	/*!< in: TRUE=request uncompressed page */
	ib_int64_t tablespace_version, /*!< in: if the space memory object has
			this timestamp different from what we are giving here,
			treat the tablespace as dropped; this is a timestamp we
			use to stop dangling page reads from a tablespace
			which we have DISCARDed + IMPORTed back */
	ulint	offset)	/*!< in: page number */
{
	buf_page_t*	page;
	ulint		io_type;

	*err = DB_SUCCESS;

	/* Separate the wake-later flag from the read mode proper. */
	io_type = OS_FILE_READ | (mode & OS_AIO_SIMULATED_WAKE_LATER);
	mode &= ~OS_AIO_SIMULATED_WAKE_LATER;

	if (trx_doublewrite && space == TRX_SYS_SPACE) {
		/* Pages inside either doublewrite block are never read
		into the pool. */
		if ((offset >= trx_doublewrite->block1
		     && offset < trx_doublewrite->block1
		     + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
		    || (offset >= trx_doublewrite->block2
			&& offset < trx_doublewrite->block2
			+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)) {

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Warning: trying to read"
				" doublewrite buffer page %lu\n",
				(ulong) offset);

			return(0);
		}
	}

	if (ibuf_bitmap_page(zip_size, offset)
	    || trx_sys_hdr_page(space, offset)) {

		/* The trx sys header is so low in the latching order
		that we play safe and do not leave the i/o-completion to
		an asynchronous i/o-thread. Ibuf bitmap pages must always
		be read with synchronous i/o, to make sure they do not
		get involved in thread deadlocks. */
		sync = TRUE;
	}

	/* This call also checks whether the tablespace does not exist
	or is being dropped; once the page has been initialized in the
	buffer pool for the read, DISCARD cannot proceed until the read
	has completed. */
	page = buf_page_init_for_read(err, mode, space, zip_size, unzip,
				      tablespace_version, offset);
	if (!page) {

		return(0);
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Posting read request for page %lu, sync %lu\n",
			(ulong) offset,
			(ulong) sync);
	}
#endif

	ut_ad(buf_page_in_file(page));

	if (!zip_size) {
		/* Uncompressed page: read a full page into the frame. */
		ut_a(buf_page_get_state(page) == BUF_BLOCK_FILE_PAGE);

		*err = fil_io(io_type,
			      sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
			      ((buf_block_t*) page)->frame, page);
	} else {
		/* Compressed page: read zip_size bytes into zip.data. */
		*err = fil_io(io_type,
			      sync, space, zip_size, offset, 0, zip_size,
			      page->zip.data, page);
	}

	ut_a(*err == DB_SUCCESS);

	if (sync) {
		/* The i/o is already completed when we arrive from
		fil_read */
		buf_page_io_complete(page);
	}

	return(1);
}
/********************************************************************//**
High-level function which reads a page from a file into the buffer
buf_pool if it is not already there. The read is issued through
buf_read_page_low() in synchronous mode; afterwards the LRU free margin
is flushed if necessary and the LRU i/o statistics are incremented.
@return TRUE if page has been read in, FALSE in case of failure */
UNIV_INTERN
ibool
buf_read_page(
/*==========*/
	ulint	space,	/*!< in: space id */
	ulint	zip_size,/*!< in: compressed page size in bytes, or 0 */
	ulint	offset)	/*!< in: page number */
{
	const ib_int64_t	version = fil_space_get_version(space);
	ulint			n_queued;
	ulint			io_err;

	/* Synchronous aio mode saves thread switches: hence TRUE. */
	n_queued = buf_read_page_low(&io_err, TRUE, BUF_READ_ANY_PAGE,
				     space, zip_size, FALSE,
				     version, offset);
	srv_buf_pool_reads += n_queued;

	if (io_err == DB_TABLESPACE_DELETED) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Error: trying to access"
			" tablespace %lu page no. %lu,\n"
			"InnoDB: but the tablespace does not exist"
			" or is just being dropped.\n",
			(ulong) space, (ulong) offset);
	}

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

	/* Count this as one I/O operation for the LRU policy. */
	buf_LRU_stat_inc_io();

	return(n_queued != 0);
}
/********************************************************************//**
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
@return number of page read requests issued */
UNIV_INTERN
ulint
buf_read_ahead_linear(
/*==================*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
ulint offset) /*!< in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
{
ib_int64_t tablespace_version;
buf_page_t* bpage;
buf_frame_t* frame;
buf_page_t* pred_bpage = NULL;
ulint pred_offset;
ulint succ_offset;
ulint count;
int asc_or_desc;
ulint new_offset;
ulint fail_count;
ulint ibuf_mode;
ulint low, high;
ulint err;
ulint i;
const ulint buf_read_ahead_linear_area
= BUF_READ_AHEAD_LINEAR_AREA;
ulint threshold;
if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
/* No read-ahead to avoid thread deadlocks */
return(0);
}
/* Bounds [low, high) of the read-ahead area that contains offset. */
low = (offset / buf_read_ahead_linear_area)
* buf_read_ahead_linear_area;
high = (offset / buf_read_ahead_linear_area + 1)
* buf_read_ahead_linear_area;
if ((offset != low) && (offset != high - 1)) {
/* This is not a border page of the area: return */
return(0);
}
if (ibuf_bitmap_page(zip_size, offset)
|| trx_sys_hdr_page(space, offset)) {
/* If it is an ibuf bitmap page or trx sys hdr, we do
no read-ahead, as that could break the ibuf page access
order */
return(0);
}
/* Remember the tablespace version before we ask the tablespace size
below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
do not try to read outside the bounds of the tablespace! */
tablespace_version = fil_space_get_version(space);
buf_pool_mutex_enter();
if (high > fil_space_get_size(space)) {
buf_pool_mutex_exit();
/* The area is not whole, return */
return(0);
}
/* Too many reads are already pending: do not add more. */
if (buf_pool->n_pend_reads
> buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
buf_pool_mutex_exit();
return(0);
}
/* Check that almost all pages in the area have been accessed; if
offset == low, the accesses must be in a descending order, otherwise,
in an ascending order. */
asc_or_desc = 1;
if (offset == low) {
asc_or_desc = -1;
}
/* How many out of order accessed pages can we ignore
when working out the access pattern for linear readahead */
threshold = ut_min((64 - srv_read_ahead_threshold),
BUF_READ_AHEAD_AREA);
fail_count = 0;
for (i = low; i < high; i++) {
bpage = buf_page_hash_get(space, i);
if ((bpage == NULL) || !buf_page_is_accessed(bpage)) {
/* Not accessed */
fail_count++;
} else if (pred_bpage) {
/* Note that buf_page_is_accessed() returns
the time of the first access. If some blocks
of the extent existed in the buffer pool at
the time of a linear access pattern, the first
access times may be nonmonotonic, even though
the latest access times were linear. The
threshold (derived from srv_read_ahead_threshold
above) should help a little against this. */
int res = ut_ulint_cmp(
buf_page_is_accessed(bpage),
buf_page_is_accessed(pred_bpage));
/* Accesses not in the right order */
if (res != 0 && res != asc_or_desc) {
fail_count++;
}
}
if (fail_count > threshold) {
/* Too many failures: return */
buf_pool_mutex_exit();
return(0);
}
/* Remember the most recent accessed page for the next comparison. */
if (bpage && buf_page_is_accessed(bpage)) {
pred_bpage = bpage;
}
}
/* If we got this far, we know that enough pages in the area have
been accessed in the right order: linear read-ahead can be sensible */
bpage = buf_page_hash_get(space, offset);
if (bpage == NULL) {
buf_pool_mutex_exit();
return(0);
}
/* Pick the buffer that holds the page contents, depending on the
state of the page. */
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
frame = bpage->zip.data;
break;
case BUF_BLOCK_FILE_PAGE:
frame = ((buf_block_t*) bpage)->frame;
break;
default:
ut_error;
break;
}
/* Read the natural predecessor and successor page addresses from
the page; NOTE that because the calling thread may have an x-latch
on the page, we do not acquire an s-latch on the page, this is to
prevent deadlocks. Even if we read values which are nonsense, the
algorithm will work. */
pred_offset = fil_page_get_prev(frame);
succ_offset = fil_page_get_next(frame);
buf_pool_mutex_exit();
/* Decide in which direction to read ahead: new_offset is the page
just outside the current area in the direction of the scan. */
if ((offset == low) && (succ_offset == offset + 1)) {
/* This is ok, we can continue */
new_offset = pred_offset;
} else if ((offset == high - 1) && (pred_offset == offset - 1)) {
/* This is ok, we can continue */
new_offset = succ_offset;
} else {
/* Successor or predecessor not in the right order */
return(0);
}
/* Bounds [low, high) of the area that contains new_offset; this is
the area to be read ahead. */
low = (new_offset / buf_read_ahead_linear_area)
* buf_read_ahead_linear_area;
high = (new_offset / buf_read_ahead_linear_area + 1)
* buf_read_ahead_linear_area;
if ((new_offset != low) && (new_offset != high - 1)) {
/* This is not a border page of the area: return */
return(0);
}
if (high > fil_space_get_size(space)) {
/* The area is not whole, return */
return(0);
}
/* If we got this far, read-ahead can be sensible: do it */
if (ibuf_inside()) {
ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
} else {
ibuf_mode = BUF_READ_ANY_PAGE;
}
count = 0;
/* Since Windows XP seems to schedule the i/o handler thread
very eagerly, and consequently it does not wait for the
full read batch to be posted, we use special heuristics here */
os_aio_simulated_put_read_threads_to_sleep();
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */
if (!ibuf_bitmap_page(zip_size, i)) {
count += buf_read_page_low(
&err, FALSE,
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, FALSE, tablespace_version, i);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: in"
" linear readahead trying to access\n"
"InnoDB: tablespace %lu page %lu,\n"
"InnoDB: but the tablespace does not"
" exist or is just being dropped.\n",
(ulong) space, (ulong) i);
}
}
}
/* In simulated aio we wake the aio handler threads only after
queuing all aio requests, in native aio the following call does
nothing: */
os_aio_simulated_wake_handler_threads();
/* Flush pages from the end of the LRU list if necessary */
buf_flush_free_margin();
#ifdef UNIV_DEBUG
if (buf_debug_prints && (count > 0)) {
fprintf(stderr,
"LINEAR read-ahead space %lu offset %lu pages %lu\n",
(ulong) space, (ulong) offset, (ulong) count);
}
#endif /* UNIV_DEBUG */
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
buf_pool->stat.n_ra_pages_read += count;
return(count);
}
/********************************************************************//**
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. If the tablespace of a page turns out to be missing
or being dropped, ibuf_merge_or_delete_for_page() is called for that page
instead. */
UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
	ibool		sync,		/*!< in: TRUE if the caller
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
	const ulint*	space_ids,	/*!< in: array of space ids */
	const ib_int64_t* space_versions,/*!< in: the spaces must have
					this version number
					(timestamp), otherwise we
					discard the read; we use this
					to cancel reads if DISCARD +
					IMPORT may have changed the
					tablespace size */
	const ulint*	page_nos,	/*!< in: array of page numbers
					to read, with the highest page
					number the last in the
					array */
	ulint		n_stored)	/*!< in: number of elements
					in the arrays */
{
	ulint	idx;

	ut_ad(!ibuf_inside());
#ifdef UNIV_IBUF_DEBUG
	ut_a(n_stored < UNIV_PAGE_SIZE);
#endif

	/* Hold back while too many reads are pending. */
	while (buf_pool->n_pend_reads
	       > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
		os_thread_sleep(500000);
	}

	for (idx = 0; idx < n_stored; idx++) {
		const ulint	space_id = space_ids[idx];
		const ulint	page_no	 = page_nos[idx];
		const ulint	zip_size = fil_space_get_zip_size(space_id);
		ibool		space_gone = (zip_size == ULINT_UNDEFINED);

		if (!space_gone) {
			ulint	err;

			/* Only the last page may be read synchronously. */
			buf_read_page_low(&err,
					  sync && (idx + 1 == n_stored),
					  BUF_READ_ANY_PAGE, space_id,
					  zip_size, TRUE,
					  space_versions[idx], page_no);

			space_gone = (err == DB_TABLESPACE_DELETED);
		}

		if (UNIV_UNLIKELY(space_gone)) {
			/* We have deleted or are deleting the single-table
			tablespace: remove the entries for that page */
			ibuf_merge_or_delete_for_page(NULL, space_id,
						      page_no,
						      zip_size, FALSE);
		}
	}

	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Ibuf merge read-ahead space %lu pages %lu\n",
			(ulong) space_ids[0], (ulong) n_stored);
	}
#endif /* UNIV_DEBUG */
}
/********************************************************************//**
Issues read requests for pages which recovery wants to read in. Before
each request the function waits until the number of pending reads drops
below half of recv_n_pool_free_frames. */
UNIV_INTERN
void
buf_read_recv_pages(
/*================*/
	ibool		sync,		/*!< in: TRUE if the caller
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
	ulint		space,		/*!< in: space id */
	ulint		zip_size,	/*!< in: compressed page size in
					bytes, or 0 */
	const ulint*	page_nos,	/*!< in: array of page numbers
					to read, with the highest page
					number the last in the
					array */
	ulint		n_stored)	/*!< in: number of page numbers
					in the array */
{
	ib_int64_t	tablespace_version;
	ulint		n_waits;
	ulint		err;
	ulint		i;

	/* The value passed in is replaced by the size looked up here. */
	zip_size = fil_space_get_zip_size(space);

	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		return;
	}

	tablespace_version = fil_space_get_version(space);

	for (i = 0; i < n_stored; i++) {
		ibool	wait_for_io;
		ulint	mode;

		os_aio_print_debug = FALSE;

		/* Throttle: wake the handlers and sleep 10 ms at a time
		while too many reads are pending. */
		for (n_waits = 0;
		     buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2; ) {

			os_aio_simulated_wake_handler_threads();
			os_thread_sleep(10000);

			if (++n_waits > 1000) {
				fprintf(stderr,
					"InnoDB: Error: InnoDB has waited for"
					" 10 seconds for pending\n"
					"InnoDB: reads to the buffer pool to"
					" be finished.\n"
					"InnoDB: Number of pending reads %lu,"
					" pending pread calls %lu\n",
					(ulong) buf_pool->n_pend_reads,
					(ulong)os_file_n_pending_preads);

				os_aio_print_debug = TRUE;
			}
		}

		os_aio_print_debug = FALSE;

		/* Only the last page is read synchronously, and only if
		the caller asked for it; every other request is queued
		with the wake-later flag. */
		wait_for_io = (i + 1 == n_stored) && sync;
		mode = BUF_READ_ANY_PAGE;

		if (!wait_for_io) {
			mode |= OS_AIO_SIMULATED_WAKE_LATER;
		}

		buf_read_page_low(&err, wait_for_io, mode, space,
				  zip_size, TRUE, tablespace_version,
				  page_nos[i]);
	}

	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Recovery applies read-ahead pages %lu\n",
			(ulong) n_stored);
	}
#endif /* UNIV_DEBUG */
}

24
perfschema/compile-innodb Executable file
View file

@ -0,0 +1,24 @@
#! /bin/sh
#
# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA
#
# Directory containing this script; the helper scripts sourced below are
# expected to live next to it. "$0" is quoted so that a path containing
# whitespace is passed to dirname as a single argument.
path=`dirname "$0"`
# SETUP.sh is sourced (not executed) so the variables it defines are visible
# here. NOTE(review): $pentium_cflags, $fast_cflags, $pentium_configs and
# $static_link are presumably set by SETUP.sh -- confirm.
. "$path/SETUP.sh"
extra_flags="$pentium_cflags $fast_cflags -g"
extra_configs="$pentium_configs $static_link --with-plugins=innobase"
# NOTE(review): FINISH.sh presumably consumes extra_flags and extra_configs
# -- confirm against that script.
. "$path/FINISH.sh"

24
perfschema/compile-innodb-debug Executable file
View file

@ -0,0 +1,24 @@
#! /bin/sh
#
# Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA
#
# Directory containing this script; the helper scripts sourced below are
# expected to live next to it. "$0" is quoted so that a path containing
# whitespace is passed to dirname as a single argument.
path=`dirname "$0"`
# Forward the caller's arguments unchanged, followed by --with-debug=full.
# "$@" is quoted so that each original argument stays one word instead of
# being re-split on whitespace or glob-expanded.
# NOTE(review): $pentium_cflags, $debug_cflags, $pentium_configs and
# $debug_configs are presumably set by SETUP.sh -- confirm.
. "$path/SETUP.sh" "$@" --with-debug=full
extra_flags="$pentium_cflags $debug_cflags"
extra_configs="$pentium_configs $debug_configs --with-plugins=innobase"
# NOTE(review): FINISH.sh presumably consumes extra_flags and extra_configs
# -- confirm against that script.
. "$path/FINISH.sh"

764
perfschema/data/data0data.c Normal file
View file

@ -0,0 +1,764 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file data/data0data.c
SQL data field and tuple
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "data0data.h"
#ifdef UNIV_NONINL
#include "data0data.ic"
#endif
#ifndef UNIV_HOTBACKUP
#include "rem0rec.h"
#include "rem0cmp.h"
#include "page0page.h"
#include "page0zip.h"
#include "dict0dict.h"
#include "btr0cur.h"
#include <ctype.h>
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/** Dummy variable to catch access to uninitialized fields. In the
debug version, dtuple_create() will make all fields of dtuple_t point
to data_error. */
UNIV_INTERN byte data_error;
# ifndef UNIV_DEBUG_VALGRIND
/** this is used to fool the compiler in dtuple_validate */
UNIV_INTERN ulint data_dummy;
# endif /* !UNIV_DEBUG_VALGRIND */
#endif /* UNIV_DEBUG */
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Compares the length and the raw bytes of a data field against a given
length and buffer. When both lengths are UNIV_SQL_NULL the values are
considered equal and no bytes are compared.
@return	TRUE if the length and the content both match */
UNIV_INTERN
ibool
dfield_data_is_binary_equal(
/*========================*/
	const dfield_t*	field,	/*!< in: field */
	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
	const byte*	data)	/*!< in: data */
{
	if (len == dfield_get_len(field)) {
		/* Equal lengths: SQL NULL needs no byte comparison,
		anything else must match byte for byte. */
		if (len == UNIV_SQL_NULL
		    || memcmp(dfield_get_data(field), data, len) == 0) {

			return(TRUE);
		}
	}

	return(FALSE);
}
/************************************************************//**
Compares two data tuples field by field with cmp_dfield_dfield().
A tuple with fewer fields sorts before a tuple with more fields; only
tuples with the same number of fields are compared by content.
@return	1, 0, -1 if tuple1 is greater, equal, less, respectively,
than tuple2 */
UNIV_INTERN
int
dtuple_coll_cmp(
/*============*/
	const dtuple_t*	tuple1,	/*!< in: tuple 1 */
	const dtuple_t*	tuple2)	/*!< in: tuple 2 */
{
	ulint	n1;
	ulint	n2;
	ulint	pos;
	int	result = 0;

	ut_ad(tuple1 && tuple2);
	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(dtuple_check_typed(tuple1));
	ut_ad(dtuple_check_typed(tuple2));

	n1 = dtuple_get_n_fields(tuple1);
	n2 = dtuple_get_n_fields(tuple2);

	if (n1 != n2) {
		/* Different field counts decide the order on their own. */
		return(n1 < n2 ? -1 : 1);
	}

	/* Stop at the first field pair that does not compare equal. */
	for (pos = 0; result == 0 && pos < n1; pos++) {
		const dfield_t*	lhs = dtuple_get_nth_field(tuple1, pos);
		const dfield_t*	rhs = dtuple_get_nth_field(tuple2, pos);

		result = cmp_dfield_dfield(lhs, rhs);
	}

	return(result);
}
/*********************************************************************//**
Overrides the number of fields of a tuple. Both the field count and the
count of fields used in comparisons (n_fields_cmp) are set to the given
value. */
UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
	dtuple_t*	tuple,		/*!< in: tuple */
	ulint		n_fields)	/*!< in: number of fields */
{
	ut_ad(tuple);

	/* Keep the comparison count in step with the field count. */
	tuple->n_fields_cmp = n_fields;
	tuple->n_fields = n_fields;
}
/**********************************************************//**
Checks that a data field is typed.
@return TRUE if ok */
static
ibool
dfield_check_typed_no_assert(
/*=========================*/
const dfield_t* field) /*!< in: data field */
{
if (dfield_get_type(field)->mtype > DATA_MYSQL
|| dfield_get_type(field)->mtype < DATA_VARCHAR) {
fprintf(stderr,
"InnoDB: Error: data field type %lu, len %lu\n",
(ulong) dfield_get_type(field)->mtype,
(ulong) dfield_get_len(field));
return(FALSE);
}
return(TRUE);
}
/**********************************************************//**
Checks that a data tuple has at most REC_MAX_N_FIELDS fields and that
every field passes dfield_check_typed_no_assert(). On the first failure
the tuple contents are dumped to stderr; no assertion is raised.
@return	TRUE if ok */
UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	const ulint	n_fields = dtuple_get_n_fields(tuple);
	ibool		ok = TRUE;
	ulint		i;

	if (n_fields > REC_MAX_N_FIELDS) {
		fprintf(stderr,
			"InnoDB: Error: index entry has %lu fields\n",
			(ulong) n_fields);
		ok = FALSE;
	}

	/* The per-field check is skipped when the field count itself
	is already out of range. */
	for (i = 0; ok && i < n_fields; i++) {
		ok = dfield_check_typed_no_assert(
			dtuple_get_nth_field(tuple, i));
	}

	if (!ok) {
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, tuple);
		putc('\n', stderr);

		return(FALSE);
	}

	return(TRUE);
}
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/**********************************************************//**
Checks that the main type of a data field lies within the range
DATA_VARCHAR ... DATA_MYSQL. If it does not, a diagnostic is written to
stderr and ut_error is raised.
@return	TRUE if ok */
UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
	const dfield_t*	field)	/*!< in: data field */
{
	const ulint	mtype = dfield_get_type(field)->mtype;

	if (mtype < DATA_VARCHAR || mtype > DATA_MYSQL) {

		fprintf(stderr,
			"InnoDB: Error: data field type %lu, len %lu\n",
			(ulong) mtype,
			(ulong) dfield_get_len(field));

		ut_error;
	}

	return(TRUE);
}
/**********************************************************//**
Runs dfield_check_typed() on every field of a tuple, asserting that each
call succeeds.
@return	TRUE if ok */
UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	ulint	pos = 0;

	while (pos < dtuple_get_n_fields(tuple)) {
		const dfield_t*	field = dtuple_get_nth_field(tuple, pos);

		ut_a(dfield_check_typed(field));

		pos++;
	}

	return(TRUE);
}
/**********************************************************//**
Validates a complete tuple (all fields set): every byte of every non-NULL
field is read to provoke memory traps, and the tuple is then required to
pass dtuple_check_typed().
@return	TRUE if ok */
UNIV_INTERN
ibool
dtuple_validate(
/*============*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	ulint	n_fields;
	ulint	pos;

	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	n_fields = dtuple_get_n_fields(tuple);

	for (pos = 0; pos < n_fields; pos++) {
		const dfield_t*	field = dtuple_get_nth_field(tuple, pos);
		const ulint	len = dfield_get_len(field);
		const byte*	data;
#ifndef UNIV_DEBUG_VALGRIND
		ulint		left;
#endif /* !UNIV_DEBUG_VALGRIND */

		if (dfield_is_null(field)) {
			/* Nothing to dereference for SQL NULL. */
			continue;
		}

		data = dfield_get_data(field);

#ifndef UNIV_DEBUG_VALGRIND
		/* Touch each byte; the sum goes to a global so that the
		compiler cannot optimize the reads away. */
		for (left = len; left > 0; left--) {
			data_dummy += *data++;
		}
#endif /* !UNIV_DEBUG_VALGRIND */

		UNIV_MEM_ASSERT_RW(data, len);
	}

	ut_a(dtuple_check_typed(tuple));

	return(TRUE);
}
#endif /* UNIV_DEBUG */
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
UNIV_INTERN
void
dfield_print(
/*=========*/
const dfield_t* dfield) /*!< in: dfield */
{
const byte* data;
ulint len;
ulint i;
len = dfield_get_len(dfield);
data = dfield_get_data(dfield);
if (dfield_is_null(dfield)) {
fputs("NULL", stderr);
return;
}
switch (dtype_get_mtype(dfield_get_type(dfield))) {
case DATA_CHAR:
case DATA_VARCHAR:
for (i = 0; i < len; i++) {
int c = *data++;
putc(isprint(c) ? c : ' ', stderr);
}
if (dfield_is_ext(dfield)) {
fputs("(external)", stderr);
}
break;
case DATA_INT:
ut_a(len == 4); /* only works for 32-bit integers */
fprintf(stderr, "%d", (int)mach_read_from_4(data));
break;
default:
ut_error;
}
}
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
const dfield_t* dfield) /*!< in: dfield */
{
const byte* data;
ulint len;
ulint prtype;
ulint i;
ibool print_also_hex;
len = dfield_get_len(dfield);
data = dfield_get_data(dfield);
if (dfield_is_null(dfield)) {
fputs("NULL", stderr);
return;
}
prtype = dtype_get_prtype(dfield_get_type(dfield));
switch (dtype_get_mtype(dfield_get_type(dfield))) {
dulint id;
case DATA_INT:
switch (len) {
ulint val;
case 1:
val = mach_read_from_1(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x80;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 2:
val = mach_read_from_2(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x8000;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 3:
val = mach_read_from_3(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x800000;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 4:
val = mach_read_from_4(data);
if (!(prtype & DATA_UNSIGNED)) {
val &= ~0x80000000;
fprintf(stderr, "%ld", (long) val);
} else {
fprintf(stderr, "%lu", (ulong) val);
}
break;
case 6:
id = mach_read_from_6(data);
fprintf(stderr, "{%lu %lu}",
ut_dulint_get_high(id),
ut_dulint_get_low(id));
break;
case 7:
id = mach_read_from_7(data);
fprintf(stderr, "{%lu %lu}",
ut_dulint_get_high(id),
ut_dulint_get_low(id));
break;
case 8:
id = mach_read_from_8(data);
fprintf(stderr, "{%lu %lu}",
ut_dulint_get_high(id),
ut_dulint_get_low(id));
break;
default:
goto print_hex;
}
break;
case DATA_SYS:
switch (prtype & DATA_SYS_PRTYPE_MASK) {
case DATA_TRX_ID:
id = mach_read_from_6(data);
fprintf(stderr, "trx_id " TRX_ID_FMT,
TRX_ID_PREP_PRINTF(id));
break;
case DATA_ROLL_PTR:
id = mach_read_from_7(data);
fprintf(stderr, "roll_ptr {%lu %lu}",
ut_dulint_get_high(id), ut_dulint_get_low(id));
break;
case DATA_ROW_ID:
id = mach_read_from_6(data);
fprintf(stderr, "row_id {%lu %lu}",
ut_dulint_get_high(id), ut_dulint_get_low(id));
break;
default:
id = mach_dulint_read_compressed(data);
fprintf(stderr, "mix_id {%lu %lu}",
ut_dulint_get_high(id), ut_dulint_get_low(id));
}
break;
case DATA_CHAR:
case DATA_VARCHAR:
print_also_hex = FALSE;
for (i = 0; i < len; i++) {
int c = *data++;
if (!isprint(c)) {
print_also_hex = TRUE;
fprintf(stderr, "\\x%02x", (unsigned char) c);
} else {
putc(c, stderr);
}
}
if (dfield_is_ext(dfield)) {
fputs("(external)", stderr);
}
if (!print_also_hex) {
break;
}
data = dfield_get_data(dfield);
/* fall through */
case DATA_BINARY:
default:
print_hex:
fputs(" Hex: ",stderr);
for (i = 0; i < len; i++) {
fprintf(stderr, "%02lx", (ulint) *data++);
}
if (dfield_is_ext(dfield)) {
fputs("(external)", stderr);
}
}
}
/*************************************************************//**
Prints a dfield value with ut_print_buf(). At most the first 1000 bytes
are printed; when the value is longer, its total length (and whether it is
stored externally) is appended. SQL NULL prints " SQL NULL". */
static
void
dfield_print_raw(
/*=============*/
	FILE*		f,	/*!< in: output stream */
	const dfield_t*	dfield)	/*!< in: dfield */
{
	ulint	len;
	ulint	shown;

	len = dfield_get_len(dfield);

	if (dfield_is_null(dfield)) {
		fputs(" SQL NULL", f);

		return;
	}

	shown = ut_min(len, 1000);

	ut_print_buf(f, dfield_get_data(dfield), shown);

	if (shown != len) {
		/* The output was truncated: report the full size. */
		fprintf(f, "(total %lu bytes%s)",
			(ulong) len,
			dfield_is_ext(dfield) ? ", external" : "");
	}
}
/**********************************************************//**
The following function prints the contents of a tuple. */
UNIV_INTERN
void
dtuple_print(
/*=========*/
FILE* f, /*!< in: output stream */
const dtuple_t* tuple) /*!< in: tuple */
{
ulint n_fields;
ulint i;
n_fields = dtuple_get_n_fields(tuple);
fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);
for (i = 0; i < n_fields; i++) {
fprintf(f, " %lu:", (ulong) i);
dfield_print_raw(f, dtuple_get_nth_field(tuple, i));
putc(';', f);
putc('\n', f);
}
ut_ad(dtuple_validate(tuple));
}
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.
On success, each shortened field of entry is replaced by a buffer that
holds the locally stored prefix followed by a zero-filled external
reference, the field is flagged as external, and *n_ext is incremented
once per shortened field.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry, or if the index is not clustered */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in/out: index entry */
ulint* n_ext) /*!< in/out: number of
externally stored columns */
{
mem_heap_t* heap;
big_rec_t* vector;
dfield_t* dfield;
dict_field_t* ifield;
ulint size;
ulint n_fields;
ulint local_len;
ulint local_prefix_len;
/* Only entries of a clustered index are converted. */
if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
return(NULL);
}
/* local_len is the number of bytes that stay in the record for a
shortened column: the external reference, plus a data prefix for the
older table formats. */
if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
/* up to MySQL 5.1: store a 768-byte prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
} else {
/* new-format table: do not store any BLOB prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE;
}
ut_a(dtuple_check_typed_no_assert(entry));
size = rec_get_converted_size(index, entry, *n_ext);
/* Only a diagnostic: processing continues after the warning. */
if (UNIV_UNLIKELY(size > 1000000000)) {
fprintf(stderr,
"InnoDB: Warning: tuple size very big: %lu\n",
(ulong) size);
fputs("InnoDB: Tuple contents: ", stderr);
dtuple_print(stderr, entry);
putc('\n', stderr);
}
/* The vector, its field array and the replacement field buffers
allocated below all live in this heap, which the vector owns. */
heap = mem_heap_create(size + dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t) + 1000);
vector = mem_heap_alloc(heap, sizeof(big_rec_t));
vector->heap = heap;
vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
* sizeof(big_rec_field_t));
/* Decide which fields to shorten: the algorithm is to look for
a variable-length field that yields the biggest savings when
stored externally */
n_fields = 0;
/* Each iteration shortens exactly one field and then re-evaluates
the converted record size. */
while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
*n_ext),
dict_table_is_comp(index->table),
dict_index_get_n_fields(index),
dict_table_zip_size(index->table))) {
ulint i;
ulint longest = 0;
ulint longest_i = ULINT_MAX;
byte* data;
big_rec_field_t* b;
/* Scan only the fields that follow the first
dict_index_get_n_unique_in_tree(index) fields. */
for (i = dict_index_get_n_unique_in_tree(index);
i < dtuple_get_n_fields(entry); i++) {
ulint savings;
dfield = dtuple_get_nth_field(entry, i);
ifield = dict_index_get_nth_field(index, i);
/* Skip fixed-length, NULL, externally stored,
or short columns */
if (ifield->fixed_len
|| dfield_is_null(dfield)
|| dfield_is_ext(dfield)
|| dfield_get_len(dfield) <= local_len
|| dfield_get_len(dfield)
<= BTR_EXTERN_FIELD_REF_SIZE * 2) {
goto skip_field;
}
savings = dfield_get_len(dfield) - local_len;
/* Check that there would be savings */
if (longest >= savings) {
goto skip_field;
}
longest_i = i;
longest = savings;
skip_field:
continue;
}
if (!longest) {
/* Cannot shorten more */
/* NOTE(review): fields shortened in earlier iterations still
point to buffers allocated from heap, and *n_ext has already
been incremented for them; after this free the entry holds
dangling data pointers. Presumably callers discard entry when
NULL is returned -- confirm against the callers. */
mem_heap_free(heap);
return(NULL);
}
/* Move data from field longest_i to big rec vector.
We store the first bytes locally to the record. Then
we can calculate all ordering fields in all indexes
from locally stored data. */
dfield = dtuple_get_nth_field(entry, longest_i);
ifield = dict_index_get_nth_field(index, longest_i);
local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
b = &vector->fields[n_fields];
b->field_no = longest_i;
b->len = dfield_get_len(dfield) - local_prefix_len;
/* The vector does not copy the column: it points into the
original field data, just past the locally stored prefix. */
b->data = (char*) dfield_get_data(dfield) + local_prefix_len;
/* Allocate the locally stored part of the column. */
data = mem_heap_alloc(heap, local_len);
/* Copy the local prefix. */
memcpy(data, dfield_get_data(dfield), local_prefix_len);
/* Clear the extern field reference (BLOB pointer). */
memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
#if 0
/* The following would fail the Valgrind checks in
page_cur_insert_rec_low() and page_cur_insert_rec_zip().
The BLOB pointers in the record will be initialized after
the record and the BLOBs have been written. */
UNIV_MEM_ALLOC(data + local_prefix_len,
BTR_EXTERN_FIELD_REF_SIZE);
#endif
/* Point the field at the shortened buffer and flag it external. */
dfield_set_data(dfield, data, local_len);
dfield_set_ext(dfield);
n_fields++;
(*n_ext)++;
ut_ad(n_fields < dtuple_get_n_fields(entry));
}
vector->n_fields = n_fields;
return(vector);
}
/**************************************************************//**
Restores into entry the column data that was moved to vector, and frees
the vector. Each listed field is pointed back at a buffer that starts
local-prefix-length bytes before the data recorded in the vector and
spans the prefix plus the recorded length. The vector must have been
created from entry with dtuple_convert_big_rec. */
UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
	dict_index_t*	index __attribute__((unused)),	/*!< in: index */
	dtuple_t*	entry,	/*!< in: entry whose data was put to vector */
	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
				freed in this function */
{
	const ulint	n_moved = vector->n_fields;
	ulint		n;

	for (n = 0; n < n_moved; n++) {
		big_rec_field_t*	b = &vector->fields[n];
		dfield_t*		dfield;
		ulint			prefix_len;

		dfield = dtuple_get_nth_field(entry, b->field_no);
		prefix_len = dfield_get_len(dfield);

		ut_ad(dfield_is_ext(dfield));
		ut_ad(prefix_len >= BTR_EXTERN_FIELD_REF_SIZE);

		/* Drop the external reference from the local length;
		what remains is the locally stored data prefix. */
		prefix_len -= BTR_EXTERN_FIELD_REF_SIZE;

		ut_ad(prefix_len <= DICT_MAX_INDEX_COL_LEN);

		dfield_set_data(dfield,
				(char*) b->data - prefix_len,
				b->len + prefix_len);
	}

	mem_heap_free(vector->heap);
}
#endif /* !UNIV_HOTBACKUP */

297
perfschema/data/data0type.c Normal file
View file

@ -0,0 +1,297 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file data/data0type.c
Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#include "data0type.h"
#ifdef UNIV_NONINL
#include "data0type.ic"
#endif
#ifndef UNIV_HOTBACKUP
# include "ha_prototypes.h"
/* At the database startup we store the default-charset collation number of
this MySQL installation to this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
charset-collation code for them. */
UNIV_INTERN ulint data_mysql_default_charset_coll;
/*********************************************************************//**
Determines how many bytes the first n characters of the given string
occupy. When the minimum and maximum character lengths are equal, the
result is simply the smaller of prefix_len and data_len; otherwise the
computation is delegated to innobase_get_at_most_n_mbchars().
@return	length of the prefix, in bytes */
UNIV_INTERN
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
	ulint		prtype,		/*!< in: precise type */
	ulint		mbminlen,	/*!< in: minimum length of a
					multi-byte character */
	ulint		mbmaxlen,	/*!< in: maximum length of a
					multi-byte character */
	ulint		prefix_len,	/*!< in: length of the requested
					prefix, in characters, multiplied by
					dtype_get_mbmaxlen(dtype) */
	ulint		data_len,	/*!< in: length of str (in bytes) */
	const char*	str)		/*!< in: the string whose prefix
					length is being determined */
{
	ut_a(data_len != UNIV_SQL_NULL);
	ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));

	if (mbminlen == mbmaxlen) {
		/* Every character has the same width, so the prefix
		length is just capped by the data length. */
		return(prefix_len < data_len ? prefix_len : data_len);
	}

	ut_a(!(prefix_len % mbmaxlen));

	return(innobase_get_at_most_n_mbchars(
		       dtype_get_charset_coll(prtype),
		       prefix_len, data_len, str));
}
#endif /* UNIV_HOTBACKUP */
/*********************************************************************//**
Checks if a data main type is a string type: any main type code up to and
including DATA_BLOB, or DATA_MYSQL, or DATA_VARMYSQL.
@return	TRUE if string type */
UNIV_INTERN
ibool
dtype_is_string_type(
/*=================*/
	ulint	mtype)	/*!< in: InnoDB main data type code: DATA_CHAR, ... */
{
	if (mtype > DATA_BLOB
	    && mtype != DATA_MYSQL
	    && mtype != DATA_VARMYSQL) {

		return(FALSE);
	}

	return(TRUE);
}
/*********************************************************************//**
Checks if a type is a binary string type: DATA_FIXBINARY, DATA_BINARY, or
a DATA_BLOB whose precise type has the DATA_BINARY_TYPE flag set. Note
that for tables created with < 4.0.14, we do not know if a DATA_BLOB
column is a BLOB or a TEXT column. For those DATA_BLOB columns this
function currently returns FALSE.
@return	TRUE if binary string type */
UNIV_INTERN
ibool
dtype_is_binary_string_type(
/*========================*/
	ulint	mtype,	/*!< in: main data type */
	ulint	prtype)	/*!< in: precise type */
{
	switch (mtype) {
	case DATA_FIXBINARY:
	case DATA_BINARY:
		return(TRUE);
	case DATA_BLOB:
		/* A BLOB counts as binary only when flagged as such. */
		if (prtype & DATA_BINARY_TYPE) {

			return(TRUE);
		}
		break;
	default:
		break;
	}

	return(FALSE);
}
/*********************************************************************//**
Checks if a type is a non-binary string type, i.e. dtype_is_string_type()
holds and dtype_is_binary_string_type() does not. Note that for tables
created with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a
TEXT column. For those DATA_BLOB columns this function currently returns
TRUE.
@return	TRUE if non-binary string type */
UNIV_INTERN
ibool
dtype_is_non_binary_string_type(
/*============================*/
	ulint	mtype,	/*!< in: main data type */
	ulint	prtype)	/*!< in: precise type */
{
	if (dtype_is_string_type(mtype) != TRUE) {
		/* Not a string at all. */
		return(FALSE);
	}

	if (dtype_is_binary_string_type(mtype, prtype) != FALSE) {
		/* A string, but a binary one. */
		return(FALSE);
	}

	return(TRUE);
}
/*********************************************************************//**
Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code. The old precise type must fit in 16 bits and the
charset-collation code in 8 bits; the code is placed at bit 16 and above.
@return	precise type, including the charset-collation code */
UNIV_INTERN
ulint
dtype_form_prtype(
/*==============*/
	ulint	old_prtype,	/*!< in: the MySQL type code and the flags
				DATA_BINARY_TYPE etc. */
	ulint	charset_coll)	/*!< in: MySQL charset-collation code */
{
	ulint	coll_bits;

	ut_a(old_prtype < 256 * 256);
	ut_a(charset_coll < 256);

	coll_bits = charset_coll << 16;

	return(old_prtype + coll_bits);
}
/*********************************************************************//**
Validates a data type structure by asserting that the main type lies in
DATA_VARCHAR ... DATA_MYSQL, that a DATA_SYS type names one of the
DATA_N_SYS_COLS system columns and, outside hot backup builds, that
mbminlen does not exceed mbmaxlen.
@return	TRUE if ok */
UNIV_INTERN
ibool
dtype_validate(
/*===========*/
	const dtype_t*	type)	/*!< in: type struct to validate */
{
	ulint	mtype;

	ut_a(type);

	/* Read the main type only after the pointer has been checked. */
	mtype = type->mtype;

	ut_a(mtype >= DATA_VARCHAR);
	ut_a(mtype <= DATA_MYSQL);

	if (mtype == DATA_SYS) {
		ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
	}

#ifndef UNIV_HOTBACKUP
	ut_a(type->mbminlen <= type->mbmaxlen);
#endif /* !UNIV_HOTBACKUP */

	return(TRUE);
}
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Prints a data type structure to stderr: the symbolic name of the main
type (or "type N" for an unrecognized code), then either a description of
the precise type (for DATA_SYS, DATA_VARCHAR and DATA_CHAR) or the set
precise-type flags, and finally the length. */
UNIV_INTERN
void
dtype_print(
/*========*/
	const dtype_t*	type)	/*!< in: type */
{
	const char*	mtype_name = NULL;
	ulint		mtype;
	ulint		prtype;
	ulint		len;

	ut_a(type);

	mtype = type->mtype;
	prtype = type->prtype;

	/* Map the main type code to its symbolic name. */
	switch (mtype) {
	case DATA_VARCHAR:
		mtype_name = "DATA_VARCHAR";
		break;
	case DATA_CHAR:
		mtype_name = "DATA_CHAR";
		break;
	case DATA_BINARY:
		mtype_name = "DATA_BINARY";
		break;
	case DATA_FIXBINARY:
		mtype_name = "DATA_FIXBINARY";
		break;
	case DATA_BLOB:
		mtype_name = "DATA_BLOB";
		break;
	case DATA_INT:
		mtype_name = "DATA_INT";
		break;
	case DATA_MYSQL:
		mtype_name = "DATA_MYSQL";
		break;
	case DATA_SYS:
		mtype_name = "DATA_SYS";
		break;
	case DATA_FLOAT:
		mtype_name = "DATA_FLOAT";
		break;
	case DATA_DOUBLE:
		mtype_name = "DATA_DOUBLE";
		break;
	case DATA_DECIMAL:
		mtype_name = "DATA_DECIMAL";
		break;
	case DATA_VARMYSQL:
		mtype_name = "DATA_VARMYSQL";
		break;
	default:
		break;
	}

	if (mtype_name != NULL) {
		fputs(mtype_name, stderr);
	} else {
		fprintf(stderr, "type %lu", (ulong) mtype);
	}

	len = type->len;

	if (mtype != DATA_SYS
	    && mtype != DATA_VARCHAR
	    && mtype != DATA_CHAR) {

		/* Other main types: list the flags that are set. */
		if (prtype & DATA_UNSIGNED) {
			fputs(" DATA_UNSIGNED", stderr);
		}

		if (prtype & DATA_BINARY_TYPE) {
			fputs(" DATA_BINARY_TYPE", stderr);
		}

		if (prtype & DATA_NOT_NULL) {
			fputs(" DATA_NOT_NULL", stderr);
		}
	} else {
		putc(' ', stderr);

		/* For the system columns the printed length is the
		fixed length of that column, not type->len. */
		if (prtype == DATA_ROW_ID) {
			fputs("DATA_ROW_ID", stderr);
			len = DATA_ROW_ID_LEN;
		} else if (prtype == DATA_ROLL_PTR) {
			fputs("DATA_ROLL_PTR", stderr);
			len = DATA_ROLL_PTR_LEN;
		} else if (prtype == DATA_TRX_ID) {
			fputs("DATA_TRX_ID", stderr);
			len = DATA_TRX_ID_LEN;
		} else if (prtype == DATA_ENGLISH) {
			fputs("DATA_ENGLISH", stderr);
		} else {
			fprintf(stderr, "prtype %lu", (ulong) prtype);
		}
	}

	fprintf(stderr, " len %lu", (ulong) len);
}
#endif /* !UNIV_HOTBACKUP */

468
perfschema/dict/dict0boot.c Normal file
View file

@ -0,0 +1,468 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file dict/dict0boot.c
Data dictionary creation and booting
Created 4/18/1996 Heikki Tuuri
*******************************************************/
#include "dict0boot.h"
#ifdef UNIV_NONINL
#include "dict0boot.ic"
#endif
#include "dict0crea.h"
#include "btr0btr.h"
#include "dict0load.h"
#include "dict0load.h"
#include "trx0trx.h"
#include "srv0srv.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "log0recv.h"
#include "os0file.h"
/**********************************************************************//**
Fetches the dictionary header page (DICT_HDR_PAGE_NO in DICT_HDR_SPACE)
with an x-latch registered in the given mini-transaction and returns the
address of the header within the page frame.
@return	pointer to the dictionary header, page x-latched */
UNIV_INTERN
dict_hdr_t*
dict_hdr_get(
/*=========*/
	mtr_t*	mtr)	/*!< in: mtr */
{
	buf_block_t*	hdr_block = buf_page_get(DICT_HDR_SPACE, 0,
						 DICT_HDR_PAGE_NO,
						 RW_X_LATCH, mtr);
	dict_hdr_t*	hdr = DICT_HDR + buf_block_get_frame(hdr_block);

	buf_block_dbg_add_level(hdr_block, SYNC_DICT_HEADER);

	return(hdr);
}
/**********************************************************************//**
Returns a new table or index id. The counter stored at offset 'type' in
the dictionary header is read, incremented by one and written back, all
within one mini-transaction local to this function.
@return	the new id */
UNIV_INTERN
dulint
dict_hdr_get_new_id(
/*================*/
	ulint	type)	/*!< in: DICT_HDR_ROW_ID, ... */
{
	mtr_t		mtr;
	dict_hdr_t*	counter;
	dulint		new_id;

	ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID));

	mtr_start(&mtr);

	/* Address of the requested counter inside the header. */
	counter = dict_hdr_get(&mtr) + type;

	new_id = ut_dulint_add(mtr_read_dulint(counter, &mtr), 1);

	mlog_write_dulint(counter, new_id, &mtr);

	mtr_commit(&mtr);

	return(new_id);
}
/**********************************************************************//**
Writes the current value of the in-memory row id counter
(dict_sys->row_id) to the DICT_HDR_ROW_ID field of the dictionary header
page. The caller must own dict_sys->mutex. */
UNIV_INTERN
void
dict_hdr_flush_row_id(void)
/*=======================*/
{
	mtr_t		mtr;
	dulint		row_id;
	dict_hdr_t*	hdr;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	/* Sample the counter before starting the mini-transaction. */
	row_id = dict_sys->row_id;

	mtr_start(&mtr);

	hdr = dict_hdr_get(&mtr);

	mlog_write_dulint(hdr + DICT_HDR_ROW_ID, row_id, &mtr);

	mtr_commit(&mtr);
}
/*****************************************************************//**
Creates the file page for the dictionary header. This function is
called only at the database creation.
@return TRUE if succeed */
static
ibool
dict_hdr_create(
/*============*/
mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
dict_hdr_t* dict_header;
ulint root_page_no;
ut_ad(mtr);
/* Create the dictionary header file block in a new, allocated file
segment in the system tablespace */
block = fseg_create(DICT_HDR_SPACE, 0,
DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
dict_header = dict_hdr_get(mtr);
/* Start counting row, table, index, and tree ids from
DICT_HDR_FIRST_ID */
mlog_write_dulint(dict_header + DICT_HDR_ROW_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
/* Obsolete, but we must initialize it to 0 anyway. */
mlog_write_dulint(dict_header + DICT_HDR_MIX_ID,
ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr);
/* Create the B-tree roots for the clustered indexes of the basic
system tables */
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_TABLES_ID,
dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
DICT_TABLE_IDS_ID,
dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
dict_ind_redundant, mtr);
if (root_page_no == FIL_NULL) {
return(FALSE);
}
mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
MLOG_4BYTES, mtr);
/*--------------------------*/
return(TRUE);
}
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
UNIV_INTERN
void
dict_boot(void)
/*===========*/
{
dict_table_t* table;
dict_index_t* index;
dict_hdr_t* dict_hdr;
mem_heap_t* heap;
mtr_t mtr;
ulint error;
mtr_start(&mtr);
/* Create the hash tables etc. */
dict_init();
heap = mem_heap_create(450);
mutex_enter(&(dict_sys->mutex));
/* Get the dictionary header */
dict_hdr = dict_hdr_get(&mtr);
/* Because we only write new row ids to disk-based data structure
(dictionary header) when it is divisible by
DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
the latest value of the row id counter. Therefore we advance
the counter at the database startup to avoid overlapping values.
Note that when a user after database startup first time asks for
a new row id, then because the counter is now divisible by
..._MARGIN, it will immediately be updated to the disk-based
header. */
dict_sys->row_id = ut_dulint_add(
ut_dulint_align_up(mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
&mtr),
DICT_HDR_ROW_ID_WRITE_MARGIN),
DICT_HDR_ROW_ID_WRITE_MARGIN);
/* Insert into the dictionary cache the descriptions of the basic
system tables */
/*-------------------------*/
table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
/* TYPE is either DICT_TABLE_ORDINARY, or (TYPE & DICT_TF_COMPACT)
and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
/* MIX_LEN may contain additional table flags when
ROW_FORMAT!=REDUNDANT. Currently, these flags include
DICT_TF2_TEMPORARY. */
dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
table->id = DICT_TABLES_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_tables = table;
mem_heap_empty(heap);
index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 1);
dict_mem_index_add_field(index, "NAME", 0);
index->id = DICT_TABLES_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_TABLES,
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
index = dict_mem_index_create("SYS_TABLES", "ID_IND",
DICT_HDR_SPACE, DICT_UNIQUE, 1);
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_TABLE_IDS_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_TABLE_IDS,
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);
dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
table->id = DICT_COLUMNS_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_columns = table;
mem_heap_empty(heap);
index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0);
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_COLUMNS_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_COLUMNS,
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);
dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
/* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */
#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
#endif
#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
#endif
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif
#if DICT_SYS_INDEXES_NAME_FIELD != 1 + 2
#error "DICT_SYS_INDEXES_NAME_FIELD != 1 + 2"
#endif
table->id = DICT_INDEXES_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_indexes = table;
mem_heap_empty(heap);
index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "TABLE_ID", 0);
dict_mem_index_add_field(index, "ID", 0);
index->id = DICT_INDEXES_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_INDEXES,
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);
dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
table->id = DICT_FIELDS_ID;
dict_table_add_to_cache(table, heap);
dict_sys->sys_fields = table;
mem_heap_free(heap);
index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
DICT_HDR_SPACE,
DICT_UNIQUE | DICT_CLUSTERED, 2);
dict_mem_index_add_field(index, "INDEX_ID", 0);
dict_mem_index_add_field(index, "POS", 0);
index->id = DICT_FIELDS_ID;
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_FIELDS,
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
mtr_commit(&mtr);
/*-------------------------*/
/* Initialize the insert buffer table and index for each tablespace */
ibuf_init_at_db_start();
/* Load definitions of other indexes on system tables */
dict_load_sys_table(dict_sys->sys_tables);
dict_load_sys_table(dict_sys->sys_columns);
dict_load_sys_table(dict_sys->sys_indexes);
dict_load_sys_table(dict_sys->sys_fields);
mutex_exit(&(dict_sys->mutex));
}
/*****************************************************************//**
Hook that is meant to insert the basic system table rows into the system
tables themselves when the database is created. It is currently a no-op. */
static
void
dict_insert_initial_data(void)
/*==========================*/
{
	/* Intentionally empty: nothing is inserted yet */
}
/*****************************************************************//**
Creates and initializes the data dictionary at the database creation:
the dictionary header is written in a mini-transaction of its own, after
which the dictionary is booted and the initial-data hook is run. */
UNIV_INTERN
void
dict_create(void)
/*=============*/
{
	mtr_t	hdr_mtr;

	/* The header creation is committed before the dictionary is booted */
	mtr_start(&hdr_mtr);
	dict_hdr_create(&hdr_mtr);
	mtr_commit(&hdr_mtr);

	dict_boot();

	dict_insert_initial_data();
}

1512
perfschema/dict/dict0crea.c Normal file

File diff suppressed because it is too large Load diff

4854
perfschema/dict/dict0dict.c Normal file

File diff suppressed because it is too large Load diff

1499
perfschema/dict/dict0load.c Normal file

File diff suppressed because it is too large Load diff

319
perfschema/dict/dict0mem.c Normal file
View file

@ -0,0 +1,319 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file dict/dict0mem.c
Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
#include "dict0mem.h"
#ifdef UNIV_NONINL
#include "dict0mem.ic"
#endif
#include "rem0rec.h"
#include "data0type.h"
#include "mach0data.h"
#include "dict0dict.h"
#ifndef UNIV_HOTBACKUP
# include "lock0lock.h"
#endif /* !UNIV_HOTBACKUP */
#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
creating a table or index object */
/**********************************************************************//**
Creates a table memory object. The object, its name copy, its column
array and (unless built for hot backup) its AUTOINC lock struct are all
allocated from a private memory heap that is stored in table->heap.
@return own: table object */
UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
	const char*	name,	/*!< in: table name */
	ulint		space,	/*!< in: space where the clustered index of
				the table is placed; this parameter is
				ignored if the table is made a member of
				a cluster */
	ulint		n_cols,	/*!< in: number of columns */
	ulint		flags)	/*!< in: table flags */
{
	dict_table_t*	table;
	mem_heap_t*	heap;

	ut_ad(name);

	/* No flag bit at or above DICT_TF2_BITS may be set. The mask is
	built from an unsigned all-ones value, because left-shifting a
	negative signed int (the plain ~0) is undefined behaviour in C. */
	ut_a(!(flags & (~(ulint) 0 << DICT_TF2_BITS)));

	heap = mem_heap_create(DICT_HEAP_SIZE);

	/* The struct is zero-filled, so every member that is not set
	explicitly below starts out as 0 / NULL. */
	table = mem_heap_zalloc(heap, sizeof(dict_table_t));

	table->heap = heap;

	table->flags = (unsigned int) flags;
	table->name = mem_heap_strdup(heap, name);
	table->space = (unsigned int) space;

	/* Room is reserved for the user columns plus DATA_N_SYS_COLS
	additional columns. */
	table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);

	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
				     * sizeof(dict_col_t));

#ifndef UNIV_HOTBACKUP
	table->autoinc_lock = mem_heap_alloc(heap, lock_get_size());

	mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);

	table->autoinc = 0;

	/* The number of transactions that are either waiting on the
	AUTOINC lock or have been granted the lock. */
	table->n_waiting_or_granted_auto_inc_locks = 0;
#endif /* !UNIV_HOTBACKUP */

	ut_d(table->magic_n = DICT_TABLE_MAGIC_N);

	return(table);
}
/****************************************************************//**
Frees a table memory object: the AUTOINC mutex is destroyed (unless
built for hot backup) and then the heap of the table is released. */
UNIV_INTERN
void
dict_mem_table_free(
/*================*/
	dict_table_t*	table)	/*!< in: table */
{
	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
	ut_d(table->cached = FALSE);

#ifndef UNIV_HOTBACKUP
	mutex_free(&table->autoinc_mutex);
#endif /* !UNIV_HOTBACKUP */

	/* Must come last: 'table' must not be touched after this call */
	mem_heap_free(table->heap);
}
/****************************************************************//**
Builds a new column name array that consists of the existing names
followed by 'name'. The array is a sequence of NUL-terminated strings
stored back to back. @see dict_table_t::col_names
@return new column names array, allocated from 'heap' */
static
const char*
dict_add_col_name(
/*==============*/
	const char*	col_names,	/*!< in: existing column names, or
					NULL */
	ulint		cols,		/*!< in: number of existing columns */
	const char*	name,		/*!< in: new column name */
	mem_heap_t*	heap)		/*!< in: heap */
{
	const char*	end		= col_names;
	ulint		prefix_len	= 0;
	ulint		name_len;
	char*		joined;

	ut_ad(!cols == !col_names);

	/* Step over the 'cols' existing strings to find out how many
	bytes the current array occupies. */
	if (col_names != NULL) {
		ulint	n;

		for (n = cols; n > 0; n--) {
			end += strlen(end) + 1;
		}

		prefix_len = end - col_names;
	}

	/* The terminating NUL of the new name is copied too */
	name_len = strlen(name) + 1;

	joined = mem_heap_alloc(heap, prefix_len + name_len);

	if (prefix_len > 0) {
		memcpy(joined, col_names, prefix_len);
	}

	memcpy(joined + prefix_len, name, name_len);

	return(joined);
}
/**********************************************************************//**
Adds a column definition to a table. The column takes the next free
position (table->n_def) and, when a name is given, the name is appended
to the column name array of the table (table->col_names). */
UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
dict_table_t* table, /*!< in: table */
mem_heap_t* heap, /*!< in: temporary memory heap, or NULL;
must be non-NULL exactly when name is
non-NULL (asserted below) */
const char* name, /*!< in: column name, or NULL */
ulint mtype, /*!< in: main datatype */
ulint prtype, /*!< in: precise type */
ulint len) /*!< in: value stored in col->len */
{
dict_col_t* col;
#ifndef UNIV_HOTBACKUP
ulint mbminlen;
ulint mbmaxlen;
#endif /* !UNIV_HOTBACKUP */
ulint i;
ut_ad(table);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(!heap == !name);
/* i is the position of the new column; after the increment n_def
counts the new column too. */
i = table->n_def++;
if (name) {
if (UNIV_UNLIKELY(table->n_def == table->n_cols)) {
/* n_def has reached n_cols, so this call fills the last
column slot: build the name array in the table's own heap
instead of the heap passed by the caller. */
heap = table->heap;
}
if (UNIV_LIKELY(i) && UNIV_UNLIKELY(!table->col_names)) {
/* All preceding column names are empty. Start from a
zero-filled array so that dict_add_col_name() finds an
empty string for each of them. */
char* s = mem_heap_zalloc(heap, table->n_def);
table->col_names = s;
}
/* Replace the array with a new one that has 'name' appended */
table->col_names = dict_add_col_name(table->col_names,
i, name, heap);
}
col = dict_table_get_nth_col(table, i);
col->ind = (unsigned int) i;
col->ord_part = 0;
col->mtype = (unsigned int) mtype;
col->prtype = (unsigned int) prtype;
col->len = (unsigned int) len;
#ifndef UNIV_HOTBACKUP
/* The mbminlen/mbmaxlen values are obtained from the main and the
precise type by dtype_get_mblen() */
dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen);
col->mbminlen = (unsigned int) mbminlen;
col->mbmaxlen = (unsigned int) mbmaxlen;
#endif /* !UNIV_HOTBACKUP */
}
/**********************************************************************//**
Creates an index memory object in a memory heap of its own. The index
name is copied into that heap, whereas only the pointer to the table
name is stored.
@return own: index object */
UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
	const char*	table_name,	/*!< in: table name */
	const char*	index_name,	/*!< in: index name */
	ulint		space,		/*!< in: space where the index tree is
					placed, ignored if the index is of
					the clustered type */
	ulint		type,		/*!< in: DICT_UNIQUE,
					DICT_CLUSTERED, ... ORed */
	ulint		n_fields)	/*!< in: number of fields */
{
	mem_heap_t*	index_heap;
	dict_index_t*	new_index;

	ut_ad(table_name && index_name);

	index_heap = mem_heap_create(DICT_HEAP_SIZE);

	/* Zero-filled, so members not assigned below start out as 0 */
	new_index = mem_heap_zalloc(index_heap, sizeof(dict_index_t));

	new_index->heap = index_heap;
	new_index->type = type;
#ifndef UNIV_HOTBACKUP
	new_index->space = (unsigned int) space;
#endif /* !UNIV_HOTBACKUP */
	new_index->name = mem_heap_strdup(index_heap, index_name);
	new_index->table_name = table_name;
	new_index->n_fields = (unsigned int) n_fields;

	/* One extra byte is requested so that an index with zero fields
	does not lead to the allocation of an empty mem block. */
	new_index->fields = mem_heap_alloc(index_heap,
					   n_fields * sizeof(dict_field_t)
					   + 1);
#ifdef UNIV_DEBUG
	new_index->magic_n = DICT_INDEX_MAGIC_N;
#endif /* UNIV_DEBUG */

	return(new_index);
}
/**********************************************************************//**
Allocates a zero-filled foreign constraint memory object from a memory
heap of its own and records that heap in the 'heap' member of the object.
@return own: foreign constraint struct */
UNIV_INTERN
dict_foreign_t*
dict_mem_foreign_create(void)
/*=========================*/
{
	mem_heap_t*	own_heap;
	dict_foreign_t*	fk;

	own_heap = mem_heap_create(100);

	fk = mem_heap_zalloc(own_heap, sizeof(dict_foreign_t));
	fk->heap = own_heap;

	return(fk);
}
/**********************************************************************//**
Appends a field definition to an index, taking the next free field
position. NOTE: the column name is not copied, only the pointer is
stored. The memory occupied by the column name may be released only
after publishing the index. */
UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
	dict_index_t*	index,		/*!< in: index */
	const char*	name,		/*!< in: column name */
	ulint		prefix_len)	/*!< in: 0 or the column prefix length
					in a MySQL index like
					INDEX (textcol(25)) */
{
	dict_field_t*	new_field;
	ulint		pos;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* Claim the slot first, so that n_def already covers 'pos' when
	the field is looked up. */
	pos = index->n_def++;

	new_field = dict_index_get_nth_field(index, pos);

	new_field->name = name;
	new_field->prefix_len = (unsigned int) prefix_len;
}
/**********************************************************************//**
Frees an index memory object by releasing the memory heap recorded in
the object. */
UNIV_INTERN
void
dict_mem_index_free(
/*================*/
	dict_index_t*	index)	/*!< in: index */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* 'index' must not be used by the caller after this call */
	mem_heap_free(index->heap);
}

65
perfschema/dyn/dyn0dyn.c Normal file
View file

@ -0,0 +1,65 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file dyn/dyn0dyn.c
The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
#include "dyn0dyn.h"
#ifdef UNIV_NONINL
#include "dyn0dyn.ic"
#endif
/************************************************************//**
Appends a new, empty block to a dyn array. The block that was the last
one so far is marked full.
@return created block */
UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
	dyn_array_t*	arr)	/*!< in: dyn array */
{
	dyn_block_t*	last;
	dyn_block_t*	fresh;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	if (!arr->heap) {
		/* No heap yet: initialize the block list, link the
		array object itself in as its first element, and
		create the heap that further blocks come from. */
		UT_LIST_INIT(arr->base);
		UT_LIST_ADD_FIRST(list, arr->base, arr);

		arr->heap = mem_heap_create(sizeof(dyn_block_t));
	}

	last = dyn_array_get_last_block(arr);
	last->used |= DYN_BLOCK_FULL_FLAG;

	fresh = mem_heap_alloc(arr->heap, sizeof(dyn_block_t));
	fresh->used = 0;

	UT_LIST_ADD_LAST(list, arr->base, fresh);

	return(fresh);
}

852
perfschema/eval/eval0eval.c Normal file
View file

@ -0,0 +1,852 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file eval/eval0eval.c
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#include "eval0eval.h"
#ifdef UNIV_NONINL
#include "eval0eval.ic"
#endif
#include "data0data.h"
#include "row0sel.h"
/** The RND function seed */
static ulint eval_rnd = 128367121;
/** Dummy address used when we should allocate a buffer of size 0 in
eval_node_alloc_val_buf */
static byte eval_dummy;
/*****************************************************************//**
Gives the val field of a que_node a buffer of 'size' bytes from global
dynamic memory. A buffer that the node already owns is freed first. For
size 0 nothing is allocated and the address of eval_dummy is used.
NOTE that the memory must be explicitly freed when the query graph is
freed, and that this is the only function where dynamic memory should be
allocated for a query node val field.
@return pointer to allocated buffer */
UNIV_INTERN
byte*
eval_node_alloc_val_buf(
/*====================*/
	que_node_t*	node,	/*!< in: query graph node; sets the val field
				data field to point to the new buffer, and
				len field equal to size */
	ulint		size)	/*!< in: buffer size */
{
	dfield_t*	val;
	byte*		old_buf;
	byte*		new_buf;

	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
	      || que_node_get_type(node) == QUE_NODE_FUNC);

	val = que_node_get_val(node);
	old_buf = dfield_get_data(val);

	/* eval_dummy is a static variable and must never be freed */
	if (old_buf != NULL && old_buf != &eval_dummy) {
		mem_free(old_buf);
	}

	if (size != 0) {
		new_buf = mem_alloc(size);
	} else {
		new_buf = &eval_dummy;
	}

	que_node_set_val_buf_size(node, size);

	dfield_set_data(val, new_buf, size);

	return(new_buf);
}
/*****************************************************************//**
Frees the value buffer of a que_node if its recorded buffer size is
nonzero, that is, if eval_node_alloc_val_buf allocated it from global
dynamic memory. The freeing for pushed column values is done in
sel_col_prefetch_buf_free. */
UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
	que_node_t*	node)	/*!< in: query graph node */
{
	byte*	buf;

	ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
	      || que_node_get_type(node) == QUE_NODE_FUNC);

	buf = dfield_get_data(que_node_get_val(node));

	if (que_node_get_val_buf_size(node) > 0) {
		/* A nonzero size implies that a buffer must exist */
		ut_a(buf);

		mem_free(buf);
	}
}
/*****************************************************************//**
Evaluates a comparison node: the values of the two arguments are compared
with cmp_dfield_dfield and the outcome is interpreted according to the
comparison operator of the node. The result is also stored in the node.
@return the result of the comparison */
UNIV_INTERN
ibool
eval_cmp(
/*=====*/
	func_node_t*	cmp_node)	/*!< in: comparison node */
{
	que_node_t*	lhs;
	que_node_t*	rhs;
	int		order;
	ibool		result;
	int		func;

	ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);

	lhs = cmp_node->args;
	rhs = que_node_get_next(lhs);

	order = cmp_dfield_dfield(que_node_get_val(lhs),
				  que_node_get_val(rhs));

	func = cmp_node->func;

	/* The tests below compare 'order' against exactly -1, 0 and 1 */
	switch (func) {
	case '=':
		result = (order == 0) ? TRUE : FALSE;
		break;
	case '<':
		result = (order == -1) ? TRUE : FALSE;
		break;
	case PARS_LE_TOKEN:
		result = (order != 1) ? TRUE : FALSE;
		break;
	case PARS_NE_TOKEN:
		result = (order != 0) ? TRUE : FALSE;
		break;
	case PARS_GE_TOKEN:
		result = (order != -1) ? TRUE : FALSE;
		break;
	default:
		/* Any other operator is treated as '>' */
		ut_ad(func == '>');

		result = (order == 1) ? TRUE : FALSE;
		break;
	}

	eval_node_set_ibool_val(cmp_node, result);

	return(result);
}
/*****************************************************************//**
Evaluates a logical operation node (AND, OR or NOT) from the boolean
values of its arguments and stores the result in the node. Any other
operator is a fatal error. */
UNIV_INLINE
void
eval_logical(
/*=========*/
	func_node_t*	logical_node)	/*!< in: logical operation node */
{
	que_node_t*	first;
	que_node_t*	second;
	ibool		val1;
	ibool		val2	= 0;	/* remove warning */
	ibool		result	= 0;	/* remove warning */
	int		func;

	ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);

	first = logical_node->args;
	second = que_node_get_next(first); /* NULL if func is 'NOT' */

	val1 = eval_node_get_ibool_val(first);

	if (second) {
		val2 = eval_node_get_ibool_val(second);
	}

	func = logical_node->func;

	switch (func) {
	case PARS_AND_TOKEN:
		result = val1 & val2;
		break;
	case PARS_OR_TOKEN:
		result = val1 | val2;
		break;
	case PARS_NOT_TOKEN:
		result = TRUE - val1;
		break;
	default:
		ut_error;
	}

	eval_node_set_ibool_val(logical_node, result);
}
/*****************************************************************//**
Evaluates an arithmetic operation node ('+', binary or unary '-', '*',
'/') on the integer values of its arguments and stores the result in
the node. */
UNIV_INLINE
void
eval_arith(
/*=======*/
	func_node_t*	arith_node)	/*!< in: arithmetic operation node */
{
	que_node_t*	first;
	que_node_t*	second;
	lint		val1;
	lint		val2 = 0;	/* remove warning */
	lint		result;
	int		func;

	ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);

	first = arith_node->args;
	second = que_node_get_next(first); /* NULL if func is unary '-' */

	val1 = eval_node_get_int_val(first);

	if (second) {
		val2 = eval_node_get_int_val(second);
	}

	func = arith_node->func;

	switch (func) {
	case '+':
		result = val1 + val2;
		break;
	case '-':
		/* Binary minus if there is a second argument, else
		unary minus */
		if (second) {
			result = val1 - val2;
		} else {
			result = -val1;
		}
		break;
	case '*':
		result = val1 * val2;
		break;
	default:
		ut_ad(func == '/');

		result = val1 / val2;
		break;
	}

	eval_node_set_int_val(arith_node, result);
}
/*****************************************************************//**
Evaluates an aggregate operation node: the integer value stored in the
node is incremented by one for COUNT, and by the integer value of the
argument otherwise (SUM). */
UNIV_INLINE
void
eval_aggregate(
/*===========*/
	func_node_t*	node)	/*!< in: aggregate operation node */
{
	lint	total;
	int	func;

	ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);

	/* The node value holds the aggregate accumulated so far */
	total = eval_node_get_int_val(node);

	func = node->func;

	if (func != PARS_COUNT_TOKEN) {
		que_node_t*	arg;

		ut_ad(func == PARS_SUM_TOKEN);

		arg = node->args;
		total += eval_node_get_int_val(arg);
	} else {
		total++;
	}

	eval_node_set_int_val(node, total);
}
/*****************************************************************//**
Evaluates a predefined function node where the function is not relevant
in benchmarks. The functions handled here are PRINTF, ASSERT, RND and
RND_STR; any other function is a fatal error. */
static
void
eval_predefined_2(
/*==============*/
func_node_t* func_node) /*!< in: predefined function node */
{
que_node_t* arg;
que_node_t* arg1;
que_node_t* arg2 = 0; /* initialized because it is assigned only
when there is a first argument */
lint int_val;
byte* data;
ulint len1;
ulint len2;
int func;
ulint i;
ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
arg1 = func_node->args;
if (arg1) {
arg2 = que_node_get_next(arg1);
}
func = func_node->func;
if (func == PARS_PRINTF_TOKEN) {
/* Print every argument with dfield_print, then write a newline
to stderr */
arg = arg1;
while (arg) {
dfield_print(que_node_get_val(arg));
arg = que_node_get_next(arg);
}
putc('\n', stderr);
} else if (func == PARS_ASSERT_TOKEN) {
/* Report a false condition on stderr first; the ut_a() below
then raises the assertion failure */
if (!eval_node_get_ibool_val(arg1)) {
fputs("SQL assertion fails in a stored procedure!\n",
stderr);
}
ut_a(eval_node_get_ibool_val(arg1));
/* This function, or more precisely, a debug procedure,
returns no value */
} else if (func == PARS_RND_TOKEN) {
/* The result is an integer in the closed range [len1, len2],
derived from the current seed */
len1 = (ulint)eval_node_get_int_val(arg1);
len2 = (ulint)eval_node_get_int_val(arg2);
ut_ad(len2 >= len1);
if (len2 > len1) {
int_val = (lint) (len1
+ (eval_rnd % (len2 - len1 + 1)));
} else {
int_val = (lint) len1;
}
/* Advance the seed after it has been used */
eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
eval_node_set_int_val(func_node, int_val);
} else if (func == PARS_RND_STR_TOKEN) {
/* Fill a buffer of len1 bytes; every byte is one of the values
97, 98 or 99 ('a', 'b' or 'c' in ASCII), and the seed is
advanced after each byte */
len1 = (ulint)eval_node_get_int_val(arg1);
data = eval_node_ensure_val_buf(func_node, len1);
for (i = 0; i < len1; i++) {
data[i] = (byte)(97 + (eval_rnd % 3));
eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
}
} else {
ut_error;
}
}
/*****************************************************************//**
Evaluates a notfound-function node: the result is TRUE when the select
node that belongs to the cursor argument is in the state
SEL_NODE_NO_MORE_ROWS, and FALSE otherwise. */
UNIV_INLINE
void
eval_notfound(
/*==========*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	arg1;
	sym_node_t*	cursor;
	sel_node_t*	sel_node;
	ibool		ibool_val;

	arg1 = func_node->args;

	ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);

	cursor = arg1;

	ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);

	if (cursor->token_type == SYM_LIT) {
		/* A literal argument has to be "SQL": it refers to the
		last select node of the query graph */
		ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
				"SQL", 3) == 0);

		sel_node = cursor->sym_table->query_graph->last_sel_node;
	} else {
		/* Otherwise the select node is taken from the cursor
		definition behind the alias of the symbol */
		sel_node = cursor->alias->cursor_def;
	}

	if (sel_node->state == SEL_NODE_NO_MORE_ROWS) {
		ibool_val = TRUE;
	} else {
		ibool_val = FALSE;
	}

	eval_node_set_ibool_val(func_node, ibool_val);
}
/*****************************************************************//**
Evaluates a substr-function node. No bytes are copied: the value of the
function node is set to a pointer into the data of the first argument,
offset by the second argument, with the third argument as its length. */
UNIV_INLINE
void
eval_substr(
/*========*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	str_arg;
	que_node_t*	pos_arg;
	que_node_t*	len_arg;
	byte*		base;
	ulint		offset;
	ulint		count;

	str_arg = func_node->args;
	pos_arg = que_node_get_next(str_arg);

	ut_ad(func_node->func == PARS_SUBSTR_TOKEN);

	len_arg = que_node_get_next(pos_arg);

	base = dfield_get_data(que_node_get_val(str_arg));

	offset = (ulint) eval_node_get_int_val(pos_arg);
	count = (ulint) eval_node_get_int_val(len_arg);

	dfield_set_data(que_node_get_val(func_node), base + offset, count);
}
/*****************************************************************//**
Evaluates a replstr-procedure node: copies the first len2 bytes of the
second argument over the first argument, starting at byte offset len1,
where len1 and len2 are the third and the fourth argument. It is a fatal
error if either argument is too short for the copy. */
static
void
eval_replstr(
/*=========*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	dst_arg;
	que_node_t*	src_arg;
	que_node_t*	pos_arg;
	que_node_t*	len_arg;
	byte*		dst;
	byte*		src;
	ulint		pos;
	ulint		n_bytes;
	ulint		dst_len;
	ulint		src_len;

	dst_arg = func_node->args;
	src_arg = que_node_get_next(dst_arg);

	ut_ad(que_node_get_type(dst_arg) == QUE_NODE_SYMBOL);

	pos_arg = que_node_get_next(src_arg);
	len_arg = que_node_get_next(pos_arg);

	dst = dfield_get_data(que_node_get_val(dst_arg));
	src = dfield_get_data(que_node_get_val(src_arg));

	pos = (ulint) eval_node_get_int_val(pos_arg);
	n_bytes = (ulint) eval_node_get_int_val(len_arg);

	dst_len = dfield_get_len(que_node_get_val(dst_arg));
	src_len = dfield_get_len(que_node_get_val(src_arg));

	/* Both the target range and the source range must fit */
	if (dst_len < pos + n_bytes || src_len < n_bytes) {

		ut_error;
	}

	ut_memcpy(dst + pos, src, n_bytes);
}
/*****************************************************************//**
Evaluates an instr-function node: searches the first argument for the
first occurrence of the second argument. The result is the 1-based
position of the match, or 0 if there is no match. An empty second
argument is a fatal error. */
static
void
eval_instr(
/*=======*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	hay_arg;
	que_node_t*	needle_arg;
	dfield_t*	hay_field;
	dfield_t*	needle_field;
	lint		int_val;
	byte*		hay;
	byte*		needle;
	byte		first_char;
	ulint		hay_len;
	ulint		needle_len;
	ulint		i;
	ulint		j;

	hay_arg = func_node->args;
	needle_arg = que_node_get_next(hay_arg);

	hay_field = que_node_get_val(hay_arg);
	needle_field = que_node_get_val(needle_arg);

	hay = dfield_get_data(hay_field);
	needle = dfield_get_data(needle_field);

	hay_len = dfield_get_len(hay_field);
	needle_len = dfield_get_len(needle_field);

	if (needle_len == 0) {
		ut_error;
	}

	first_char = needle[0];

	/* 0 stands for 'not found' */
	int_val = 0;

	for (i = 0; i < hay_len; i++) {

		if (hay[i] != first_char) {

			continue;
		}

		if (i + needle_len > hay_len) {
			/* The rest of the string is too short to
			contain a match */
			break;
		}

		/* One character matches; compare the remaining ones */
		for (j = 1; j < needle_len && hay[i + j] == needle[j]; j++) {
		}

		if (j == needle_len) {
			int_val = i + 1;

			break;
		}
	}

	eval_node_set_int_val(func_node, int_val);
}
/*****************************************************************//**
Evaluates a binary-to-number function node: a binary string of at most
4 bytes is extended to exactly 4 bytes by adding zero bytes in front,
and the 4 bytes are copied to the value of the function node. A longer
argument is a fatal error. */
UNIV_INLINE
void
eval_binary_to_number(
/*==================*/
	func_node_t*	func_node)	/*!< in: function node */
{
	dfield_t*	arg_field;
	byte*		bin;
	byte*		src;
	ulint		bin_len;
	ulint		padded;

	arg_field = que_node_get_val(func_node->args);

	bin = dfield_get_data(arg_field);
	bin_len = dfield_get_len(arg_field);

	if (bin_len > 4) {
		ut_error;
	}

	if (bin_len != 4) {
		/* Right-align the bytes in a zeroed scratch variable */
		padded = 0;
		src = (byte*) &padded;

		ut_memcpy(src + (4 - bin_len), bin, bin_len);
	} else {
		src = bin;
	}

	eval_node_copy_and_alloc_val(func_node, src, 4);
}
/*****************************************************************//**
Evaluates a concat-function node: the data of all arguments is copied,
in argument order, into the value buffer of the function node. */
static
void
eval_concat(
/*========*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	arg;
	dfield_t*	piece;
	byte*		out;
	ulint		total;
	ulint		offset;
	ulint		piece_len;

	/* First pass: sum up the lengths to size the result buffer */
	total = 0;

	for (arg = func_node->args; arg; arg = que_node_get_next(arg)) {
		total += dfield_get_len(que_node_get_val(arg));
	}

	out = eval_node_ensure_val_buf(func_node, total);

	/* Second pass: copy the pieces one after another */
	offset = 0;

	for (arg = func_node->args; arg; arg = que_node_get_next(arg)) {
		piece = que_node_get_val(arg);
		piece_len = dfield_get_len(piece);

		ut_memcpy(out + offset, dfield_get_data(piece), piece_len);

		offset += piece_len;
	}
}
/*****************************************************************//**
Evaluates a to-binary function node. If the first argument is an integer,
the second argument gives a length in bytes (at most 4), and the value of
the function node is set to the last 'length' bytes of the 4-byte integer
data. If the first argument is of some other type, the value is set to
the data of the argument as such. No bytes are copied in either case. */
UNIV_INLINE
void
eval_to_binary(
/*===========*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	arg1;
	que_node_t*	arg2;
	byte*		src;
	ulint		n_bytes;

	arg1 = func_node->args;

	src = dfield_get_data(que_node_get_val(arg1));

	if (dtype_get_mtype(que_node_get_data_type(arg1)) == DATA_INT) {

		arg2 = que_node_get_next(arg1);

		n_bytes = (ulint) eval_node_get_int_val(arg2);

		if (n_bytes > 4) {

			ut_error;
		}

		/* Skip the leading bytes of the 4-byte integer */
		src += 4 - n_bytes;
	} else {
		n_bytes = dfield_get_len(que_node_get_val(arg1));
	}

	dfield_set_data(que_node_get_val(func_node), src, n_bytes);
}
/*****************************************************************//**
Evaluates a predefined function node. LENGTH, TO_CHAR, TO_NUMBER and
SYSDATE are handled here; every other function is passed on to
eval_predefined_2. */
UNIV_INLINE
void
eval_predefined(
/*============*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	arg1;
	lint		int_val;
	byte*		data;
	int		func;

	func = func_node->func;

	arg1 = func_node->args;

	if (func == PARS_LENGTH_TOKEN) {

		int_val = (lint) dfield_get_len(que_node_get_val(arg1));

	} else if (func == PARS_TO_CHAR_TOKEN) {

		/* Convert number to character string as a
		signed decimal integer. */

		ulint	magnitude;	/* absolute value of int_val */
		ulint	rest;
		int	int_len;
		int	pos;

		int_val = eval_node_get_int_val(arg1);

		/* Compute the absolute value in unsigned arithmetic. For a
		negative number 1 is added BEFORE the negation, so that the
		most negative value cannot overflow the signed type; the 1
		is given back after the conversion to unsigned. */
		if (int_val < 0) {
			magnitude = ((ulint) -(int_val + 1)) + 1;
		} else {
			magnitude = (ulint) int_val;
		}

		/* Determine the length of the string. */

		if (magnitude == 0) {
			int_len = 1; /* the number 0 occupies 1 byte */
		} else {
			/* reserve space for minus sign if needed */
			int_len = (int_val < 0) ? 1 : 0;

			for (rest = magnitude; rest > 0; rest /= 10) {
				int_len++;
			}
		}

		/* allocate the string */
		data = eval_node_ensure_val_buf(func_node, int_len + 1);

		/* add terminating NUL character */
		data[int_len] = 0;

		/* convert the number */

		if (magnitude == 0) {
			data[0] = '0';
		} else {
			if (int_val < 0) {
				data[0] = '-'; /* preceding minus sign */
			}

			/* The digits are written from the last one to
			the first one */
			pos = int_len;

			for (rest = magnitude; rest > 0; rest /= 10) {
				data[--pos] = (byte)
					('0' + (byte)(rest % 10));
			}
		}

		/* The terminating NUL is not counted in the length */
		dfield_set_len(que_node_get_val(func_node), int_len);

		return;

	} else if (func == PARS_TO_NUMBER_TOKEN) {

		int_val = atoi((char*)
			       dfield_get_data(que_node_get_val(arg1)));

	} else if (func == PARS_SYSDATE_TOKEN) {
		int_val = (lint) ut_time();
	} else {
		eval_predefined_2(func_node);

		return;
	}

	eval_node_set_int_val(func_node, int_val);
}
/*****************************************************************//**
Evaluates a function node: all arguments are evaluated first, and then
the evaluator that matches the class and the function of the node is
called. */
UNIV_INTERN
void
eval_func(
/*======*/
	func_node_t*	func_node)	/*!< in: function node */
{
	que_node_t*	arg;
	ulint		class;
	ulint		func;

	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);

	class = func_node->class;
	func = func_node->func;

	/* Evaluate first the argument list */
	for (arg = func_node->args; arg; arg = que_node_get_next(arg)) {

		eval_exp(arg);

		/* The functions are not defined for SQL null argument
		values, except for eval_cmp, notfound and printf */

		if (class != PARS_FUNC_CMP
		    && func != PARS_NOTFOUND_TOKEN
		    && func != PARS_PRINTF_TOKEN
		    && dfield_is_null(que_node_get_val(arg))) {

			ut_error;
		}
	}

	if (class == PARS_FUNC_CMP) {
		eval_cmp(func_node);

		return;
	}

	if (class == PARS_FUNC_ARITH) {
		eval_arith(func_node);

		return;
	}

	if (class == PARS_FUNC_AGGREGATE) {
		eval_aggregate(func_node);

		return;
	}

	if (class != PARS_FUNC_PREDEFINED) {
		/* The only class left is the logical one */
		ut_ad(class == PARS_FUNC_LOGICAL);

		eval_logical(func_node);

		return;
	}

	switch (func) {
	case PARS_NOTFOUND_TOKEN:
		eval_notfound(func_node);
		break;
	case PARS_SUBSTR_TOKEN:
		eval_substr(func_node);
		break;
	case PARS_REPLSTR_TOKEN:
		eval_replstr(func_node);
		break;
	case PARS_INSTR_TOKEN:
		eval_instr(func_node);
		break;
	case PARS_BINARY_TO_NUMBER_TOKEN:
		eval_binary_to_number(func_node);
		break;
	case PARS_CONCAT_TOKEN:
		eval_concat(func_node);
		break;
	case PARS_TO_BINARY_TOKEN:
		eval_to_binary(func_node);
		break;
	default:
		eval_predefined(func_node);
		break;
	}
}

295
perfschema/eval/eval0proc.c Normal file
View file

@ -0,0 +1,295 @@
/*****************************************************************************
Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file eval/eval0proc.c
Executes SQL stored procedures and their control structures
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#include "eval0proc.h"
#ifdef UNIV_NONINL
#include "eval0proc.ic"
#endif
/**********************************************************************//**
Performs an execution step of an if-statement node. When the node is
entered from its parent, the conditions are evaluated and thr->run_node
is set to the first statement of the selected branch; in all other cases
thr->run_node is set to the parent of the if node.
@return query thread to run next; this function always returns thr */
UNIV_INTERN
que_thr_t*
if_step(
/*====*/
que_thr_t* thr) /*!< in: query thread */
{
if_node_t* node;
elsif_node_t* elsif_node;
ut_ad(thr);
node = thr->run_node;
ut_ad(que_node_get_type(node) == QUE_NODE_IF);
if (thr->prev_node == que_node_get_parent(node)) {
/* Control arrived from the parent node: choose a branch */
/* Evaluate the condition */
eval_exp(node->cond);
if (eval_node_get_ibool_val(node->cond)) {
/* The condition evaluated to TRUE: start execution
from the first statement in the statement list */
thr->run_node = node->stat_list;
} else if (node->else_part) {
/* NOTE(review): else_part is tested before elsif_list, so
the elsif conditions are evaluated only when else_part is
NULL - presumably the parser never sets both; confirm */
thr->run_node = node->else_part;
} else if (node->elsif_list) {
/* Evaluate the elsif conditions in list order until one
of them is TRUE */
elsif_node = node->elsif_list;
for (;;) {
eval_exp(elsif_node->cond);
if (eval_node_get_ibool_val(
elsif_node->cond)) {
/* The condition evaluated to TRUE:
start execution from the first
statement in the statement list */
thr->run_node = elsif_node->stat_list;
break;
}
elsif_node = que_node_get_next(elsif_node);
if (elsif_node == NULL) {
/* No elsif condition was TRUE */
thr->run_node = NULL;
break;
}
}
} else {
thr->run_node = NULL;
}
} else {
/* Move to the next statement */
/* Control did not arrive from the parent; the assertion below
expects the previous node to be the last one in its list */
ut_ad(que_node_get_next(thr->prev_node) == NULL);
thr->run_node = NULL;
}
if (thr->run_node == NULL) {
/* Nothing to run inside the if statement: continue from the
parent node */
thr->run_node = que_node_get_parent(node);
}
return(thr);
}
/**********************************************************************//**
Executes one step of a WHILE statement node: the loop condition is
evaluated on every entry, and execution continues either in the loop's
statement list (condition TRUE) or at the parent node (condition FALSE).
@return query thread to run next (always thr) */
UNIV_INTERN
que_thr_t*
while_step(
/*=======*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	while_node_t*	loop;

	ut_ad(thr);

	loop = thr->run_node;
	ut_ad(que_node_get_type(loop) == QUE_NODE_WHILE);

	/* We arrive either from the parent or from the last statement
	of the loop body */
	ut_ad((thr->prev_node == que_node_get_parent(loop))
	      || (que_node_get_next(thr->prev_node) == NULL));

	eval_exp(loop->cond);

	thr->run_node = eval_node_get_ibool_val(loop->cond)
		? loop->stat_list
		: que_node_get_parent(loop);

	return(thr);
}
/**********************************************************************//**
Executes an assignment statement node: evaluates the right-hand side
expression, copies the result into the alias of the target variable, and
passes control back to the parent node.
@return query thread to run next (always thr) */
UNIV_INTERN
que_thr_t*
assign_step(
/*========*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	assign_node_t*	assignment;

	ut_ad(thr);

	assignment = thr->run_node;
	ut_ad(que_node_get_type(assignment) == QUE_NODE_ASSIGNMENT);

	/* Compute the value, then store it in the variable */
	eval_exp(assignment->val);
	eval_node_copy_val(assignment->var->alias, assignment->val);

	thr->run_node = que_node_get_parent(assignment);

	return(thr);
}
/**********************************************************************//**
Executes one step of a FOR loop node.

On entry from the parent the start and end limits are evaluated and the
loop variable starts at the start limit. On entry from a statement of the
loop body, control moves to the next statement of the body if there is
one; otherwise the loop variable is incremented by one. The body is run
again as long as the loop variable does not exceed the stored end value;
after that control returns to the parent.
@return query thread to run next (always thr) */
UNIV_INTERN
que_thr_t*
for_step(
/*=====*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	for_node_t*	loop;
	que_node_t*	parent;
	lint		counter;

	ut_ad(thr);

	loop = thr->run_node;
	ut_ad(que_node_get_type(loop) == QUE_NODE_FOR);

	parent = que_node_get_parent(loop);

	if (thr->prev_node == parent) {
		/* First entry: evaluate both limits and start counting */
		eval_exp(loop->loop_start_limit);
		eval_exp(loop->loop_end_limit);

		counter = eval_node_get_int_val(loop->loop_start_limit);

		loop->loop_end_value
			= (int) eval_node_get_int_val(loop->loop_end_limit);
	} else {
		que_node_t*	next_stat;

		/* We come from inside the loop body: run its next
		statement if one is left */
		next_stat = que_node_get_next(thr->prev_node);

		if (next_stat != NULL) {
			thr->run_node = next_stat;

			return(thr);
		}

		/* Body finished: advance the loop variable */
		counter = 1 + eval_node_get_int_val(loop->loop_var);
	}

	if (counter <= loop->loop_end_value) {
		/* Run the body (again) with the new loop variable value */
		eval_node_set_int_val(loop->loop_var, counter);
		thr->run_node = loop->stat_list;
	} else {
		/* End value passed: leave the loop */
		thr->run_node = parent;
	}

	return(thr);
}
/**********************************************************************//**
Executes an EXIT statement node: control is transferred to the parent of
the loop node that contains the EXIT statement, which is how loops
terminate. Asserts that such a containing loop exists.
@return query thread to run next (always thr) */
UNIV_INTERN
que_thr_t*
exit_step(
/*======*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	exit_node_t*	exit_node;
	que_node_t*	enclosing_loop;

	ut_ad(thr);

	exit_node = thr->run_node;
	ut_ad(que_node_get_type(exit_node) == QUE_NODE_EXIT);

	enclosing_loop = que_node_get_containing_loop_node(exit_node);

	/* This fires if an EXIT statement is used outside of any loop */
	ut_a(enclosing_loop);

	/* A loop is left by continuing at the loop node's parent */
	thr->run_node = que_node_get_parent(enclosing_loop);

	return(thr);
}
/**********************************************************************//**
Performs an execution step of a return-statement node: walks up the
parent chain from the RETURN node to the enclosing procedure node
(QUE_NODE_PROC) and continues execution at that procedure node's parent.
Asserts if the chain ends before a procedure node is found.
@return query thread to run next (always thr) */
UNIV_INTERN
que_thr_t*
return_step(
/*========*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	return_node_t*	node;
	que_node_t*	parent;

	ut_ad(thr);

	node = thr->run_node;

	ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);

	parent = node;

	while (que_node_get_type(parent) != QUE_NODE_PROC) {

		parent = que_node_get_parent(parent);

		/* Check the new node before the loop condition inspects
		it: asserting only after the loop would be too late,
		because que_node_get_type() would already have been
		called on a NULL parent. */
		ut_a(parent);
	}

	thr->run_node = que_node_get_parent(parent);

	return(thr);
}

4824
perfschema/fil/fil0fil.c Normal file

File diff suppressed because it is too large Load diff

4308
perfschema/fsp/fsp0fsp.c Normal file

File diff suppressed because it is too large Load diff

31
perfschema/fut/fut0fut.c Normal file
View file

@ -0,0 +1,31 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file fut/fut0fut.c
File-based utilities
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#include "fut0fut.h"
#ifdef UNIV_NONINL
#include "fut0fut.ic"
#endif

530
perfschema/fut/fut0lst.c Normal file
View file

@ -0,0 +1,530 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file fut/fut0lst.c
File-based list utilities
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "fut0lst.h"
#ifdef UNIV_NONINL
#include "fut0lst.ic"
#endif
#include "buf0buf.h"
#include "page0page.h"
/********************************************************************//**
Makes the given node the only member of a list whose length is currently
zero: both end pointers of the base node are set to the node, the node's
prev and next pointers are set to the null address, and the stored list
length is increased by one. */
static
void
flst_add_to_empty(
/*==============*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of
					empty list */
	flst_node_t*		node,	/*!< in: node to add */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	fil_addr_t	self_addr;
	ulint		space_id;
	ulint		old_len;

	ut_ad(mtr && base && node);
	ut_ad(base != node);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));

	old_len = flst_get_len(base, mtr);
	ut_a(old_len == 0);

	buf_ptr_get_fsp_addr(node, &space_id, &self_addr);

	/* The single node is both the first and the last one */
	flst_write_addr(base + FLST_FIRST, self_addr, mtr);
	flst_write_addr(base + FLST_LAST, self_addr, mtr);

	/* It has no neighbours */
	flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
	flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);

	/* Record the new length in the base node */
	mlog_write_ulint(base + FLST_LEN, old_len + 1, MLOG_4BYTES, mtr);
}
/********************************************************************//**
Appends a node to the end of a list. An empty list is handled by
flst_add_to_empty(); otherwise the current last node is located and the
new node is inserted after it with flst_insert_after(). */
UNIV_INTERN
void
flst_add_last(
/*==========*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node,	/*!< in: node to add */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	ulint		list_len;
	fil_addr_t	self_addr;
	fil_addr_t	tail_addr;
	flst_node_t*	tail;

	ut_ad(mtr && base && node);
	ut_ad(base != node);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));

	list_len = flst_get_len(base, mtr);
	tail_addr = flst_get_last(base, mtr);

	buf_ptr_get_fsp_addr(node, &space_id, &self_addr);

	if (list_len == 0) {
		/* Nothing to link to yet */
		flst_add_to_empty(base, node, mtr);

		return;
	}

	if (tail_addr.page != self_addr.page) {
		/* The last node is on another page: fetch and x-latch it */
		ulint	zip_size = fil_space_get_zip_size(space_id);

		tail = fut_get_ptr(space_id, zip_size, tail_addr,
				   RW_X_LATCH, mtr);
	} else {
		/* Same page as the new node: compute the pointer directly */
		tail = page_align(node) + tail_addr.boffset;
	}

	flst_insert_after(base, tail, node, mtr);
}
/********************************************************************//**
Prepends a node to the start of a list. An empty list is handled by
flst_add_to_empty(); otherwise the current first node is located and the
new node is inserted before it with flst_insert_before(). */
UNIV_INTERN
void
flst_add_first(
/*===========*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node,	/*!< in: node to add */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	ulint		list_len;
	fil_addr_t	self_addr;
	fil_addr_t	head_addr;
	flst_node_t*	head;

	ut_ad(mtr && base && node);
	ut_ad(base != node);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));

	list_len = flst_get_len(base, mtr);
	head_addr = flst_get_first(base, mtr);

	buf_ptr_get_fsp_addr(node, &space_id, &self_addr);

	if (list_len == 0) {
		/* Nothing to link to yet */
		flst_add_to_empty(base, node, mtr);

		return;
	}

	if (head_addr.page != self_addr.page) {
		/* The first node is on another page: fetch and x-latch it */
		ulint	zip_size = fil_space_get_zip_size(space_id);

		head = fut_get_ptr(space_id, zip_size, head_addr,
				   RW_X_LATCH, mtr);
	} else {
		/* Same page as the new node: compute the pointer directly */
		head = page_align(node) + head_addr.boffset;
	}

	flst_insert_before(base, node, head, mtr);
}
/********************************************************************//**
Links node2 into a list directly after node1. The successor of node1, if
any, gets node2 as its new predecessor; if node1 had no successor, node2
becomes the last node recorded in the base node. The stored list length
is increased by one. */
UNIV_INTERN
void
flst_insert_after(
/*==============*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node1,	/*!< in: node to insert after */
	flst_node_t*		node2,	/*!< in: node to add */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	before_addr;	/* address of node1 */
	fil_addr_t	new_addr;	/* address of node2 */
	fil_addr_t	after_addr;	/* address of old successor */
	ulint		old_len;

	ut_ad(mtr && node1 && node2 && base);
	ut_ad(base != node1);
	ut_ad(base != node2);
	ut_ad(node2 != node1);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));

	buf_ptr_get_fsp_addr(node1, &space_id, &before_addr);
	buf_ptr_get_fsp_addr(node2, &space_id, &new_addr);

	after_addr = flst_get_next_addr(node1, mtr);

	/* Point the new node at both of its neighbours */
	flst_write_addr(node2 + FLST_PREV, before_addr, mtr);
	flst_write_addr(node2 + FLST_NEXT, after_addr, mtr);

	if (fil_addr_is_null(after_addr)) {
		/* node1 was the tail: the new node becomes the tail */
		flst_write_addr(base + FLST_LAST, new_addr, mtr);
	} else {
		/* Make the old successor point back at the new node */
		ulint		zip_size = fil_space_get_zip_size(space_id);
		flst_node_t*	successor;

		successor = fut_get_ptr(space_id, zip_size,
					after_addr, RW_X_LATCH, mtr);
		flst_write_addr(successor + FLST_PREV, new_addr, mtr);
	}

	/* Make node1 point forward at the new node */
	flst_write_addr(node1 + FLST_NEXT, new_addr, mtr);

	/* Record the new length in the base node */
	old_len = flst_get_len(base, mtr);
	mlog_write_ulint(base + FLST_LEN, old_len + 1, MLOG_4BYTES, mtr);
}
/********************************************************************//**
Links node2 into a list directly before node3. The predecessor of node3,
if any, gets node2 as its new successor; if node3 had no predecessor,
node2 becomes the first node recorded in the base node. The stored list
length is increased by one. */
UNIV_INTERN
void
flst_insert_before(
/*===============*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node2,	/*!< in: node to insert */
	flst_node_t*		node3,	/*!< in: node to insert before */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	before_addr;	/* address of old predecessor */
	fil_addr_t	new_addr;	/* address of node2 */
	fil_addr_t	after_addr;	/* address of node3 */
	ulint		old_len;

	ut_ad(mtr && node2 && node3 && base);
	ut_ad(base != node2);
	ut_ad(base != node3);
	ut_ad(node2 != node3);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));

	buf_ptr_get_fsp_addr(node2, &space_id, &new_addr);
	buf_ptr_get_fsp_addr(node3, &space_id, &after_addr);

	before_addr = flst_get_prev_addr(node3, mtr);

	/* Point the new node at both of its neighbours */
	flst_write_addr(node2 + FLST_PREV, before_addr, mtr);
	flst_write_addr(node2 + FLST_NEXT, after_addr, mtr);

	if (fil_addr_is_null(before_addr)) {
		/* node3 was the head: the new node becomes the head */
		flst_write_addr(base + FLST_FIRST, new_addr, mtr);
	} else {
		/* Make the old predecessor point forward at the new node */
		ulint		zip_size = fil_space_get_zip_size(space_id);
		flst_node_t*	predecessor;

		predecessor = fut_get_ptr(space_id, zip_size, before_addr,
					  RW_X_LATCH, mtr);
		flst_write_addr(predecessor + FLST_NEXT, new_addr, mtr);
	}

	/* Make node3 point back at the new node */
	flst_write_addr(node3 + FLST_PREV, new_addr, mtr);

	/* Record the new length in the base node */
	old_len = flst_get_len(base, mtr);
	mlog_write_ulint(base + FLST_LEN, old_len + 1, MLOG_4BYTES, mtr);
}
/********************************************************************//**
Unlinks node2 from a list. The predecessor (or, if there is none, the
first-pointer of the base node) is made to point at the successor, and
the successor (or, if there is none, the last-pointer of the base node)
is made to point at the predecessor. The stored list length is decreased
by one. */
UNIV_INTERN
void
flst_remove(
/*========*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node2,	/*!< in: node to remove */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	ulint		zip;
	fil_addr_t	self_addr;
	fil_addr_t	prev_addr;
	fil_addr_t	next_addr;
	flst_node_t*	neighbour;
	ulint		old_len;

	ut_ad(mtr && node2 && base);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));

	buf_ptr_get_fsp_addr(node2, &space_id, &self_addr);
	zip = fil_space_get_zip_size(space_id);

	prev_addr = flst_get_prev_addr(node2, mtr);
	next_addr = flst_get_next_addr(node2, mtr);

	/* Forward link: predecessor (or base) -> successor */
	if (fil_addr_is_null(prev_addr)) {
		/* node2 was the head of the list */
		flst_write_addr(base + FLST_FIRST, next_addr, mtr);
	} else {
		if (prev_addr.page != self_addr.page) {
			neighbour = fut_get_ptr(space_id, zip,
						prev_addr, RW_X_LATCH, mtr);
		} else {
			/* Predecessor lives on the same page as node2 */
			neighbour = page_align(node2) + prev_addr.boffset;
		}

		ut_ad(neighbour != node2);

		flst_write_addr(neighbour + FLST_NEXT, next_addr, mtr);
	}

	/* Backward link: successor (or base) -> predecessor */
	if (fil_addr_is_null(next_addr)) {
		/* node2 was the tail of the list */
		flst_write_addr(base + FLST_LAST, prev_addr, mtr);
	} else {
		if (next_addr.page != self_addr.page) {
			neighbour = fut_get_ptr(space_id, zip,
						next_addr, RW_X_LATCH, mtr);
		} else {
			/* Successor lives on the same page as node2 */
			neighbour = page_align(node2) + next_addr.boffset;
		}

		ut_ad(node2 != neighbour);

		flst_write_addr(neighbour + FLST_PREV, prev_addr, mtr);
	}

	/* Record the new length in the base node */
	old_len = flst_get_len(base, mtr);
	ut_ad(old_len > 0);

	mlog_write_ulint(base + FLST_LEN, old_len - 1, MLOG_4BYTES, mtr);
}
/********************************************************************//**
Detaches the tail of a list starting at, and including, node2. The
predecessor of node2 becomes the new last node; if node2 had no
predecessor the list becomes empty. The tail is not traversed: the caller
must pass the number of nodes being removed, which is subtracted from the
stored list length. */
UNIV_INTERN
void
flst_cut_end(
/*=========*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node2,	/*!< in: first node to remove */
	ulint			n_nodes,/*!< in: number of nodes to remove,
					must be >= 1 */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	self_addr;
	fil_addr_t	prev_addr;
	ulint		old_len;

	ut_ad(mtr && node2 && base);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
	ut_ad(n_nodes > 0);

	buf_ptr_get_fsp_addr(node2, &space_id, &self_addr);

	prev_addr = flst_get_prev_addr(node2, mtr);

	if (fil_addr_is_null(prev_addr)) {
		/* node2 was the head: nothing remains in the list */
		flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
	} else {
		flst_node_t*	new_tail;

		if (prev_addr.page != self_addr.page) {
			ulint	zip_size = fil_space_get_zip_size(space_id);

			new_tail = fut_get_ptr(space_id, zip_size,
					       prev_addr, RW_X_LATCH, mtr);
		} else {
			/* Predecessor lives on the same page as node2 */
			new_tail = page_align(node2) + prev_addr.boffset;
		}

		/* The predecessor no longer has a successor */
		flst_write_addr(new_tail + FLST_NEXT, fil_addr_null, mtr);
	}

	flst_write_addr(base + FLST_LAST, prev_addr, mtr);

	/* Record the new length in the base node */
	old_len = flst_get_len(base, mtr);
	ut_ad(old_len >= n_nodes);

	mlog_write_ulint(base + FLST_LEN, old_len - n_nodes,
			 MLOG_4BYTES, mtr);
}
/********************************************************************//**
Detaches everything that follows node2 in a list, making node2 the new
last node. The tail is not traversed: the caller must pass the number of
nodes being removed, which is subtracted from the stored list length.
With n_nodes == 0 nothing is written. */
UNIV_INTERN
void
flst_truncate_end(
/*==============*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node2,	/*!< in: first node not to remove */
	ulint			n_nodes,/*!< in: number of nodes to remove */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ut_ad(mtr && node2 && base);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));

	if (n_nodes > 0) {
		fil_addr_t	new_tail_addr;
		ulint		space_id;
		ulint		old_len;

		buf_ptr_get_fsp_addr(node2, &space_id, &new_tail_addr);

		/* node2 loses its successor and becomes the tail */
		flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr);
		flst_write_addr(base + FLST_LAST, new_tail_addr, mtr);

		/* Record the new length in the base node */
		old_len = flst_get_len(base, mtr);
		ut_ad(old_len >= n_nodes);

		mlog_write_ulint(base + FLST_LEN, old_len - n_nodes,
				 MLOG_4BYTES, mtr);
	} else {
		/* Removing nothing only makes sense if node2 is
		already the last node */
		ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr)));
	}
}
/********************************************************************//**
Checks the consistency of a file-based list: following the next pointers
from the first node for exactly the stored length must end at the null
address, and so must following the prev pointers from the last node. A
violation triggers an assertion failure.
@return TRUE if ok */
UNIV_INTERN
ibool
flst_validate(
/*==========*/
	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	mtr_t*			mtr1)	/*!< in: mtr */
{
	ulint			space_id;
	ulint			zip;
	const flst_node_t*	cur;
	fil_addr_t		cur_addr;
	fil_addr_t		base_addr;
	ulint			n_nodes;
	ulint			remaining;
	mtr_t			walk_mtr;

	ut_ad(base);
	ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));

	/* Two mini-transactions are involved. mtr1 holds the base node
	locked so that other threads cannot modify the list. walk_mtr is
	used for visiting the nodes and is committed after every node:
	on a long list the x-locked pages could otherwise fill the
	buffer, resulting in a deadlock. */

	/* The space id of the base node is used for all node fetches */
	buf_ptr_get_fsp_addr(base, &space_id, &base_addr);
	zip = fil_space_get_zip_size(space_id);

	n_nodes = flst_get_len(base, mtr1);

	/* Forward pass */
	cur_addr = flst_get_first(base, mtr1);

	for (remaining = n_nodes; remaining > 0; remaining--) {
		mtr_start(&walk_mtr);

		cur = fut_get_ptr(space_id, zip,
				  cur_addr, RW_X_LATCH, &walk_mtr);
		cur_addr = flst_get_next_addr(cur, &walk_mtr);

		mtr_commit(&walk_mtr);
	}

	ut_a(fil_addr_is_null(cur_addr));

	/* Backward pass */
	cur_addr = flst_get_last(base, mtr1);

	for (remaining = n_nodes; remaining > 0; remaining--) {
		mtr_start(&walk_mtr);

		cur = fut_get_ptr(space_id, zip,
				  cur_addr, RW_X_LATCH, &walk_mtr);
		cur_addr = flst_get_prev_addr(cur, &walk_mtr);

		mtr_commit(&walk_mtr);
	}

	ut_a(fil_addr_is_null(cur_addr));

	return(TRUE);
}
/********************************************************************//**
Writes a short description of a file-based list to stderr: the space id,
page number and byte offset of the base node, and the stored length. */
UNIV_INTERN
void
flst_print(
/*=======*/
	const flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	mtr_t*			mtr)	/*!< in: mtr */
{
	const buf_frame_t*	base_page;
	ulint			n_nodes;
	ulong			space_id;
	ulong			page_no;
	ulong			byte_offset;

	ut_ad(base && mtr);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));

	base_page = page_align((byte*) base);
	n_nodes = flst_get_len(base, mtr);

	space_id = (ulong) page_get_space_id(base_page);
	page_no = (ulong) page_get_page_no(base_page);
	byte_offset = (ulong) page_offset(base);

	fprintf(stderr,
		"FILE-BASED LIST:\n"
		"Base node in space %lu page %lu byte offset %lu; len %lu\n",
		space_id, page_no, byte_offset, (ulong) n_nodes);
}

441
perfschema/ha/ha0ha.c Normal file
View file

@ -0,0 +1,441 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file ha/ha0ha.c
The hash table with external chains
Created 8/22/1994 Heikki Tuuri
*************************************************************************/
#include "ha0ha.h"
#ifdef UNIV_NONINL
#include "ha0ha.ic"
#endif
#ifdef UNIV_DEBUG
# include "buf0buf.h"
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
# include "btr0sea.h"
#endif /* UNIV_SYNC_DEBUG */
#include "page0page.h"
/*************************************************************//**
Creates a hash table with at least n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n.
If n_mutexes is 0, the table gets a single node heap (table->heap).
Otherwise, unless UNIV_HOTBACKUP is defined, the table gets n_mutexes
mutexes and an array of n_mutexes node heaps (table->heaps[]).
@return own: created table */
UNIV_INTERN
hash_table_t*
ha_create_func(
/*===========*/
ulint n, /*!< in: number of array cells */
#ifdef UNIV_SYNC_DEBUG
ulint mutex_level, /*!< in: level of the mutexes in the latching
order: this is used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
ulint n_mutexes) /*!< in: number of mutexes to protect the
hash table: must be a power of 2, or 0 */
{
hash_table_t* table;
#ifndef UNIV_HOTBACKUP
ulint i;
#endif /* !UNIV_HOTBACKUP */
/* Debug-only check of the documented n_mutexes requirement */
ut_ad(ut_is_2pow(n_mutexes));
table = hash_create(n);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
/* Turns on the per-block pointer counting that the other ha_*
functions in this file perform when table->adaptive is set */
table->adaptive = TRUE;
# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
but in practice it never should in this case, hence the asserts. */
if (n_mutexes == 0) {
/* No mutexes requested: create the single heap and return
without creating any mutexes or per-mutex heaps */
table->heap = mem_heap_create_in_btr_search(
ut_min(4096, MEM_MAX_ALLOC_IN_BUF));
ut_a(table->heap);
return(table);
}
#ifndef UNIV_HOTBACKUP
/* Create the mutexes and one heap for each of them */
hash_create_mutexes(table, n_mutexes, mutex_level);
table->heaps = mem_alloc(n_mutexes * sizeof(void*));
for (i = 0; i < n_mutexes; i++) {
table->heaps[i] = mem_heap_create_in_btr_search(4096);
ut_a(table->heaps[i]);
}
#endif /* !UNIV_HOTBACKUP */
return(table);
}
/*************************************************************//**
Empties a hash table: unless UNIV_HOTBACKUP is defined, the first
table->n_mutexes heaps in table->heaps[] are freed, and then the chain
head of every cell is reset to NULL. */
UNIV_INTERN
void
ha_clear(
/*=====*/
	hash_table_t*	table)	/*!< in, own: hash table */
{
	ulint	idx;
	ulint	limit;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */

#ifndef UNIV_HOTBACKUP
	/* Release the per-mutex node heaps */
	limit = table->n_mutexes;

	idx = 0;
	while (idx < limit) {
		mem_heap_free(table->heaps[idx]);
		idx++;
	}
#endif /* !UNIV_HOTBACKUP */

	/* Detach the chain from every cell */
	limit = hash_get_n_cells(table);

	idx = 0;
	while (idx < limit) {
		hash_get_nth_cell(table, idx)->node = NULL;
		idx++;
	}
}
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted. Otherwise a new node is allocated from the heap returned by
hash_get_heap() and appended to the end of the cell's chain.
@return TRUE if succeed, FALSE if no more memory could be allocated */
UNIV_INTERN
ibool
ha_insert_for_fold_func(
/*====================*/
hash_table_t* table, /*!< in: hash table */
ulint fold, /*!< in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the same data, and no new
node is created! */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block, /*!< in: buffer block containing the data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
void* data) /*!< in: data, must not be NULL */
{
hash_cell_t* cell;
ha_node_t* node;
ha_node_t* prev_node;
ulint hash;
ut_ad(table && data);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/* The data pointer must lie within the frame of the given block */
ut_a(block->frame == page_align(data));
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
ASSERT_HASH_MUTEX_OWN(table, fold);
hash = hash_calc_hash(fold, table);
cell = hash_get_nth_cell(table, hash);
/* First pass over the chain: look for a node with the same fold
and, if one exists, redirect it to the new data in place */
prev_node = cell->node;
while (prev_node != NULL) {
if (prev_node->fold == fold) {
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
/* Move one pointer count from the block the node referred
to so far to the block of the new data */
buf_block_t* prev_block = prev_node->block;
ut_a(prev_block->frame
== page_align(prev_node->data));
ut_a(prev_block->n_pointers > 0);
prev_block->n_pointers--;
block->n_pointers++;
}
# endif /* !UNIV_HOTBACKUP */
prev_node->block = block;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
prev_node->data = data;
return(TRUE);
}
prev_node = prev_node->next;
}
/* We have to allocate a new chain node */
node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));
if (node == NULL) {
/* It was a btr search type memory heap and at the moment
no more memory could be allocated: return */
ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
return(FALSE);
}
/* NOTE(review): 'block' is a parameter only in UNIV_AHI_DEBUG or
UNIV_DEBUG builds; ha_node_set_data is presumably a macro that drops
that argument in other builds -- confirm against its definition */
ha_node_set_data(node, block, data);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
if (table->adaptive) {
/* The new node adds one pointer into the block */
block->n_pointers++;
}
# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
node->fold = fold;
node->next = NULL;
/* Second pass: link the new node in as the last node of the chain */
prev_node = cell->node;
if (prev_node == NULL) {
/* The chain was empty: the new node becomes its head */
cell->node = node;
return(TRUE);
}
while (prev_node->next != NULL) {
prev_node = prev_node->next;
}
prev_node->next = node;
return(TRUE);
}
/***********************************************************//**
Deletes a hash node. In UNIV_AHI_DEBUG or UNIV_DEBUG builds (without
UNIV_HOTBACKUP), when table->adaptive is set, the node's block is first
checked to contain the node's data and the block's pointer count is
decreased by one. The node is then removed from the table with
HASH_DELETE_AND_COMPACT. */
UNIV_INTERN
void
ha_delete_hash_node(
/*================*/
	hash_table_t*	table,		/*!< in: hash table */
	ha_node_t*	del_node)	/*!< in: node to be deleted */
{
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
	if (table->adaptive) {
		/* This must be a comparison. The former '=' assigned
		page_align(data) to block->frame, so the assertion
		passed for any non-NULL page and silently overwrote
		the frame pointer instead of verifying it. */
		ut_a(del_node->block->frame == page_align(del_node->data));
		ut_a(del_node->block->n_pointers > 0);
		del_node->block->n_pointers--;
	}
# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */

	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
}
/*********************************************************//**
Searches the chain selected by fold for the node whose data pointer equals
the given one and, if such a node exists, redirects it to new_data. Does
nothing when no such node is found. */
UNIV_INTERN
void
ha_search_and_update_if_found_func(
/*===============================*/
	hash_table_t*	table,	/*!< in/out: hash table */
	ulint		fold,	/*!< in: folded value of the searched data */
	void*		data,	/*!< in: pointer to the data */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	buf_block_t*	new_block,/*!< in: block containing new_data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	void*		new_data)/*!< in: new pointer to the data */
{
	ha_node_t*	hit;

	ASSERT_HASH_MUTEX_OWN(table, fold);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	/* new_data must lie within the frame of new_block */
	ut_a(new_block->frame == page_align(new_data));
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */

	hit = ha_search_with_data(table, fold, data);

	if (!hit) {

		return;
	}

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
	if (table->adaptive) {
		/* Move one pointer count from the old block to the
		new one */
		ut_a(hit->block->n_pointers > 0);
		hit->block->n_pointers--;
		new_block->n_pointers++;
	}
# endif /* !UNIV_HOTBACKUP */

	hit->block = new_block;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	hit->data = new_data;
}
#ifndef UNIV_HOTBACKUP
/*****************************************************************//**
Deletes, from the chain selected by fold, every node whose data pointer
lies on the given page. In UNIV_DEBUG builds the chain is scanned once
more afterwards to assert that no such node is left. */
UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold,	/*!< in: fold value */
	const page_t*	page)	/*!< in: buffer page */
{
	ha_node_t*	cur;

	ASSERT_HASH_MUTEX_OWN(table, fold);

	for (cur = ha_chain_get_first(table, fold); cur != NULL; ) {

		if (page_align(ha_node_get_data(cur)) != page) {
			/* Not on the page: keep it and move on */
			cur = ha_chain_get_next(cur);

			continue;
		}

		ha_delete_hash_node(table, cur);

		/* Restart at the head of the chain: the deletion may
		compact the heap of nodes and move other nodes! */
		cur = ha_chain_get_first(table, fold);
	}

#ifdef UNIV_DEBUG
	/* Verify that nothing pointing to the page is left */
	for (cur = ha_chain_get_first(table, fold);
	     cur != NULL;
	     cur = ha_chain_get_next(cur)) {

		ut_a(page_align(ha_node_get_data(cur)) != page);
	}
#endif
}
/*************************************************************//**
Checks the cells start_index..end_index (both inclusive) of a hash table:
the fold value of every node in a cell's chain must hash to the number of
that cell. Each mismatch is reported on stderr.
@return TRUE if ok */
UNIV_INTERN
ibool
ha_validate(
/*========*/
	hash_table_t*	table,		/*!< in: hash table */
	ulint		start_index,	/*!< in: start index */
	ulint		end_index)	/*!< in: end index */
{
	ha_node_t*	cur;
	ulint		cell_no;
	ibool		valid = TRUE;

	ut_a(start_index <= end_index);
	ut_a(start_index < hash_get_n_cells(table));
	ut_a(end_index < hash_get_n_cells(table));

	for (cell_no = start_index; cell_no <= end_index; cell_no++) {

		for (cur = hash_get_nth_cell(table, cell_no)->node;
		     cur != NULL;
		     cur = cur->next) {

			if (hash_calc_hash(cur->fold, table) == cell_no) {
				/* The node is in the right cell */
				continue;
			}

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"InnoDB: Error: hash table node"
				" fold value %lu does not\n"
				"InnoDB: match the cell number %lu.\n",
				(ulong) cur->fold, (ulong) cell_no);

			valid = FALSE;
		}
	}

	return(valid);
}
/*************************************************************//**
Prints info of a hash table: the number of cells and, in UNIV_DEBUG
builds, the number of cells with a non-empty chain. If the table has a
single heap (table->heap set and table->heaps NULL), the number of
buffers in that heap is printed as well. */
UNIV_INTERN
void
ha_print_info(
/*==========*/
FILE* file, /*!< in: file where to print */
hash_table_t* table) /*!< in: hash table */
{
#ifdef UNIV_DEBUG
/* Some of the code here is disabled for performance reasons in production
builds, see http://bugs.mysql.com/36941 */
#define PRINT_USED_CELLS
#endif /* UNIV_DEBUG */
#ifdef PRINT_USED_CELLS
hash_cell_t* cell;
ulint cells = 0;
ulint i;
#endif /* PRINT_USED_CELLS */
ulint n_bufs;
#ifdef PRINT_USED_CELLS
/* Count the cells whose chain is not empty */
for (i = 0; i < hash_get_n_cells(table); i++) {
cell = hash_get_nth_cell(table, i);
if (cell->node) {
cells++;
}
}
#endif /* PRINT_USED_CELLS */
fprintf(file, "Hash table size %lu",
(ulong) hash_get_n_cells(table));
#ifdef PRINT_USED_CELLS
fprintf(file, ", used cells %lu", (ulong) cells);
#endif /* PRINT_USED_CELLS */
/* NOTE: the terminating newline is printed only inside this branch;
for any other table the output line is left unterminated */
if (table->heaps == NULL && table->heap != NULL) {
/* This calculation is intended for the adaptive hash
index: how many buffer frames we have reserved? */
n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
/* A free_block held by the heap counts as one more buffer */
if (table->heap->free_block) {
n_bufs++;
}
fprintf(file, ", node heap has %lu buffer(s)\n",
(ulong) n_bufs);
}
}
#endif /* !UNIV_HOTBACKUP */

184
perfschema/ha/ha0storage.c Normal file
View file

@ -0,0 +1,184 @@
/*****************************************************************************
Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file ha/ha0storage.c
Hash storage.
Provides a data structure that stores chunks of data in
its own storage, avoiding duplicates.
Created September 22, 2007 Vasil Dimov
*******************************************************/
#include "univ.i"
#include "ha0storage.h"
#include "hash0hash.h"
#include "mem0mem.h"
#include "ut0rnd.h"
#ifdef UNIV_NONINL
#include "ha0storage.ic"
#endif
/*******************************************************************//**
Looks up a chunk of data in a storage. Two chunks are considered equal
when their lengths match and memcmp() reports no difference.
@return pointer to the stored copy of the data, or NULL if the storage
does not contain an equal chunk */
static
const void*
ha_storage_get(
/*===========*/
	ha_storage_t*	storage,	/*!< in: hash storage */
	const void*	data,		/*!< in: data to check for */
	ulint		data_len)	/*!< in: data length */
{
	ha_storage_node_t*	node;
	ulint			fold;

	/* compute the fold once here instead of passing the
	ut_fold_binary() call itself to the HASH_SEARCH macro */
	fold = ut_fold_binary(data, data_len);

	/* The search criterion is parenthesized so that it stays a single
	operand wherever the macro is expanded, and it is undefined right
	after its only use so that it does not leak into the rest of the
	translation unit. */
#define IS_FOUND	\
	(node->data_len == data_len \
	 && memcmp(node->data, data, data_len) == 0)

	HASH_SEARCH(
		next,			/* node->"next" */
		storage->hash,		/* the hash table */
		fold,			/* key */
		ha_storage_node_t*,	/* type of node->next */
		node,			/* auxiliary variable */
		,			/* assertion */
		IS_FOUND);		/* search criteria */

#undef IS_FOUND

	if (node == NULL) {

		return(NULL);
	}

	return(node->data);
}
/*******************************************************************//**
Stores a copy of a data chunk in the storage and returns a pointer to
that copy. Chunks are equal when len1 == len2 and
memcmp(data1, data2, len1) == 0; if an equal chunk is already stored, a
pointer to the existing copy is returned and nothing is allocated.
If the chunk is new and the current storage size plus data_len would
exceed "memlim", nothing is stored and NULL is returned. Passing
memlim == 0 disables the limit.
@return pointer to the stored copy, or NULL if the limit would be exceeded */
UNIV_INTERN
const void*
ha_storage_put_memlim(
/*==================*/
	ha_storage_t*	storage,	/*!< in/out: hash storage */
	const void*	data,		/*!< in: data to store */
	ulint		data_len,	/*!< in: data length */
	ulint		memlim)		/*!< in: memory limit to obey */
{
	ha_storage_node_t*	new_node;
	const void*		existing;
	byte*			payload;
	ulint			fold;

	/* reuse the stored copy if an equal chunk is already present */
	existing = ha_storage_get(storage, data, data_len);

	if (existing != NULL) {

		return(existing);
	}

	/* the chunk is new: refuse to grow past the limit, if one is set */
	if (memlim > 0
	    && ha_storage_get_size(storage) + data_len > memlim) {

		return(NULL);
	}

	/* one allocation holds the node header immediately followed by
	the copied bytes */
	new_node = (ha_storage_node_t*) mem_heap_alloc(
		storage->heap, sizeof(ha_storage_node_t) + data_len);

	payload = (byte*) new_node + sizeof(*new_node);

	memcpy(payload, data, data_len);

	new_node->data_len = data_len;
	new_node->data = payload;

	/* compute the fold once here instead of passing the
	ut_fold_binary() call itself to the HASH_INSERT macro */
	fold = ut_fold_binary(data, data_len);

	HASH_INSERT(
		ha_storage_node_t,	/* type used in the hash chain */
		next,			/* node->"next" */
		storage->hash,		/* the hash table */
		fold,			/* key */
		new_node);		/* add this data to the hash */

	/* callers must not modify the returned bytes: the node is hashed
	by its contents, so changing them would spoil the hash table */
	return(payload);
}
#ifdef UNIV_COMPILE_TEST_FUNCS
/*******************************************************************//**
Self-test for the hash storage. Stores 256 distinct 1024-byte chunks,
then stores the same chunks again in reverse order and checks that every
second call returns the pointer obtained from the first one, i.e. that
duplicates are not stored twice. The result is reported on stderr. */
void
test_ha_storage()
{
	ha_storage_t*	storage;
	char		buf[1024];
	int		i;
	const void*	stored[256];
	const void*	p;

	storage = ha_storage_create(0, 0);

	/* first pass: every chunk is new and gets its own copy */
	for (i = 0; i < 256; i++) {
		memset(buf, i, sizeof(buf));
		stored[i] = ha_storage_put(storage, buf, sizeof(buf));
	}

	/* second pass: every chunk must map to the copy made above */
	for (i = 255; i >= 0; i--) {
		memset(buf, i, sizeof(buf));
		p = ha_storage_put(storage, buf, sizeof(buf));

		if (p != stored[i]) {
			fprintf(stderr, "ha_storage_put() returned %p "
				"instead of %p, i=%d\n", p, stored[i], i);

			/* release the storage on the failure path too */
			ha_storage_free(storage);

			return;
		}
	}

	fprintf(stderr, "all ok\n");

	ha_storage_free(storage);
}
#endif /* UNIV_COMPILE_TEST_FUNCS */

174
perfschema/ha/hash0hash.c Normal file
View file

@ -0,0 +1,174 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file ha/hash0hash.c
The simple hash table utility
Created 5/20/1997 Heikki Tuuri
*******************************************************/
#include "hash0hash.h"
#ifdef UNIV_NONINL
#include "hash0hash.ic"
#endif
#include "mem0mem.h"
#ifndef UNIV_HOTBACKUP
/************************************************************//**
Acquires the mutex that hash_get_mutex() returns for a fold value in a
hash table. */
UNIV_INTERN
void
hash_mutex_enter(
/*=============*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold)	/*!< in: fold */
{
	mutex_t*	fold_mutex;

	fold_mutex = hash_get_mutex(table, fold);

	mutex_enter(fold_mutex);
}
/************************************************************//**
Releases the mutex that hash_get_mutex() returns for a fold value in a
hash table. */
UNIV_INTERN
void
hash_mutex_exit(
/*============*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold)	/*!< in: fold */
{
	mutex_t*	fold_mutex;

	fold_mutex = hash_get_mutex(table, fold);

	mutex_exit(fold_mutex);
}
/************************************************************//**
Acquires every mutex of a hash table, walking the mutex array from the
first element to the last. */
UNIV_INTERN
void
hash_mutex_enter_all(
/*=================*/
	hash_table_t*	table)	/*!< in: hash table */
{
	ulint	nth = 0;

	while (nth < table->n_mutexes) {
		mutex_enter(&table->mutexes[nth]);
		nth++;
	}
}
/************************************************************//**
Releases every mutex of a hash table, walking the mutex array from the
first element to the last. */
UNIV_INTERN
void
hash_mutex_exit_all(
/*================*/
	hash_table_t*	table)	/*!< in: hash table */
{
	ulint	nth = 0;

	while (nth < table->n_mutexes) {
		mutex_exit(&table->mutexes[nth]);
		nth++;
	}
}
#endif /* !UNIV_HOTBACKUP */
/*************************************************************//**
Creates a hash table with >= n array cells. The cell count is the value
returned by ut_find_prime(n), a prime number slightly bigger than n.
The new table has no heap and, outside UNIV_HOTBACKUP builds, no
mutexes and no heap array; all cells are cleared.
@return own: created table */
UNIV_INTERN
hash_table_t*
hash_create(
/*========*/
	ulint	n)	/*!< in: number of array cells */
{
	hash_table_t*	table;
	ulint		n_cells;

	n_cells = ut_find_prime(n);

	table = mem_alloc(sizeof(*table));

	table->n_cells = n_cells;
	table->array = ut_malloc(n_cells * sizeof(hash_cell_t));
	table->heap = NULL;
	table->magic_n = HASH_TABLE_MAGIC_N;

#ifndef UNIV_HOTBACKUP
# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	table->adaptive = FALSE;
# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	table->heaps = NULL;
	table->mutexes = NULL;
	table->n_mutexes = 0;
#endif /* !UNIV_HOTBACKUP */

	/* the cell array is allocated uninitialized: clear it */
	hash_table_clear(table);

	return(table);
}
/*************************************************************//**
Frees a hash table: first its cell array, then the table struct itself.
Outside UNIV_HOTBACKUP builds the table must not have a mutex array. */
UNIV_INTERN
void
hash_table_free(
/*============*/
	hash_table_t*	table)	/*!< in, own: hash table */
{
	hash_cell_t*	cells;

#ifndef UNIV_HOTBACKUP
	ut_a(table->mutexes == NULL);
#endif /* !UNIV_HOTBACKUP */

	cells = table->array;

	ut_free(cells);
	mem_free(table);
}
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Creates a mutex array to protect a hash table. The array is allocated
with mem_alloc(), every element is initialized with mutex_create(), and
the array and its length are stored in table->mutexes and
table->n_mutexes. */
UNIV_INTERN
void
hash_create_mutexes_func(
/*=====================*/
hash_table_t* table, /*!< in: hash table */
#ifdef UNIV_SYNC_DEBUG
ulint sync_level, /*!< in: latching order level of the
mutexes: used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
ulint n_mutexes) /*!< in: number of mutexes, must be a
power of 2 */
{
ulint i;
/* both preconditions are enforced at run time */
ut_a(n_mutexes > 0);
ut_a(ut_is_2pow(n_mutexes));
table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t));
for (i = 0; i < n_mutexes; i++) {
/* NOTE(review): sync_level is a parameter only when UNIV_SYNC_DEBUG
is defined, yet it is named here unconditionally; presumably
mutex_create() is a macro that discards this argument in other
builds -- confirm before changing this call. */
mutex_create(table->mutexes + i, sync_level);
}
table->n_mutexes = n_mutexes;
}
#endif /* !UNIV_HOTBACKUP */

4
perfschema/ha_innodb.def Normal file
View file

@ -0,0 +1,4 @@
EXPORTS
_mysql_plugin_interface_version_
_mysql_sizeof_struct_st_plugin_
_mysql_plugin_declarations_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,326 @@
/*****************************************************************************
Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/*
This file is based on ha_berkeley.h of MySQL distribution
This file defines the Innodb handler: the interface between MySQL and
Innodb
*/
#ifdef USE_PRAGMA_INTERFACE
#pragma interface /* gcc class implementation */
#endif
/** Translation table between MySQL index numbers and InnoDB index
structures */
typedef struct innodb_idx_translate_struct {
ulint index_count; /*!< number of valid index entries
in the index_mapping array */
ulint array_size; /*!< array size of index_mapping */
dict_index_t** index_mapping; /*!< index pointer array directly
maps to index in Innodb from MySQL
array index */
} innodb_idx_translate_t;
/** InnoDB table share: a reference-counted record kept per table name */
typedef struct st_innobase_share {
THR_LOCK lock; /*!< MySQL lock protecting
this structure */
const char* table_name; /*!< InnoDB table name */
uint use_count; /*!< reference count,
incremented in get_share()
and decremented in
free_share() */
void* table_name_hash;/*!< hash table chain node */
innodb_idx_translate_t idx_trans_tbl; /*!< index translation
table between MySQL and
Innodb */
} INNOBASE_SHARE;
/** InnoDB B-tree index */
struct dict_index_struct;
/** Prebuilt structures in an Innobase table handle used within MySQL */
struct row_prebuilt_struct;
/** InnoDB B-tree index */
typedef struct dict_index_struct dict_index_t;
/** Prebuilt structures in an Innobase table handle used within MySQL */
typedef struct row_prebuilt_struct row_prebuilt_t;
/** The class defining a handle to an Innodb table. It derives from the
MySQL handler class; the members declared before the "public:" label
are private. */
class ha_innobase: public handler
{
row_prebuilt_t* prebuilt; /*!< prebuilt struct in InnoDB, used
to save CPU time with prebuilt data
structures*/
THD* user_thd; /*!< the thread handle of the user
currently using the handle; this is
set in external_lock function */
THR_LOCK_DATA lock; /*!< NOTE(review): undocumented;
presumably the MySQL lock data
belonging to this handle -- confirm */
INNOBASE_SHARE* share; /*!< information for MySQL
table locking */
uchar* upd_buff; /*!< buffer used in updates */
uchar* key_val_buff; /*!< buffer used in converting
search key values from MySQL format
to Innodb format */
ulong upd_and_key_val_buff_len;
/*!< the length of each of the previous
two buffers */
Table_flags int_table_flags;
uint primary_key;
ulong start_of_scan; /*!< this is set to 1 when we are
starting a table scan but have not
yet fetched any row, else 0 */
uint last_match_mode;/*!< match mode of the latest search:
ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
or undefined */
uint num_write_row; /*!< number of write_row() calls */
/* Private helper methods */
uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
const uchar* record);
inline void update_thd(THD* thd);
void update_thd();
int change_active_index(uint keynr);
int general_fetch(uchar* buf, uint direction, uint match_mode);
/* Private auto-increment helpers */
ulint innobase_lock_autoinc();
ulonglong innobase_peek_autoinc();
ulint innobase_set_max_autoinc(ulonglong auto_inc);
ulint innobase_reset_autoinc(ulonglong auto_inc);
ulint innobase_get_autoinc(ulonglong* value);
ulint innobase_update_autoinc(ulonglong auto_inc);
void innobase_initialize_autoinc();
dict_index_t* innobase_get_index(uint keynr);
/* Init values for the class: */
public:
ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
~ha_innobase();
/*
Get the row type from the storage engine. If this method returns
ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
*/
enum row_type get_row_type() const;
const char* table_type() const;
const char* index_type(uint key_number);
const char** bas_ext() const;
Table_flags table_flags() const;
ulong index_flags(uint idx, uint part, bool all_parts) const;
uint max_supported_keys() const;
uint max_supported_key_length() const;
uint max_supported_key_part_length() const;
const key_map* keys_to_use_for_scanning();
int open(const char *name, int mode, uint test_if_locked);
int close(void);
double scan_time();
double read_time(uint index, uint ranges, ha_rows rows);
int write_row(uchar * buf);
int update_row(const uchar * old_data, uchar * new_data);
int delete_row(const uchar * buf);
bool was_semi_consistent_read();
void try_semi_consistent_read(bool yes);
void unlock_row();
int index_init(uint index, bool sorted);
int index_end();
int index_read(uchar * buf, const uchar * key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_idx(uchar * buf, uint index, const uchar * key,
uint key_len, enum ha_rkey_function find_flag);
int index_read_last(uchar * buf, const uchar * key, uint key_len);
int index_next(uchar * buf);
int index_next_same(uchar * buf, const uchar *key, uint keylen);
int index_prev(uchar * buf);
int index_first(uchar * buf);
int index_last(uchar * buf);
int rnd_init(bool scan);
int rnd_end();
int rnd_next(uchar *buf);
int rnd_pos(uchar * buf, uchar *pos);
/* NOTE(review): position() is declared twice in this class, here with
a const argument and again further down with a non-const one; confirm
that both overloads are intended. */
void position(const uchar *record);
int info(uint);
int analyze(THD* thd,HA_CHECK_OPT* check_opt);
int optimize(THD* thd,HA_CHECK_OPT* check_opt);
int discard_or_import_tablespace(my_bool discard);
int extra(enum ha_extra_function operation);
int reset();
int external_lock(THD *thd, int lock_type);
int transactional_table_lock(THD *thd, int lock_type);
int start_stmt(THD *thd, thr_lock_type lock_type);
void position(uchar *record);
ha_rows records_in_range(uint inx, key_range *min_key, key_range
*max_key);
ha_rows estimate_rows_upper_bound();
void update_create_info(HA_CREATE_INFO* create_info);
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
int delete_all_rows();
int delete_table(const char *name);
int rename_table(const char* from, const char* to);
int check(THD* thd, HA_CHECK_OPT* check_opt);
char* update_table_comment(const char* comment);
char* get_foreign_key_create_info();
int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
bool can_switch_engines();
uint referenced_by_foreign_key();
void free_foreign_key_create_info(char* str);
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
enum thr_lock_type lock_type);
void init_table_handle_for_HANDLER();
virtual void get_auto_increment(ulonglong offset, ulonglong increment,
ulonglong nb_desired_values,
ulonglong *first_value,
ulonglong *nb_reserved_values);
int reset_auto_increment(ulonglong value);
virtual bool get_error_message(int error, String *buf);
uint8 table_cache_type();
/*
ask handler about permission to cache table during query registration
*/
my_bool register_query_cache_table(THD *thd, char *table_key,
uint key_length,
qc_engine_callback *call_back,
ulonglong *engine_data);
static char *get_mysql_bin_log_name();
static ulonglong get_mysql_bin_log_pos();
bool primary_key_is_clustered();
int cmp_ref(const uchar *ref1, const uchar *ref2);
/** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
int prepare_drop_index(TABLE *table_arg, uint *key_num,
uint num_of_keys);
int final_drop_index(TABLE *table_arg);
/** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
};
/* Some accessor functions which the InnoDB plugin needs, but which
can not be added to mysql/plugin.h as part of the public interface;
the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
#ifndef INNODB_COMPATIBILITY_HOOKS
#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
#endif
/* Accessor functions declared with C linkage; they are declared but not
defined in this header. */
extern "C" {
/* NOTE(review): undocumented here; presumably returns the character set
associated with the given thread -- confirm against the definition. */
struct charset_info_st *thd_charset(MYSQL_THD thd);
/* NOTE(review): undocumented here; presumably returns a pointer to the
thread's current query string -- confirm against the definition. */
char **thd_query(MYSQL_THD thd);
/** Get the file name of the MySQL binlog.
* @return the name of the binlog file
*/
const char* mysql_bin_log_file_name(void);
/** Get the current position of the MySQL binlog.
* @return byte offset from the beginning of the binlog
*/
ulonglong mysql_bin_log_file_pos(void);
/**
Check if a user thread is a replication slave thread
@param thd user thread
@retval 0 the user thread is not a replication slave thread
@retval 1 the user thread is a replication slave thread
*/
int thd_slave_thread(const MYSQL_THD thd);
/**
Check if a user thread is running a non-transactional update
@param thd user thread
@retval 0 the user thread is not running a non-transactional update
@retval 1 the user thread is running a non-transactional update
*/
int thd_non_transactional_update(const MYSQL_THD thd);
/**
Get the user thread's binary logging format
@param thd user thread
@return Value to be used as index into the binlog_format_names array
*/
int thd_binlog_format(const MYSQL_THD thd);
/**
Mark transaction to rollback and mark error as fatal to a sub-statement.
@param thd Thread handle
@param all TRUE <=> rollback main transaction.
*/
void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
/* The following declaration is only available when MYSQL_VERSION_ID is
greater than 50140. */
#if MYSQL_VERSION_ID > 50140
/**
Check if binary logging is filtered for thread's current db.
@param thd Thread handle
@retval 1 the query is not filtered, 0 otherwise.
*/
bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */
}
typedef struct trx_struct trx_t;
/** @file handler/ha_innodb.h
The InnoDB handler: the interface between MySQL and InnoDB */

/********************************************************************//**
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock.
@return MySQL error code */
extern "C"
int
convert_error_code_to_mysql(
/*========================*/
int error, /*!< in: InnoDB error code */
ulint flags, /*!< in: InnoDB table flags, or 0 */
MYSQL_THD thd); /*!< in: user thread handle or NULL */
/*********************************************************************//**
Allocates an InnoDB transaction for a MySQL handler object.
@return InnoDB transaction handle */
extern "C"
trx_t*
innobase_trx_allocate(
/*==================*/
MYSQL_THD thd); /*!< in: user thread handle */
/*********************************************************************//**
This function checks each index name for a table against the reserved
system default primary index name 'GEN_CLUST_INDEX'. If a name
matches, this function pushes a warning message to the client,
and returns true.
@return true if an index name matches the reserved name */
extern "C"
bool
innobase_index_name_is_reserved(
/*============================*/
const trx_t* trx, /*!< in: InnoDB transaction handle */
const KEY* key_info, /*!< in: Indexes to be created */
ulint num_of_keys); /*!< in: Number of indexes to
be created. */

File diff suppressed because it is too large Load diff

1578
perfschema/handler/i_s.cc Normal file

File diff suppressed because it is too large Load diff

37
perfschema/handler/i_s.h Normal file
View file

@ -0,0 +1,37 @@
/*****************************************************************************
Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file handler/i_s.h
InnoDB INFORMATION SCHEMA tables interface to MySQL.
Created July 18, 2007 Vasil Dimov
*******************************************************/
#ifndef i_s_h
#define i_s_h
extern struct st_mysql_plugin i_s_innodb_trx;
extern struct st_mysql_plugin i_s_innodb_locks;
extern struct st_mysql_plugin i_s_innodb_lock_waits;
extern struct st_mysql_plugin i_s_innodb_cmp;
extern struct st_mysql_plugin i_s_innodb_cmp_reset;
extern struct st_mysql_plugin i_s_innodb_cmpmem;
extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
#endif /* i_s_h */

View file

@ -0,0 +1,42 @@
/*****************************************************************************
Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file handler/mysql_addons.cc
This file contains functions that need to be added to
MySQL code but have not been added yet.
Whenever you add a function here submit a MySQL bug
report (feature request) with the implementation. Then
write the bug number in the comment before the
function in this file.
When MySQL commits the function it can be deleted from
here. In a perfect world this file exists but is empty.
Created November 07, 2007 Vasil Dimov
*******************************************************/
#ifndef MYSQL_SERVER
#define MYSQL_SERVER
#endif /* MYSQL_SERVER */
#include <mysql_priv.h>
#include "mysql_addons.h"
#include "univ.i"

4690
perfschema/ibuf/ibuf0ibuf.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,528 @@
/*****************************************************************************
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0btr.h
The B-tree
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#ifndef btr0btr_h
#define btr0btr_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "page0cur.h"
#include "mtr0mtr.h"
#include "btr0types.h"
#ifndef UNIV_HOTBACKUP
/** Maximum record size which can be stored on a page, without using the
special big record storage structure */
#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
/** @brief Maximum depth of a B-tree in InnoDB.
Note that this isn't a maximum as such; none of the tree operations
avoid producing trees bigger than this. It is instead a "max depth
that other code must work with", useful for e.g. fixed-size arrays
that must store some information about each level in a tree. In other
words: if a B-tree with bigger depth than this is encountered, it is
not acceptable for it to lead to mysterious memory corruption, but it
is acceptable for the program to die with a clear assert failure. */
#define BTR_MAX_LEVELS 100
/** Latching modes for btr_cur_search_to_nth_level().
The first three modes take the values of RW_S_LATCH, RW_X_LATCH and
RW_NO_LATCH; the remaining modes are numbered from 33 upwards. */
enum btr_latch_mode {
/** Search a record on a leaf page and S-latch it. */
BTR_SEARCH_LEAF = RW_S_LATCH,
/** (Prepare to) modify a record on a leaf page and X-latch it. */
BTR_MODIFY_LEAF = RW_X_LATCH,
/** Obtain no latches. */
BTR_NO_LATCHES = RW_NO_LATCH,
/** Start modifying the entire B-tree. */
BTR_MODIFY_TREE = 33,
/** Continue modifying the entire B-tree. */
BTR_CONT_MODIFY_TREE = 34,
/** Search the previous record. */
BTR_SEARCH_PREV = 35,
/** Modify the previous record. */
BTR_MODIFY_PREV = 36
};
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
/** If this is ORed to btr_latch_mode, it means that the search tuple
will be inserted to the index, at the searched position.
When the record is not in the buffer pool, try to use the insert buffer. */
#define BTR_INSERT 512
/** This flag ORed to btr_latch_mode says that we do the search in query
optimization */
#define BTR_ESTIMATE 1024
/** This flag ORed to btr_latch_mode says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use
the insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
/** Try to delete mark the record at the searched position using the
insert/delete buffer when the record is not in the buffer pool. */
#define BTR_DELETE_MARK 4096
/** Try to purge the record at the searched position using the insert/delete
buffer when the record is not in the buffer pool. */
#define BTR_DELETE 8192
/**************************************************************//**
Gets the root node of a tree and x-latches it.
@return root page, x-latched */
UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
dict_index_t* index, /*!< in: index tree */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
buf_block_t*
btr_block_get(
/*==========*/
ulint space, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no, /*!< in: page number */
ulint mode, /*!< in: latch mode */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
page_t*
btr_page_get(
/*=========*/
ulint space, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no, /*!< in: page number */
ulint mode, /*!< in: latch mode */
mtr_t* mtr); /*!< in: mtr */
#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**
Gets the index id field of a page.
@return index id */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
const page_t* page); /*!< in: index page */
#ifndef UNIV_HOTBACKUP
/********************************************************//**
Gets the node level field in an index page.
@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
const page_t* page); /*!< in: index page */
/********************************************************//**
Gets the node level field in an index page.
@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
const page_t* page, /*!< in: index page */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
Gets the next index page number.
@return next page number */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
const page_t* page, /*!< in: index page */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
Gets the previous index page number.
@return prev page number */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
const page_t* page, /*!< in: index page */
mtr_t* mtr); /*!< in: mini-transaction handle */
/*************************************************************//**
Gets pointer to the previous user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@return previous user record, NULL if there is none */
UNIV_INTERN
rec_t*
btr_get_prev_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
needed, also to the previous page */
/*************************************************************//**
Gets pointer to the next user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@return next user record, NULL if there is none */
UNIV_INTERN
rec_t*
btr_get_next_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
needed, also to the next page */
/**************************************************************//**
Releases the latch on a leaf page and buffer-unfixes it. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
buf_block_t* block, /*!< in: buffer block */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Gets the child node file address in a node pointer.
NOTE: the offsets array must contain all offsets for the record since
we read the last field according to offsets and assume that it contains
the child page number. In other words offsets must have been retrieved
with rec_get_offsets(n_fields=ULINT_UNDEFINED).
@return child node address */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
/************************************************************//**
Creates the root node for a new index tree.
@return page number of the created root, FIL_NULL if did not succeed */
UNIV_INTERN
ulint
btr_create(
/*=======*/
ulint type, /*!< in: type of the index */
ulint space, /*!< in: space where created */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
dulint index_id,/*!< in: index id */
dict_index_t* index, /*!< in: index */
mtr_t* mtr); /*!< in: mini-transaction handle */
/************************************************************//**
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
UNIV_INTERN
void
btr_free_but_not_root(
/*==================*/
ulint space, /*!< in: space where created */
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint root_page_no); /*!< in: root page number */
/************************************************************//**
Frees the B-tree root page. The rest of the tree MUST already have been freed. */
UNIV_INTERN
void
btr_free_root(
/*==========*/
ulint space, /*!< in: space where created */
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint root_page_no, /*!< in: root page number */
mtr_t* mtr); /*!< in: a mini-transaction which has already
been started */
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
NOTE that the operation of this function must always succeed,
we cannot reverse it: therefore enough free disk space must be
guaranteed to be available before this function is called.
@return inserted record */
UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
Reorganizes an index page.
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
page of a non-clustered index, the caller must update the insert
buffer free bits in the same mini-transaction in such a way that the
modification will be redo-logged.
@return TRUE on success, FALSE on failure */
UNIV_INTERN
ibool
btr_page_reorganize(
/*================*/
buf_block_t* block, /*!< in: page to be reorganized */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to left.
@return TRUE if split recommended */
UNIV_INTERN
ibool
btr_page_get_split_rec_to_left(
/*===========================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
rec_t** split_rec);/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to right.
@return TRUE if split recommended */
UNIV_INTERN
ibool
btr_page_get_split_rec_to_right(
/*============================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
rec_t** split_rec);/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
released within this function! NOTE that the operation of this
function must always succeed, we cannot reverse it: therefore enough
free disk space (2 pages) must be guaranteed to be available before
this function is called.
@return inserted record */
UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr); /*!< in: mtr */
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
# define btr_insert_on_non_leaf_level(i,l,t,m) \
btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Sets a record as the predefined minimum record. */
UNIV_INTERN
void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /*!< in/out: record */
mtr_t* mtr); /*!< in: mtr */
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
UNIV_INTERN
void
btr_node_ptr_delete(
/*================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page whose node pointer is deleted */
mtr_t* mtr); /*!< in: mtr */
#ifdef UNIV_DEBUG
/************************************************************//**
Checks that the node pointer to a page is appropriate.
@return TRUE */
UNIV_INTERN
ibool
btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
mtr_t* mtr); /*!< in: mtr */
#endif /* UNIV_DEBUG */
/*************************************************************//**
Tries to merge the page first to the left immediate brother if such a
brother exists, and the node pointers to the current page and to the
brother reside on the same page. If the left brother does not satisfy these
conditions, looks at the right brother. If the page is the only one on that
level lifts the records of the page to the father page, thus reducing the
tree height. It is assumed that mtr holds an x-latch on the tree and on the
page. If cursor is on the leaf level, mtr must also hold x-latches to
the brothers, if they exist.
@return TRUE on success */
UNIV_INTERN
ibool
btr_compress(
/*=========*/
btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
the page must not be empty: in record delete
use btr_discard_page if the page would become
empty */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty. */
UNIV_INTERN
void
btr_discard_page(
/*=============*/
btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
the root page */
mtr_t* mtr); /*!< in: mtr */
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
minimum record.
@return end of log record or NULL */
UNIV_INTERN
byte*
btr_parse_set_min_rec_mark(
/*=======================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
ulint comp, /*!< in: nonzero=compact page format */
page_t* page, /*!< in: page or NULL */
mtr_t* mtr); /*!< in: mtr or NULL */
/***********************************************************//**
Parses a redo log record of reorganizing a page.
@return end of log record or NULL */
UNIV_INTERN
byte*
btr_parse_page_reorganize(
/*======================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr); /*!< in: mtr or NULL */
#ifndef UNIV_HOTBACKUP
/**************************************************************//**
Gets the number of pages in a B-tree.
@return number of pages */
UNIV_INTERN
ulint
btr_get_size(
/*=========*/
dict_index_t* index, /*!< in: index */
ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
@return new allocated block, x-latched; NULL if out of space */
UNIV_INTERN
buf_block_t*
btr_page_alloc(
/*===========*/
dict_index_t* index, /*!< in: index tree */
ulint hint_page_no, /*!< in: hint of a good page */
byte file_direction, /*!< in: direction where a possible
page split is made */
ulint level, /*!< in: level where the page is placed
in the tree */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
UNIV_INTERN
void
btr_page_free(
/*==========*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Frees a file page used in an index tree. Can also be used to free BLOB
external storage pages, because the page level 0 can be given as an
argument. */
UNIV_INTERN
void
btr_page_free_low(
/*==============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
mtr_t* mtr); /*!< in: mtr */
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
UNIV_INTERN
void
btr_print_size(
/*===========*/
dict_index_t* index); /*!< in: index tree */
/**************************************************************//**
Prints directories and other info of all nodes in the index. */
UNIV_INTERN
void
btr_print_index(
/*============*/
dict_index_t* index, /*!< in: index */
ulint width); /*!< in: print this many entries from start
and end */
#endif /* UNIV_BTR_PRINT */
/************************************************************//**
Checks the size and number of fields in a record based on the definition of
the index.
@return TRUE if ok */
UNIV_INTERN
ibool
btr_index_rec_validate(
/*===================*/
const rec_t* rec, /*!< in: index record */
const dict_index_t* index, /*!< in: index */
ibool dump_on_error); /*!< in: TRUE if the function
should print hex dump of record
and page on error */
/**************************************************************//**
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
ibool
btr_validate_index(
/*===============*/
dict_index_t* index, /*!< in: index */
trx_t* trx); /*!< in: transaction or NULL */
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "btr0btr.ic"
#endif
#endif

View file

@ -0,0 +1,314 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0btr.ic
The B-tree
Created 6/2/1994 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
#ifndef UNIV_HOTBACKUP
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "page0zip.h"
#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
(not really a hard limit).
Used in debug assertions
in btr_page_set_level and
btr_page_get_level_low */
/**************************************************************//**
Fetches a buffer block with buf_page_get() and, unless the caller asked
for no latch, declares SYNC_TREE_NODE as its latching order level.
@return the block obtained from buf_page_get() */
UNIV_INLINE
buf_block_t*
btr_block_get(
/*==========*/
	ulint	space,		/*!< in: space id */
	ulint	zip_size,	/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint	page_no,	/*!< in: page number */
	ulint	mode,		/*!< in: latch mode */
	mtr_t*	mtr)		/*!< in: mtr */
{
	buf_block_t*	fetched = buf_page_get(space, zip_size, page_no,
					       mode, mtr);

	if (mode == RW_NO_LATCH) {
		/* No latch was requested, so no latching order level
		is declared for the block. */
		return(fetched);
	}

	buf_block_dbg_add_level(fetched, SYNC_TREE_NODE);

	return(fetched);
}
/**************************************************************//**
Fetches a buffer block via btr_block_get() (which also declares the
latching order level) and hands back the page frame of that block.
@return frame of the fetched block */
UNIV_INLINE
page_t*
btr_page_get(
/*=========*/
	ulint	space,		/*!< in: space id */
	ulint	zip_size,	/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint	page_no,	/*!< in: page number */
	ulint	mode,		/*!< in: latch mode */
	mtr_t*	mtr)		/*!< in: mtr */
{
	buf_block_t*	fetched = btr_block_get(space, zip_size, page_no,
						mode, mtr);

	return(buf_block_get_frame(fetched));
}
/**************************************************************//**
Stores the index id in the page header. For a compressed page the value
is written to the uncompressed frame and then copied to the compressed
page with page_zip_write_header(); otherwise it is written through
mlog_write_dulint(). */
UNIV_INLINE
void
btr_page_set_index_id(
/*==================*/
	page_t*		page,	/*!< in: page to be created */
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
				part will be updated, or NULL */
	dulint		id,	/*!< in: index id */
	mtr_t*		mtr)	/*!< in: mtr */
{
	/* Location of the 8-byte index id field in the page header */
	byte*	id_field = page + (PAGE_HEADER + PAGE_INDEX_ID);

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_8(id_field, id);
		page_zip_write_header(page_zip, id_field, 8, mtr);

		return;
	}

	mlog_write_dulint(id_field, id, mtr);
}
#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**
Reads the 8-byte index id stored in the header of an index page.
@return index id */
UNIV_INLINE
dulint
btr_page_get_index_id(
/*==================*/
	const page_t*	page)	/*!< in: index page */
{
	const byte*	id_field = page + (PAGE_HEADER + PAGE_INDEX_ID);

	return(mach_read_from_8(id_field));
}
#ifndef UNIV_HOTBACKUP
/********************************************************//**
Reads the 2-byte node level field from the header of an index page.
Debug builds assert that the page pointer is non-NULL and that the
value does not exceed BTR_MAX_NODE_LEVEL.
@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
	const page_t*	page)	/*!< in: index page */
{
	ulint	level;

	ut_ad(page);

	/* The read stays after the assertion so that a NULL page is
	caught by the debug check before it is dereferenced. */
	level = mach_read_from_2(page + (PAGE_HEADER + PAGE_LEVEL));

	ut_ad(level <= BTR_MAX_NODE_LEVEL);

	return(level);
}
/********************************************************//**
Returns the node level of an index page by delegating to
btr_page_get_level_low(). The mini-transaction is only checked for
being non-NULL in debug builds.
@return level, leaf level == 0 */
UNIV_INLINE
ulint
btr_page_get_level(
/*===============*/
	const page_t*	page,	/*!< in: index page */
	mtr_t*		mtr __attribute__((unused)))
				/*!< in: mini-transaction handle */
{
	ulint	page_level;

	ut_ad(page && mtr);

	page_level = btr_page_get_level_low(page);

	return(page_level);
}
/********************************************************//**
Writes the node level into the header of an index page. For a
compressed page the value is written to the uncompressed frame and then
copied to the compressed page with page_zip_write_header(); otherwise
it is written through mlog_write_ulint() as a 2-byte value. */
UNIV_INLINE
void
btr_page_set_level(
/*===============*/
	page_t*		page,	/*!< in: index page */
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
				part will be updated, or NULL */
	ulint		level,	/*!< in: level, leaf level == 0 */
	mtr_t*		mtr)	/*!< in: mini-transaction handle */
{
	byte*	level_field;

	ut_ad(page && mtr);
	ut_ad(level <= BTR_MAX_NODE_LEVEL);

	level_field = page + (PAGE_HEADER + PAGE_LEVEL);

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_2(level_field, level);
		page_zip_write_header(page_zip, level_field, 2, mtr);

		return;
	}

	mlog_write_ulint(level_field, level, MLOG_2BYTES, mtr);
}
/********************************************************//**
Reads the 4-byte FIL_PAGE_NEXT field of an index page. Debug builds
assert that the mini-transaction memo contains the page with either an
X-fix or an S-fix.
@return next page number */
UNIV_INLINE
ulint
btr_page_get_next(
/*==============*/
	const page_t*	page,	/*!< in: index page */
	mtr_t*		mtr __attribute__((unused)))
				/*!< in: mini-transaction handle */
{
	ulint	next_page_no;

	ut_ad(page && mtr);
	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));

	next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);

	return(next_page_no);
}
/********************************************************//**
Writes the next-page number into the FIL_PAGE_NEXT field of an index
page. For a compressed page the value is written to the uncompressed
frame and then copied to the compressed page with
page_zip_write_header(); otherwise it is written through
mlog_write_ulint() as a 4-byte value. */
UNIV_INLINE
void
btr_page_set_next(
/*==============*/
	page_t*		page,	/*!< in: index page */
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
				part will be updated, or NULL */
	ulint		next,	/*!< in: next page number */
	mtr_t*		mtr)	/*!< in: mini-transaction handle */
{
	byte*	next_field;

	ut_ad(page && mtr);

	next_field = page + FIL_PAGE_NEXT;

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_4(next_field, next);
		page_zip_write_header(page_zip, next_field, 4, mtr);

		return;
	}

	mlog_write_ulint(next_field, next, MLOG_4BYTES, mtr);
}
/********************************************************//**
Reads the 4-byte FIL_PAGE_PREV field of an index page. The
mini-transaction is only checked for being non-NULL in debug builds.
@return prev page number */
UNIV_INLINE
ulint
btr_page_get_prev(
/*==============*/
	const page_t*	page,	/*!< in: index page */
	mtr_t*		mtr __attribute__((unused))) /*!< in: mini-transaction handle */
{
	ulint	prev_page_no;

	ut_ad(page && mtr);

	prev_page_no = mach_read_from_4(page + FIL_PAGE_PREV);

	return(prev_page_no);
}
/********************************************************//**
Writes the previous-page number into the FIL_PAGE_PREV field of an
index page. For a compressed page the value is written to the
uncompressed frame and then copied to the compressed page with
page_zip_write_header(); otherwise it is written through
mlog_write_ulint() as a 4-byte value. */
UNIV_INLINE
void
btr_page_set_prev(
/*==============*/
	page_t*		page,	/*!< in: index page */
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
				part will be updated, or NULL */
	ulint		prev,	/*!< in: previous page number */
	mtr_t*		mtr)	/*!< in: mini-transaction handle */
{
	byte*	prev_field;

	ut_ad(page && mtr);

	prev_field = page + FIL_PAGE_PREV;

	if (UNIV_LIKELY_NULL(page_zip)) {
		mach_write_to_4(prev_field, prev);
		page_zip_write_header(page_zip, prev_field, 4, mtr);

		return;
	}

	mlog_write_ulint(prev_field, prev, MLOG_4BYTES, mtr);
}
/**************************************************************//**
Reads the child page number stored in a node pointer record.
NOTE: the child page number is taken from the last field described by
the offsets array, so offsets must cover every field of the record,
i.e. it must have been obtained with
rec_get_offsets(n_fields=ULINT_UNDEFINED).
If the stored page number is 0, a diagnostic and a dump of the page are
printed, and 0 is still returned to the caller.
@return child node address */
UNIV_INLINE
ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
	const rec_t*	rec,	/*!< in: node pointer record */
	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
{
	const byte*	field;
	ulint		len;
	ulint		page_no;
	ulint		last_field_no;

	ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));

	/* The child address is in the last field */
	last_field_no = rec_offs_n_fields(offsets) - 1;
	field = rec_get_nth_field(rec, offsets, last_field_no, &len);

	ut_ad(len == 4);

	page_no = mach_read_from_4(field);

	if (UNIV_LIKELY(page_no != 0)) {

		return(page_no);
	}

	/* Page number 0 is reported, but not treated as fatal here. */
	fprintf(stderr,
		"InnoDB: a nonsensical page number 0"
		" in a node ptr record at offset %lu\n",
		(ulong) page_offset(rec));
	buf_page_print(page_align(rec), 0);

	return(page_no);
}
/**************************************************************//**
Releases the latch on a leaf page and buffer-unfixes it by releasing
the matching entry from the mini-transaction memo: an S-fix for
BTR_SEARCH_LEAF, an X-fix for BTR_MODIFY_LEAF. */
UNIV_INLINE
void
btr_leaf_page_release(
/*==================*/
	buf_block_t*	block,		/*!< in: buffer block */
	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
					BTR_MODIFY_LEAF */
	mtr_t*		mtr)		/*!< in: mtr */
{
	ulint	memo_type;

	ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
	ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));

	if (latch_mode == BTR_SEARCH_LEAF) {
		memo_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		memo_type = MTR_MEMO_PAGE_X_FIX;
	}

	mtr_memo_release(mtr, block, memo_type);
}
#endif /* !UNIV_HOTBACKUP */

View file

@ -0,0 +1,787 @@
/*****************************************************************************
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0cur.h
The index tree cursor
Created 10/16/1994 Heikki Tuuri
*******************************************************/
#ifndef btr0cur_h
#define btr0cur_h
#include "univ.i"
#include "dict0dict.h"
#include "page0cur.h"
#include "btr0types.h"
/* Mode flags for btr_cur operations; these can be ORed */
#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
update vector or inserted entry */
#ifndef UNIV_HOTBACKUP
#include "que0types.h"
#include "row0types.h"
#include "ha0ha.h"
#define BTR_CUR_ADAPT
#define BTR_CUR_HASH_ADAPT
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
const btr_cur_t* cursor);/*!< in: tree cursor */
#else /* UNIV_DEBUG */
# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the buffer block on which the tree cursor is positioned.
@return pointer to buffer block */
UNIV_INLINE
buf_block_t*
btr_cur_get_block(
/*==============*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the record pointer of a tree cursor.
@return pointer to record */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the compressed page on which the tree cursor is positioned.
@return pointer to compressed page, or NULL if the page is not compressed */
UNIV_INLINE
page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the page of a tree cursor.
@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the index of a cursor.
@return index */
UNIV_INLINE
dict_index_t*
btr_cur_get_index(
/*==============*/
btr_cur_t* cursor);/*!< in: B-tree cursor */
/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
dict_index_t* index, /*!< in: index */
rec_t* rec, /*!< in: record in tree */
buf_block_t* block, /*!< in: buffer block of rec */
btr_cur_t* cursor);/*!< in: cursor */
/********************************************************************//**
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will have a sensible value. */
UNIV_INTERN
void
btr_cur_search_to_nth_level(
/*========================*/
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the tree level of search */
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be PAGE_CUR_LE,
not PAGE_CUR_GE, as the latter may end up on
the previous page of the record! Inserts
should always be made using PAGE_CUR_LE to
search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
at most one of BTR_INSERT, BTR_DELETE_MARK,
BTR_DELETE, or BTR_ESTIMATE;
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
NOTE that if has_search_latch
is != 0, we maybe do not have a latch set
on the cursor page, we assume
the caller uses his search latch
to protect the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
/*****************************************************************//**
Opens a cursor at either end of an index. */
UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
btr_cur_t* cursor, /*!< in: cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_cur_open_at_index_side(f,i,l,c,m) \
btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
void
btr_cur_open_at_rnd_pos_func(
/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_cur_open_at_rnd_pos(i,l,c,m) \
btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL, or error number */
UNIV_INTERN
ulint
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
mtr_t* mtr); /*!< in: mtr; if this function returns
DB_SUCCESS on a leaf page of a secondary
index in a compressed tablespace, the
mtr must be committed before latching
any further pages */
/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
UNIV_INTERN
ulint
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameter thr should be
specified; if no undo logging is specified,
then the caller must have reserved enough
free extents in the file space so that the
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
@return DB_SUCCESS or error number */
UNIV_INTERN
ulint
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr; must be committed before
latching any further pages */
/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended.
@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
there is not enough space left on the compressed page */
UNIV_INTERN
ulint
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr; must be committed before
latching any further pages */
/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /*!< in: update vector; this is also allowed
to contain trx id and roll ptr fields, but
the values in update vector have no effect */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr; must be committed before
latching any further pages */
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
ulint
btr_cur_del_mark_set_clust_rec(
/*===========================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor */
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************//**
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
ulint
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
btr_cur_t* cursor, /*!< in: cursor */
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done!
@return TRUE if compression occurred */
UNIV_INTERN
ibool
btr_cur_compress_if_useful(
/*=======================*/
btr_cur_t* cursor, /*!< in: cursor on the page to compress;
cursor does not stay valid if compression
occurs */
mtr_t* mtr); /*!< in: mtr */
/*******************************************************//**
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
UNIV_INTERN
ibool
btr_cur_optimistic_delete(
/*======================*/
btr_cur_t* cursor, /*!< in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
mtr_t* mtr); /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
before latching any further pages */
/*************************************************************//**
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
exist.
@return TRUE if compression occurred */
UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
these may actually grow in size */
ibool has_reserved_extents, /*!< in: TRUE if the
caller has already reserved enough free
extents so that he knows that the operation
will succeed */
btr_cur_t* cursor, /*!< in: cursor on the record to delete;
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
mtr_t* mtr); /*!< in: mtr */
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of updating a record in-place.
@return end of log record or NULL */
UNIV_INTERN
byte*
btr_cur_parse_update_in_place(
/*==========================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
page_t* page, /*!< in/out: page or NULL */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
dict_index_t* index); /*!< in: index corresponding to page */
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a clustered
index record.
@return end of log record or NULL */
UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
page_t* page, /*!< in/out: page or NULL */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
dict_index_t* index); /*!< in: index corresponding to page */
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a secondary
index record.
@return end of log record or NULL */
UNIV_INTERN
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
page_t* page, /*!< in/out: page or NULL */
page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Estimates the number of rows in a given index range.
@return estimated number of rows */
UNIV_INTERN
ib_int64_t
btr_estimate_n_rows_in_range(
/*=========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
ulint mode1, /*!< in: search mode for range start */
const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
ulint mode2); /*!< in: search mode for range end */
/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
UNIV_INTERN
void
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index); /*!< in: index */
/*******************************************************************//**
Marks not updated extern fields as not-owned by this record. The ownership
is transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
UNIV_INTERN
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /*!< in/out: record in a clustered index */
dict_index_t* index, /*!< in: index of the page */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
const upd_t* update, /*!< in: update vector */
mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
/*******************************************************************//**
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
in entry, so that they are not freed in a rollback. */
UNIV_INTERN
void
btr_cur_mark_dtuple_inherited_extern(
/*=================================*/
dtuple_t* entry, /*!< in/out: updated entry to be
inserted to clustered index */
const upd_t* update); /*!< in: update vector */
/*******************************************************************//**
Marks all extern fields in a dtuple as owned by the record. */
UNIV_INTERN
void
btr_cur_unmark_dtuple_extern_fields(
/*================================*/
dtuple_t* entry); /*!< in/out: clustered index entry */
/*******************************************************************//**
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or error */
UNIV_INTERN
ulint
btr_store_big_rec_extern_fields(
/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
MUST be X-latched */
buf_block_t* rec_block, /*!< in/out: block containing rec */
rec_t* rec, /*!< in: record */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
big_rec_t* big_rec_vec, /*!< in: vector containing fields
to be stored externally */
mtr_t* local_mtr); /*!< in: mtr containing the latch to
rec and to the tree */
/*******************************************************************//**
Frees the space in an externally stored field to the file space
management if the field in data owns the externally stored field;
in a rollback we may have the additional condition that the field must
not be inherited. */
UNIV_INTERN
void
btr_free_externally_stored_field(
/*=============================*/
dict_index_t* index, /*!< in: index of the data, the index
tree MUST be X-latched; if the tree
height is 1, then also the root page
must be X-latched! (this is relevant
in the case this function is called
from purge where 'data' is located on
an undo log page, not an index
page) */
byte* field_ref, /*!< in/out: field reference */
const rec_t* rec, /*!< in: record containing field_ref, for
page_zip_write_blob_ptr(), or NULL */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
or NULL */
page_zip_des_t* page_zip, /*!< in: compressed page corresponding
to rec, or NULL if rec == NULL */
ulint i, /*!< in: field number of field_ref;
ignored if rec == NULL */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
mtr_t* local_mtr); /*!< in: mtr containing the latch to
data and an X-latch to the index
tree */
/*******************************************************************//**
Copies the prefix of an externally stored field of a record. The
clustered index record must be protected by a lock or a page latch.
@return the length of the copied field, or 0 if the column was being
or has been deleted */
UNIV_INTERN
ulint
btr_copy_externally_stored_field_prefix(
/*====================================*/
byte* buf, /*!< out: the field, or a prefix of it */
ulint len, /*!< in: length of buf, in bytes */
ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
zero for uncompressed BLOBs */
const byte* data, /*!< in: 'internally' stored part of the
field containing also the reference to
the external part; must be protected by
a lock or a page latch */
ulint local_len);/*!< in: length of data, in bytes */
/*******************************************************************//**
Copies an externally stored field of a record to mem heap.
@return the field copied to heap */
UNIV_INTERN
byte*
btr_rec_copy_externally_stored_field(
/*=================================*/
const rec_t* rec, /*!< in: record in a clustered index;
must be protected by a lock or a page latch */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
zero for uncompressed BLOBs */
ulint no, /*!< in: field number */
ulint* len, /*!< out: length of the field */
mem_heap_t* heap); /*!< in: mem heap */
/*******************************************************************//**
Flags the data tuple fields that are marked as extern storage in the
update vector. We use this function to remember which fields we must
mark as extern storage in a record inserted for an update.
@return number of flagged external columns */
UNIV_INTERN
ulint
btr_push_update_extern_fields(
/*==========================*/
dtuple_t* tuple, /*!< in/out: data tuple */
const upd_t* update, /*!< in: update vector */
mem_heap_t* heap) /*!< in: memory heap */
__attribute__((nonnull));
/***********************************************************//**
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
UNIV_INTERN
void
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool val, /*!< in: value to set */
mtr_t* mtr); /*!< in: mtr */
/*######################################################################*/
/** In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. The array is
reached through the path_arr field of the tree cursor and is used when
estimating the number of rows in a range. */
typedef struct btr_path_struct btr_path_t;
struct btr_path_struct{
ulint nth_rec; /*!< index of the record
where the page cursor stopped on
this level (index in alphabetical
order); value ULINT_UNDEFINED
denotes array end */
ulint n_recs; /*!< number of records on the page */
};
#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
/** Values for the flag documenting the used search method.
The enumerators are numbered consecutively starting from 1
(BTR_CUR_HASH), so the value 0 does not correspond to any enumerator. */
enum btr_cur_method {
BTR_CUR_HASH = 1, /*!< successful shortcut using
the hash index */
BTR_CUR_HASH_FAIL, /*!< failure using hash, success using
binary search: the misleading hash
reference is stored in the field
hash_node, and might be necessary to
update */
BTR_CUR_BINARY, /*!< success using the binary search */
BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
the insert buffer */
BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
mark in the insert/delete buffer */
BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
the insert/delete buffer */
BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
};
/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
struct btr_cur_struct {
dict_index_t* index; /*!< index where positioned */
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
buf_block_t* left_block; /*!< this field is used to store
a pointer to the left neighbor
page, in the cases
BTR_SEARCH_PREV and
BTR_MODIFY_PREV */
/*------------------------------*/
que_thr_t* thr; /*!< this field is only used
when btr_cur_search_to_nth_level
is called for an index entry
insertion: the calling query
thread is passed here to be
used in the insert buffer */
/*------------------------------*/
/** The following fields are used in
btr_cur_search_to_nth_level to pass information: */
/* @{ */
enum btr_cur_method flag; /*!< Search method used */
ulint tree_height; /*!< Tree height if the search is done
for a pessimistic insert or update
operation */
ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record to the right of
the cursor record after
btr_cur_search_to_nth_level;
for the mode PAGE_CUR_GE, the matched
fields to the first user record AT THE
CURSOR or to the right of it;
NOTE that the up_match and low_match
values may exceed the correct values
for comparison to the adjacent user
record if that record is on a
different leaf page! (See the note in
row_ins_duplicate_key.) */
ulint up_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search */
ulint low_match; /*!< if search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record AT THE CURSOR or
to the left of it after
btr_cur_search_to_nth_level;
NOT defined for PAGE_CUR_GE or any
other search modes; see also the NOTE
in up_match! */
ulint low_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search.
NOTE(review): this text is identical
to the one for up_bytes; presumably
"to the left" is meant here, by
analogy with low_match - confirm */
ulint n_fields; /*!< prefix length used in a hash
search if hash_node != NULL */
ulint n_bytes; /*!< hash prefix bytes if hash_node !=
NULL */
ulint fold; /*!< fold value used in the search if
flag is BTR_CUR_HASH */
/*----- Delete buffering -------*/
ulint ibuf_cnt; /*!< in searches done on insert buffer
trees, this contains the "counter"
value (the first two bytes of the
fourth field) extracted from the
page above the leaf page, from the
father node pointer that pointed to
the leaf page. In other words, it
contains the minimum counter value
for records to be inserted on the
chosen leaf page. If for some reason
this can't be read, or if the search
ended on the leftmost leaf page in
the tree (in which case the father
node pointer had the 'minimum
record' flag set), this is
ULINT_UNDEFINED. */
/*------------------------------*/
/* @} */
btr_path_t* path_arr; /*!< in estimating the number of
rows in range, we store in this array
information of the path through
the tree */
};
/** If pessimistic delete fails because of lack of file space, there
is still a good chance of success a little later. Try this many
times. */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
/** If pessimistic delete fails because of lack of file space, there
is still a good chance of success a little later. Sleep this many
microseconds between retries. */
#define BTR_CUR_RETRY_SLEEP_TIME 50000
/** The reference in a field for which data is stored on a different page.
The reference is at the end of the 'locally' stored part of the field.
'Locally' means storage in the index record.
We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
/*-------------------------------------- @{ */
#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */
#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */
#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header
on that page */
#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the
length of the externally
stored part of the BLOB.
The 2 highest bits are
reserved to the flags below. */
/*-------------------------------------- @} */
/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
significant bit of the byte at smallest address) is set to 1 if this
field does not 'own' the externally stored field; only the owner field
is allowed to free the field in purge! */
#define BTR_EXTERN_OWNER_FLAG 128
/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
second most significant bit of the byte at smallest address) is 1 then
it means that the externally stored field was inherited from an
earlier version of the row. In rollback we are not allowed to free an
inherited external field. */
#define BTR_EXTERN_INHERITED_FLAG 64
/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
extern ulint btr_cur_n_non_sea;
/** Number of successful adaptive hash index lookups in
btr_cur_search_to_nth_level(). */
extern ulint btr_cur_n_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_non_sea_old;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "btr0cur.ic"
#endif
#endif

View file

@ -0,0 +1,200 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0cur.ic
The index tree cursor
Created 10/16/1994 Heikki Tuuri
*******************************************************/
#ifndef UNIV_HOTBACKUP
#include "btr0btr.h"
#ifdef UNIV_DEBUG
/*********************************************************//**
Gets the page cursor that is embedded in a tree cursor. The const
qualifier of the argument is cast away, so the caller receives a
modifiable pointer.
@return address of the page_cur member of cursor */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
	const btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	btr_cur_t*	writable_cursor = (btr_cur_t*) cursor;

	return(&writable_cursor->page_cur);
}
#endif /* UNIV_DEBUG */
/*********************************************************//**
Gets the buffer block of the page cursor that belongs to a tree cursor.
@return pointer to buffer block */
UNIV_INLINE
buf_block_t*
btr_cur_get_block(
/*==============*/
	btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	page_cur_t*	page_cur = btr_cur_get_page_cur(cursor);

	return(page_cur_get_block(page_cur));
}
/*********************************************************//**
Gets the record that the page cursor of a tree cursor points to.
@return pointer to record */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
	btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	page_cur_t*	page_cur = &cursor->page_cur;

	return(page_cur_get_rec(page_cur));
}
/*********************************************************//**
Gets the compressed page descriptor of the block on which the tree
cursor is positioned.
@return pointer to compressed page, or NULL if the page is not compressed */
UNIV_INLINE
page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
	btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	buf_block_t*	block = btr_cur_get_block(cursor);

	return(buf_block_get_page_zip(block));
}
/*********************************************************//**
Invalidates a tree cursor by invalidating its embedded page cursor
with page_cur_invalidate(). */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
	btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	page_cur_t*	page_cur = &cursor->page_cur;

	page_cur_invalidate(page_cur);
}
/*********************************************************//**
Gets the page on which a tree cursor is positioned. The page is derived
by aligning the pointer to the current record with page_align().
@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
	btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	rec_t*	rec = page_cur_get_rec(&cursor->page_cur);

	return(page_align(rec));
}
/*********************************************************//**
Gets the index stored in a tree cursor.
@return index */
UNIV_INLINE
dict_index_t*
btr_cur_get_index(
/*==============*/
	btr_cur_t*	cursor)	/*!< in: B-tree cursor */
{
	dict_index_t*	index = cursor->index;

	return(index);
}
/*********************************************************//**
Places a tree cursor on the given record: the embedded page cursor is
positioned on rec in block, and the index is stored in the cursor. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
	dict_index_t*	index,	/*!< in: index */
	rec_t*		rec,	/*!< in: record in tree */
	buf_block_t*	block,	/*!< in: buffer block of rec */
	btr_cur_t*	cursor)	/*!< out: cursor */
{
	page_cur_t*	page_cur = btr_cur_get_page_cur(cursor);

	/* Debug check: rec must reside in the frame of block. */
	ut_ad(page_align(rec) == block->frame);

	page_cur_position(rec, block, page_cur);

	cursor->index = index;
}
/*********************************************************************//**
Tells whether it makes sense to try to compress the index page on which
a btr cursor is placed. Compression is never recommended for the root
page of the index.
@return TRUE if compression is recommended */
UNIV_INLINE
ibool
btr_cur_compress_recommendation(
/*============================*/
	btr_cur_t*	cursor,	/*!< in: btr cursor */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*	page;

	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
				MTR_MEMO_PAGE_X_FIX));

	page = btr_cur_get_page(cursor);

	if (page_get_data_size(page) >= BTR_CUR_PAGE_COMPRESS_LIMIT
	    && (btr_page_get_next(page, mtr) != FIL_NULL
		|| btr_page_get_prev(page, mtr) != FIL_NULL)) {

		/* The amount of data on the page has not dropped below
		the predefined limit AND the page has at least one
		sibling on its level: nothing to recommend. */

		return(FALSE);
	}

	/* Either the page fillfactor is below the limit or the page is
	alone on its level of the B-tree: compression is recommended
	unless this is the root page. */

	return(dict_index_get_page(cursor->index)
	       != page_get_page_no(page));
}
/*********************************************************************//**
Tells whether the record under the cursor can be removed without tree
compression becoming necessary (or recommended) afterwards. On the root
page of the index the answer is always TRUE.
@return TRUE if can be deleted without recommended compression */
UNIV_INLINE
ibool
btr_cur_can_delete_without_compress(
/*================================*/
	btr_cur_t*	cursor,	/*!< in: btr cursor */
	ulint		rec_size,/*!< in: rec_get_size(btr_cur_get_rec(cursor))*/
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*	page;

	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
				MTR_MEMO_PAGE_X_FIX));

	page = btr_cur_get_page(cursor);

	if (page_get_data_size(page) - rec_size
	    >= BTR_CUR_PAGE_COMPRESS_LIMIT
	    && (btr_page_get_next(page, mtr) != FIL_NULL
		|| btr_page_get_prev(page, mtr) != FIL_NULL)
	    && page_get_n_recs(page) >= 2) {

		/* After the delete the page data size stays at or above
		the predefined limit, the page has a sibling on its
		level, and the page will not become empty. */

		return(TRUE);
	}

	/* The fillfactor would drop below the limit, OR the page is
	alone on its level, OR the page would become empty: compression
	would be recommended, except when this is the root page. */

	return(dict_index_get_page(cursor->index)
	       == page_get_page_no(page));
}
#endif /* !UNIV_HOTBACKUP */

View file

@ -0,0 +1,551 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0pcur.h
The index tree persistent cursor
Created 2/23/1996 Heikki Tuuri
*******************************************************/
#ifndef btr0pcur_h
#define btr0pcur_h
#include "univ.i"
#include "dict0dict.h"
#include "data0data.h"
#include "mtr0mtr.h"
#include "page0cur.h"
#include "btr0cur.h"
#include "btr0btr.h"
#include "btr0types.h"
/* Relative positions for a stored cursor position */
#define BTR_PCUR_ON 1
#define BTR_PCUR_BEFORE 2
#define BTR_PCUR_AFTER 3
/* Note that if the tree is not empty, btr_pcur_store_position does not
use the following, but only uses the above three alternatives, where the
position is stored relative to a specific record: this makes implementation
of a scroll cursor easier */
#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
/**************************************************************//**
Allocates memory for a persistent cursor object and initializes the cursor.
@return own: persistent cursor */
UNIV_INTERN
btr_pcur_t*
btr_pcur_create_for_mysql(void);
/*============================*/
/**************************************************************//**
Frees the memory for a persistent cursor object. */
UNIV_INTERN
void
btr_pcur_free_for_mysql(
/*====================*/
btr_pcur_t* cursor); /*!< in, own: persistent cursor */
/**************************************************************//**
Copies the stored position of a pcur to another pcur. */
UNIV_INTERN
void
btr_pcur_copy_stored_position(
/*==========================*/
btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the
position info */
btr_pcur_t* pcur_donate); /*!< in: pcur from which the info is
copied */
/**************************************************************//**
Sets the old_rec_buf field to NULL. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
btr_pcur_t* pcur); /*!< in: persistent cursor */
/**************************************************************//**
Initializes and opens a persistent cursor to an index tree. It should be
closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open_func(
/*===============*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page from the
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open(i,t,md,l,c,m) \
btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
Opens a persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
void
btr_pcur_open_with_no_init_func(
/*============================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be
PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
may end up on the previous page of the
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
NOTE that if has_search_latch != 0 then
we maybe do not acquire a latch on the cursor
page, but assume that the caller uses his
btr search latch to protect the record! */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, or 0 */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \
btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
/*****************************************************************//**
Opens a persistent cursor at either end of an index. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
ibool from_left, /*!< in: TRUE if open to the low end,
FALSE if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
btr_pcur_t* pcur, /*!< in: cursor */
ibool do_init, /*!< in: TRUE if should be initialized */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Gets the up_match value for a pcur after a search.
@return number of matched fields at the cursor or to the right if
search mode was PAGE_CUR_GE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
/**************************************************************//**
Gets the low_match value for a pcur after a search.
@return number of matched fields at the cursor or to the left if
search mode was PAGE_CUR_LE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
/**************************************************************//**
If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
user record satisfying the search condition, in the case PAGE_CUR_L or
PAGE_CUR_LE, on the last user record. If no such user record exists, then
in the first case sets the cursor after last in tree, and in the latter case
before first in tree. The latching mode must be BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF. */
UNIV_INTERN
void
btr_pcur_open_on_user_rec_func(
/*===========================*/
dict_index_t* index, /*!< in: index */
const dtuple_t* tuple, /*!< in: tuple on which search done */
ulint mode, /*!< in: PAGE_CUR_L, ... */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent
cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \
btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos_func(
/*==========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
/** Convenience wrapper around btr_pcur_open_at_rnd_pos_func() that passes
the __FILE__ and __LINE__ of the call site as the file and line arguments.
Arguments: i = index, l = latch_mode, c = cursor, m = mtr. */
#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
Frees the possible old_rec_buf buffer of a persistent cursor and sets the
latch mode of the persistent cursor to BTR_NO_LATCHES. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
btr_pcur_t* cursor); /*!< in: persistent cursor */
/**************************************************************//**
The position of the cursor is stored by taking an initial segment of the
record the cursor is positioned on, before, or after, and copying it to the
cursor data structure, or just setting a flag if the cursor is before the
first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
page where the cursor is positioned must not be empty if the index tree is
not totally empty! */
UNIV_INTERN
void
btr_pcur_store_position(
/*====================*/
btr_pcur_t* cursor, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
to the last record LESS OR EQUAL to the stored record;
(2) cursor was positioned on a page infimum record: restores the position to
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
restores to before first or after the last in the tree.
@return TRUE if the cursor position was stored when it was on a user
record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
UNIV_INTERN
ibool
btr_pcur_restore_position_func(
/*===========================*/
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_pcur_t* cursor, /*!< in: detached persistent cursor */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
/** Convenience wrapper around btr_pcur_restore_position_func() that passes
the __FILE__ and __LINE__ of the call site as the file and line arguments.
Arguments: l = latch_mode, cur = cursor, mtr = mtr. */
#define btr_pcur_restore_position(l,cur,mtr) \
btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
/**************************************************************//**
If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF,
releases the page latch and bufferfix reserved by the cursor.
NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes
made by the current mini-transaction to the data protected by the
cursor latch, as then the latch must not be released until mtr_commit. */
UNIV_INTERN
void
btr_pcur_release_leaf(
/*==================*/
btr_pcur_t* cursor, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Gets the rel_pos field for a cursor whose position has been stored.
@return BTR_PCUR_ON, ... */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Sets the mtr field for a pcur. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
btr_pcur_t* cursor, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in, own: mtr */
/*********************************************************//**
Gets the mtr field for a pcur.
@return mtr */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
btr_pcur_t* cursor); /*!< in: persistent cursor */
/**************************************************************//**
Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
btr_pcur_t* pcur, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr to commit */
/**************************************************************//**
Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES.
@return TRUE if detached */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
btr_pcur_t* pcur); /*!< in: persistent cursor */
/*********************************************************//**
Moves the persistent cursor to the next record in the tree. If no records are
left, the cursor stays 'after last in tree'.
@return TRUE if the cursor was not after last in tree */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Moves the persistent cursor to the previous record in the tree. If no records
are left, the cursor stays 'before first in tree'.
@return TRUE if the cursor was not before first in tree */
UNIV_INTERN
ibool
btr_pcur_move_to_prev(
/*==================*/
btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Moves the persistent cursor to the last record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
btr_pcur_t* cursor, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'.
@return TRUE if the cursor moved forward, ending on a user record */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
function may release the page latch */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Moves the persistent cursor to the first record on the next page.
Releases the latch on the current page, and bufferunfixes it.
Note that there must not be modifications on the current page,
as then the x-latch can be released only in mtr_commit. */
UNIV_INTERN
void
btr_pcur_move_to_next_page(
/*=======================*/
btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
last record of the current page */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Moves the persistent cursor backward if it is on the first record
of the page. Releases the latch on the current page, and bufferunfixes
it. Note that to prevent a possible deadlock, the operation first
stores the position of the cursor, releases the leaf latch, acquires
necessary latches and restores the cursor position again before returning.
The alphabetical position of the cursor is guaranteed to be sensible
on return, but it may happen that the cursor is not positioned on the
last record of any page, because the structure of the tree may have
changed while the cursor had no latches. */
UNIV_INTERN
void
btr_pcur_move_backward_from_page(
/*=============================*/
btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the
first record of the current page */
mtr_t* mtr); /*!< in: mtr */
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the btr cursor component of a persistent cursor.
@return pointer to btr cursor component */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
const btr_pcur_t* cursor); /*!< in: persistent cursor */
/*********************************************************//**
Returns the page cursor component of a persistent cursor.
@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
const btr_pcur_t* cursor); /*!< in: persistent cursor */
#else /* UNIV_DEBUG */
/* Without UNIV_DEBUG the two accessors are plain macros that take the
address of the embedded member directly instead of calling a function. */
# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the page of a persistent cursor.
@return pointer to the page */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Returns the buffer block of a persistent cursor.
@return pointer to the block */
UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
/*===============*/
btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Returns the record of a persistent cursor.
@return pointer to the record */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Checks if the persistent cursor is on a user record. */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Checks if the persistent cursor is after the last user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Checks if the persistent cursor is before the first user record on
a page. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
btr_pcur_t* cursor, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree. */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
btr_pcur_t* cursor, /*!< in: persistent cursor */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************//**
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
btr_pcur_t* cursor);/*!< in/out: persistent cursor */
/*********************************************************//**
Moves the persistent cursor to the previous record on the same page. */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
btr_pcur_t* cursor);/*!< in/out: persistent cursor */
/** The persistent B-tree cursor structure. This is used mainly for SQL
selects, updates, and deletes. It wraps a B-tree cursor (btr_cur) and adds
the state needed to store a position and restore it later: a copy of an
initial segment of the record (old_rec, backed by old_rec_buf), the relative
position to that record (rel_pos), and the buffer block and its modify clock
at the time the position was stored. Note that latch_mode and pos_state may
be out of date; see the TODO note at BTR_PCUR_IS_POSITIONED. */
struct btr_pcur_struct{
btr_cur_t btr_cur; /*!< a B-tree cursor */
ulint latch_mode; /*!< see TODO note below!
BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
BTR_MODIFY_TREE, or BTR_NO_LATCHES,
depending on the latching state of
the page and tree where the cursor is
positioned; the last value means that
the cursor is not currently positioned:
we say then that the cursor is
detached; it can be restored to
attached if the old position was
stored in old_rec */
ulint old_stored; /*!< BTR_PCUR_OLD_STORED
or BTR_PCUR_OLD_NOT_STORED */
rec_t* old_rec; /*!< if cursor position is stored,
contains an initial segment of the
latest record cursor was positioned
either on, before, or after */
ulint old_n_fields; /*!< number of fields in old_rec */
ulint rel_pos; /*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the
old_rec record */
buf_block_t* block_when_stored;/*!< buffer block when the position was
stored */
ib_uint64_t modify_clock; /*!< the modify clock value of the
buffer block when the cursor position
was stored */
ulint pos_state; /*!< see TODO note below!
BTR_PCUR_IS_POSITIONED,
BTR_PCUR_WAS_POSITIONED,
BTR_PCUR_NOT_POSITIONED */
ulint search_mode; /*!< PAGE_CUR_G, ... */
trx_t* trx_if_known; /*!< the transaction, if we know it;
otherwise this field is not defined;
can ONLY BE USED in error prints in
fatal assertion failures! */
/*-----------------------------*/
/* NOTE that the following fields may possess dynamically allocated
memory which should be freed if not needed anymore! */
mtr_t* mtr; /*!< NULL, or this field may contain
a mini-transaction which holds the
latch on the cursor page */
byte* old_rec_buf; /*!< NULL, or a dynamically allocated
buffer for old_rec */
ulint buf_size; /*!< old_rec_buf size if old_rec_buf
is not NULL */
};
/* Values of the pos_state field of btr_pcur_struct.
NOTE(review): the large, arbitrary-looking numbers are presumably chosen so
that an uninitialized or corrupted field is unlikely to match a valid state
-- confirm. */
#define BTR_PCUR_IS_POSITIONED 1997660512 /* TODO: currently, the state
can be BTR_PCUR_IS_POSITIONED,
though it really should be
BTR_PCUR_WAS_POSITIONED,
because we have no obligation
to commit the cursor with
mtr; similarly latch_mode may
be out of date. This can
lead to problems if btr_pcur
is not used the right way;
all current code should be
ok. */
#define BTR_PCUR_WAS_POSITIONED 1187549791
#define BTR_PCUR_NOT_POSITIONED 1328997689
/* Values of the old_stored field of btr_pcur_struct. */
#define BTR_PCUR_OLD_STORED 908467085
#define BTR_PCUR_OLD_NOT_STORED 122766467
#ifndef UNIV_NONINL
#include "btr0pcur.ic"
#endif
#endif

View file

@ -0,0 +1,642 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0pcur.ic
The index tree persistent cursor
Created 2/23/1996 Heikki Tuuri
*******************************************************/
/*********************************************************//**
Reads the rel_pos field of a cursor whose position has been stored.
Asserts (ut_ad) that the cursor is non-NULL, that old_rec is set, that
old_stored is BTR_PCUR_OLD_STORED, and that the cursor is or was positioned.
@return	BTR_PCUR_ON, ... */
UNIV_INLINE
ulint
btr_pcur_get_rel_pos(
/*=================*/
	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	ulint	stored_rel_pos;

	ut_ad(cursor);
	ut_ad(cursor->old_rec);
	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
	ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
	      || cursor->pos_state == BTR_PCUR_IS_POSITIONED);

	stored_rel_pos = cursor->rel_pos;

	return stored_rel_pos;
}
/*********************************************************//**
Records a mini-transaction pointer in the mtr field of a persistent cursor.
No other field of the cursor is modified. */
UNIV_INLINE
void
btr_pcur_set_mtr(
/*=============*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor whose mtr field
				is overwritten */
	mtr_t*		mtr)	/*!< in, own: mtr */
{
	ut_ad(cursor);

	cursor->mtr = mtr;
}
/*********************************************************//**
Reads the mtr field of a persistent cursor.
@return	the value of cursor->mtr */
UNIV_INLINE
mtr_t*
btr_pcur_get_mtr(
/*=============*/
	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	mtr_t*	cursor_mtr;

	ut_ad(cursor);

	cursor_mtr = cursor->mtr;

	return cursor_mtr;
}
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the btr cursor component of a persistent cursor. The argument is
const-qualified, but the returned pointer is not: the qualifier is cast
away on the address of the embedded btr_cur member.
@return	pointer to btr cursor component */
UNIV_INLINE
btr_cur_t*
btr_pcur_get_btr_cur(
/*=================*/
	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	return (btr_cur_t*) &cursor->btr_cur;
}
/*********************************************************//**
Returns the page cursor component of a persistent cursor, obtained through
the embedded btr cursor.
@return	pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	btr_cur_t*	tree_cur = btr_pcur_get_btr_cur(cursor);

	return btr_cur_get_page_cur(tree_cur);
}
#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the page the persistent cursor is positioned on. Asserts (ut_ad)
that pos_state is BTR_PCUR_IS_POSITIONED.
@return	pointer to the page */
UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	btr_cur_t*	tree_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);

	tree_cur = btr_pcur_get_btr_cur(cursor);

	return btr_cur_get_page(tree_cur);
}
/*********************************************************//**
Returns the buffer block the persistent cursor is positioned on. Asserts
(ut_ad) that pos_state is BTR_PCUR_IS_POSITIONED.
@return	pointer to the block */
UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
/*===============*/
	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	btr_cur_t*	tree_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);

	tree_cur = btr_pcur_get_btr_cur(cursor);

	return btr_cur_get_block(tree_cur);
}
/*********************************************************//**
Returns the record the persistent cursor is positioned on. Asserts (ut_ad)
that the cursor is positioned and that its latch mode is not
BTR_NO_LATCHES.
@return	pointer to the record */
UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	btr_cur_t*	tree_cur;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	tree_cur = btr_pcur_get_btr_cur(cursor);

	return btr_cur_get_rec(tree_cur);
}
/**************************************************************//**
Gets the up_match value of the embedded btr cursor after a search. Asserts
(ut_ad) that the cursor is or was positioned and that up_match is not
ULINT_UNDEFINED.
@return number of matched fields at the cursor or to the right if
search mode was PAGE_CUR_GE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
	btr_pcur_t*	cursor)	/*!< in: memory buffer for persistent cursor */
{
	btr_cur_t*	btr_cursor;
	ulint		n_matched;

	ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
	      || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));

	btr_cursor = btr_pcur_get_btr_cur(cursor);

	ut_ad(btr_cursor->up_match != ULINT_UNDEFINED);

	n_matched = btr_cursor->up_match;

	return n_matched;
}
/**************************************************************//**
Gets the low_match value of the embedded btr cursor after a search. Asserts
(ut_ad) that the cursor is or was positioned and that low_match is not
ULINT_UNDEFINED.
NOTE(review): the "or to the right" wording below is shared verbatim with
btr_pcur_get_up_match; confirm whether "to the left" is intended for
PAGE_CUR_LE.
@return number of matched fields at the cursor or to the right if
search mode was PAGE_CUR_LE, otherwise undefined */
UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
	btr_pcur_t*	cursor)	/*!< in: memory buffer for persistent cursor */
{
	btr_cur_t*	btr_cursor;
	ulint		n_matched;

	ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
	      || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));

	btr_cursor = btr_pcur_get_btr_cur(cursor);

	ut_ad(btr_cursor->low_match != ULINT_UNDEFINED);

	n_matched = btr_cursor->low_match;

	return n_matched;
}
/*********************************************************//**
Tells whether the persistent cursor is after the last user record on
a page, by asking the page cursor (page_cur_is_after_last). Asserts (ut_ad)
that the cursor is positioned and holds a latch mode other than
BTR_NO_LATCHES.
@return	the result of page_cur_is_after_last() for the page cursor */
UNIV_INLINE
ibool
btr_pcur_is_after_last_on_page(
/*===========================*/
	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	return page_cur_is_after_last(
		btr_pcur_get_page_cur(cursor));
}
/*********************************************************//**
Tells whether the persistent cursor is before the first user record on
a page, by asking the page cursor (page_cur_is_before_first). Asserts
(ut_ad) that the cursor is positioned and holds a latch mode other than
BTR_NO_LATCHES.
@return	the result of page_cur_is_before_first() for the page cursor */
UNIV_INLINE
ibool
btr_pcur_is_before_first_on_page(
/*=============================*/
	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	return page_cur_is_before_first(
		btr_pcur_get_page_cur(cursor));
}
/*********************************************************//**
Checks if the persistent cursor is on a user record, i.e. it is neither
before the first nor after the last user record on its page.
@return	FALSE if the cursor is before the first or after the last user
record on the page, TRUE otherwise */
UNIV_INLINE
ibool
btr_pcur_is_on_user_rec(
/*====================*/
	const btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	/* The second test is evaluated only when the first one fails. */
	return (btr_pcur_is_before_first_on_page(cursor)
		|| btr_pcur_is_after_last_on_page(cursor))
		? FALSE
		: TRUE;
}
/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree: the cursor page must have no previous page (FIL_NULL) and
the page cursor must be before the first record on that page.
@return	FALSE if the page has a previous page, otherwise the result of
page_cur_is_before_first() */
UNIV_INLINE
ibool
btr_pcur_is_before_first_in_tree(
/*=============================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {

		/* No page to the left: decide on the page cursor alone. */
		return page_cur_is_before_first(
			btr_pcur_get_page_cur(cursor));
	}

	return FALSE;
}
/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree: the cursor page must have no next page (FIL_NULL) and the
page cursor must be after the last record on that page.
@return	FALSE if the page has a next page, otherwise the result of
page_cur_is_after_last() */
UNIV_INLINE
ibool
btr_pcur_is_after_last_in_tree(
/*===========================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) == FIL_NULL) {

		/* No page to the right: decide on the page cursor alone. */
		return page_cur_is_after_last(
			btr_pcur_get_page_cur(cursor));
	}

	return FALSE;
}
/*********************************************************//**
Advances the persistent cursor to the next record on the same page and
marks any previously stored position as not stored. */
UNIV_INLINE
void
btr_pcur_move_to_next_on_page(
/*==========================*/
	btr_pcur_t*	cursor)	/*!< in/out: persistent cursor */
{
	page_cur_t*	page_cursor;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	page_cursor = btr_pcur_get_page_cur(cursor);
	page_cur_move_to_next(page_cursor);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*********************************************************//**
Moves the persistent cursor back to the previous record on the same page
and marks any previously stored position as not stored. */
UNIV_INLINE
void
btr_pcur_move_to_prev_on_page(
/*==========================*/
	btr_pcur_t*	cursor)	/*!< in/out: persistent cursor */
{
	page_cur_t*	page_cursor;

	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	page_cursor = btr_pcur_get_page_cur(cursor);
	page_cur_move_to_prev(page_cursor);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*********************************************************//**
Moves the persistent cursor to the last record on the same page by setting
the page cursor with page_cur_set_after_last(), and marks any previously
stored position as not stored. The mtr argument is not used. */
UNIV_INLINE
void
btr_pcur_move_to_last_on_page(
/*==========================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor */
	mtr_t*		mtr)	/*!< in: mtr */
{
	buf_block_t*	block;
	page_cur_t*	page_cursor;

	UT_NOT_USED(mtr);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	block = btr_pcur_get_block(cursor);
	page_cursor = btr_pcur_get_page_cur(cursor);

	page_cur_set_after_last(block, page_cursor);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
/*********************************************************//**
Moves the persistent cursor to the next user record in the tree. If no user
records are left, the cursor ends up 'after last in tree'. Any previously
stored position is marked as not stored.
@return	TRUE if the cursor moved forward, ending on a user record */
UNIV_INLINE
ibool
btr_pcur_move_to_next_user_rec(
/*===========================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
				function may release the page latch */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	/* Step one record (or one page) at a time until a user record is
	reached or the end of the tree is hit. */
	for (;;) {
		if (!btr_pcur_is_after_last_on_page(cursor)) {

			btr_pcur_move_to_next_on_page(cursor);

		} else if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {

			return FALSE;
		} else {
			btr_pcur_move_to_next_page(cursor, mtr);
		}

		if (btr_pcur_is_on_user_rec(cursor)) {

			return TRUE;
		}
	}
}
/*********************************************************//**
Moves the persistent cursor one step forward in the tree: to the next record
on the page, or to the next page when the cursor is already after the last
record on its page. If no records are left, the cursor stays 'after last in
tree'. Any previously stored position is marked as not stored.
@return	TRUE if the cursor was not after last in tree */
UNIV_INLINE
ibool
btr_pcur_move_to_next(
/*==================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
				function may release the page latch */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	if (!btr_pcur_is_after_last_on_page(cursor)) {

		btr_pcur_move_to_next_on_page(cursor);

	} else if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {

		return FALSE;
	} else {
		btr_pcur_move_to_next_page(cursor, mtr);
	}

	return TRUE;
}
/**************************************************************//**
Commits the given mtr and detaches the cursor: the latch mode becomes
BTR_NO_LATCHES and pos_state becomes BTR_PCUR_WAS_POSITIONED. Asserts (ut_a)
that the cursor is positioned on entry. If there have been modifications
to the page where pcur is positioned, this can be used instead of
btr_pcur_release_leaf. Function btr_pcur_store_position should be used
before calling this, if restoration of cursor is wanted later. */
UNIV_INLINE
void
btr_pcur_commit_specify_mtr(
/*========================*/
	btr_pcur_t*	pcur,	/*!< in: persistent cursor */
	mtr_t*		mtr)	/*!< in: mtr to commit */
{
	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);

	/* The latch mode is cleared before the commit, the position state
	is updated after it. */
	pcur->latch_mode = BTR_NO_LATCHES;

	mtr_commit(mtr);

	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
}
/**************************************************************//**
Detaches the cursor without committing any mtr: sets the latch mode to
BTR_NO_LATCHES and pos_state to BTR_PCUR_WAS_POSITIONED. Asserts (ut_a)
that the cursor is positioned on entry. */
UNIV_INLINE
void
btr_pcur_detach(
/*============*/
	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
{
	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);

	pcur->latch_mode = BTR_NO_LATCHES;
	pcur->pos_state  = BTR_PCUR_WAS_POSITIONED;
}
/**************************************************************//**
Tests if a cursor is detached, that is, whether its latch mode is
BTR_NO_LATCHES.
@return	TRUE if detached, FALSE otherwise */
UNIV_INLINE
ibool
btr_pcur_is_detached(
/*=================*/
	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
{
	return (pcur->latch_mode == BTR_NO_LATCHES) ? TRUE : FALSE;
}
/**************************************************************//**
Resets the stored-position state of a persistent cursor: old_rec and
old_rec_buf are set to NULL and old_stored to BTR_PCUR_OLD_NOT_STORED.
No memory is freed here, and no other field is touched. */
UNIV_INLINE
void
btr_pcur_init(
/*==========*/
	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
{
	pcur->old_rec	  = NULL;
	pcur->old_rec_buf = NULL;
	pcur->old_stored  = BTR_PCUR_OLD_NOT_STORED;
}
/**************************************************************//**
Initializes a persistent cursor (btr_pcur_init) and opens it on an index
tree by searching with btr_cur_search_to_nth_level() at level 0. On return
pos_state is BTR_PCUR_IS_POSITIONED and trx_if_known is NULL. The cursor
should be closed with btr_pcur_close. */
UNIV_INLINE
void
btr_pcur_open_func(
/*===============*/
	dict_index_t*	index,	/*!< in: index */
	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
				NOTE that if the search is made using a unique
				prefix of a record, mode should be
				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
				may end up on the previous page from the
				record! */
	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
	btr_pcur_t*	cursor,	/*!< in: memory buffer for persistent cursor */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mtr */
{
	/* Reset the stored-position fields, then record how the cursor
	is being opened. */
	btr_pcur_init(cursor);

	cursor->latch_mode  = latch_mode;
	cursor->search_mode = mode;

	/* Position the embedded tree cursor; no search latch is held
	(the has_search_latch argument is 0). */
	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
				    btr_pcur_get_btr_cur(cursor),
				    0, file, line, mtr);

	cursor->pos_state    = BTR_PCUR_IS_POSITIONED;
	cursor->trx_if_known = NULL;
}
/**************************************************************//**
Opens a persistent cursor on an index tree without initializing the
cursor first (btr_pcur_init is not called). The search is done with
btr_cur_search_to_nth_level() at level 0. On return pos_state is
BTR_PCUR_IS_POSITIONED, old_stored is BTR_PCUR_OLD_NOT_STORED and
trx_if_known is NULL. */
UNIV_INLINE
void
btr_pcur_open_with_no_init_func(
/*============================*/
	dict_index_t*	index,	/*!< in: index */
	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
				NOTE that if the search is made using a unique
				prefix of a record, mode should be
				PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
				may end up on the previous page of the
				record! */
	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
				NOTE that if has_search_latch != 0 then
				we maybe do not acquire a latch on the cursor
				page, but assume that the caller uses his
				btr search latch to protect the record! */
	btr_pcur_t*	cursor,	/*!< in: memory buffer for persistent cursor */
	ulint		has_search_latch,/*!< in: latch mode the caller
				currently has on btr_search_latch:
				RW_S_LATCH, or 0 */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mtr */
{
	cursor->latch_mode  = latch_mode;
	cursor->search_mode = mode;

	/* Position the embedded tree cursor. */
	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
				    btr_pcur_get_btr_cur(cursor),
				    has_search_latch,
				    file, line, mtr);

	cursor->pos_state    = BTR_PCUR_IS_POSITIONED;
	cursor->old_stored   = BTR_PCUR_OLD_NOT_STORED;
	cursor->trx_if_known = NULL;
}
/*****************************************************************//**
Opens a persistent cursor at either end of an index. The search mode is
recorded as PAGE_CUR_G when opening from the left and PAGE_CUR_L when
opening from the right. On return pos_state is BTR_PCUR_IS_POSITIONED,
old_stored is BTR_PCUR_OLD_NOT_STORED and trx_if_known is NULL. */
UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
	ibool		from_left,	/*!< in: TRUE if open to the low end,
					FALSE if to the high end */
	dict_index_t*	index,		/*!< in: index */
	ulint		latch_mode,	/*!< in: latch mode */
	btr_pcur_t*	pcur,		/*!< in: cursor */
	ibool		do_init,	/*!< in: TRUE if should be initialized */
	mtr_t*		mtr)		/*!< in: mtr */
{
	pcur->latch_mode  = latch_mode;
	pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;

	if (do_init) {
		/* Reset the stored-position fields only on request. */
		btr_pcur_init(pcur);
	}

	btr_cur_open_at_index_side(from_left, index, latch_mode,
				   btr_pcur_get_btr_cur(pcur), mtr);

	pcur->pos_state    = BTR_PCUR_IS_POSITIONED;
	pcur->old_stored   = BTR_PCUR_OLD_NOT_STORED;
	pcur->trx_if_known = NULL;
}
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. The cursor
is initialized (btr_pcur_init), its search mode is recorded as PAGE_CUR_G,
and the embedded tree cursor is opened with btr_cur_open_at_rnd_pos_func().
On return pos_state is BTR_PCUR_IS_POSITIONED, old_stored is
BTR_PCUR_OLD_NOT_STORED and trx_if_known is NULL. */
UNIV_INLINE
void
btr_pcur_open_at_rnd_pos_func(
/*==========================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
	btr_pcur_t*	cursor,		/*!< in/out: B-tree pcur */
	const char*	file,		/*!< in: file name */
	ulint		line,		/*!< in: line where called */
	mtr_t*		mtr)		/*!< in: mtr */
{
	btr_cur_t*	tree_cur;

	/* Record how the cursor is opened and reset the stored position. */
	cursor->latch_mode  = latch_mode;
	cursor->search_mode = PAGE_CUR_G;

	btr_pcur_init(cursor);

	tree_cur = btr_pcur_get_btr_cur(cursor);

	btr_cur_open_at_rnd_pos_func(index, latch_mode, tree_cur,
				     file, line, mtr);

	cursor->pos_state    = BTR_PCUR_IS_POSITIONED;
	cursor->old_stored   = BTR_PCUR_OLD_NOT_STORED;
	cursor->trx_if_known = NULL;
}
/**************************************************************//**
Frees the possible old_rec_buf buffer of a persistent cursor and sets the
latch mode of the persistent cursor to BTR_NO_LATCHES. The cursor is also
reset to an unpositioned state: the page cursor's rec and block pointers and
old_rec are set to NULL, old_stored to BTR_PCUR_OLD_NOT_STORED, pos_state to
BTR_PCUR_NOT_POSITIONED and trx_if_known to NULL. */
UNIV_INLINE
void
btr_pcur_close(
/*===========*/
	btr_pcur_t*	cursor)	/*!< in: persistent cursor */
{
	if (cursor->old_rec_buf != NULL) {

		mem_free(cursor->old_rec_buf);

		/* Clear the pointer so the freed buffer cannot be reused or
		freed again; old_rec is cleared unconditionally below. */
		cursor->old_rec_buf = NULL;
	}

	cursor->btr_cur.page_cur.rec = NULL;
	cursor->btr_cur.page_cur.block = NULL;
	cursor->old_rec = NULL;
	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;

	cursor->latch_mode = BTR_NO_LATCHES;
	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;

	cursor->trx_if_known = NULL;
}

View file

@ -0,0 +1,310 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file include/btr0sea.h
The index tree adaptive search
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#ifndef btr0sea_h
#define btr0sea_h
#include "univ.i"
#include "rem0rec.h"
#include "dict0dict.h"
#include "btr0types.h"
#include "mtr0mtr.h"
#include "ha0ha.h"
/*****************************************************************//**
Creates and initializes the adaptive search system at a database start. */
UNIV_INTERN
void
btr_search_sys_create(
/*==================*/
ulint hash_size); /*!< in: hash index hash table size */
/*****************************************************************//**
Frees the adaptive search system at a database shutdown. */
UNIV_INTERN
void
btr_search_sys_free(void);
/*=====================*/
/********************************************************************//**
Disable the adaptive hash search system and empty the index. */
UNIV_INTERN
void
btr_search_disable(void);
/*====================*/
/********************************************************************//**
Enable the adaptive hash search system. */
UNIV_INTERN
void
btr_search_enable(void);
/*====================*/
/********************************************************************//**
Returns search info for an index.
@return search info; search mutex reserved */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
dict_index_t* index); /*!< in: index */
/*****************************************************************//**
Creates and initializes a search info struct.
@return own: search info struct */
UNIV_INTERN
btr_search_t*
btr_search_info_create(
/*===================*/
mem_heap_t* heap); /*!< in: heap where created */
/*****************************************************************//**
Returns the value of ref_count. The value is protected by
btr_search_latch.
@return ref_count value. */
UNIV_INTERN
ulint
btr_search_info_get_ref_count(
/*==========================*/
btr_search_t* info); /*!< in: search info. */
/*********************************************************************//**
Updates the search info. */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
dict_index_t* index, /*!< in: index of the cursor */
btr_cur_t* cursor);/*!< in: cursor which was just positioned */
/******************************************************************//**
Tries to guess the right search position based on the hash search info
of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
and the function returns TRUE, then cursor->up_match and cursor->low_match
both have sensible values.
@return TRUE if succeeded */
UNIV_INTERN
ibool
btr_search_guess_on_hash(
/*=====================*/
dict_index_t* index, /*!< in: index */
btr_search_t* info, /*!< in: index search info */
const dtuple_t* tuple, /*!< in: logical record */
ulint mode, /*!< in: PAGE_CUR_L, ... */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< out: tree cursor */
ulint has_search_latch,/*!< in: latch mode the caller
currently has on btr_search_latch:
RW_S_LATCH, RW_X_LATCH, or 0 */
mtr_t* mtr); /*!< in: mtr */
/********************************************************************//**
Moves or deletes hash entries for moved records. If new_page is already hashed,
then the hash index for page, if any, is dropped. If new_page is not hashed,
and page is hashed, then a new hash index is built to new_page with the same
parameters as page (this often happens when a page is split). */
UNIV_INTERN
void
btr_search_move_or_delete_hash_entries(
/*===================================*/
buf_block_t* new_block, /*!< in: records are copied
to this page */
buf_block_t* block, /*!< in: index page from which
records were copied, and the
copied records will be deleted
from this page */
dict_index_t* index); /*!< in: record descriptor */
/********************************************************************//**
Drops a page hash index. */
UNIV_INTERN
void
btr_search_drop_page_hash_index(
/*============================*/
buf_block_t* block); /*!< in: block containing index page,
s- or x-latched, or an index page
for which we know that
block->buf_fix_count == 0 */
/********************************************************************//**
Drops a page hash index when a page is freed from a fseg to the file system.
Drops possible hash index if the page happens to be in the buffer pool. */
UNIV_INTERN
void
btr_search_drop_page_hash_when_freed(
/*=================================*/
ulint space, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no); /*!< in: page number */
/********************************************************************//**
Updates the page hash index when a single record is inserted on a page. */
UNIV_INTERN
void
btr_search_update_hash_node_on_insert(
/*==================================*/
btr_cur_t* cursor);/*!< in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
/********************************************************************//**
Updates the page hash index when a single record is inserted on a page. */
UNIV_INTERN
void
btr_search_update_hash_on_insert(
/*=============================*/
btr_cur_t* cursor);/*!< in: cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
to the cursor */
/********************************************************************//**
Updates the page hash index when a single record is deleted from a page. */
UNIV_INTERN
void
btr_search_update_hash_on_delete(
/*=============================*/
btr_cur_t* cursor);/*!< in: cursor which was positioned on the
record to delete using btr_cur_search_...,
the record is not yet deleted */
/********************************************************************//**
Validates the search system.
@return TRUE if ok */
UNIV_INTERN
ibool
btr_search_validate(void);
/*======================*/
/** Flag: has the search system been enabled?
Protected by btr_search_latch and btr_search_enabled_mutex. */
extern char btr_search_enabled;
/** The search info struct in an index. It holds the per-index state of
the adaptive hash index heuristics and is reached through
index->search_info (see btr_search_get_info()). */
struct btr_search_struct{
ulint ref_count; /*!< Number of blocks in this index tree
that have a search index built,
i.e. block->index points to this index.
Protected by btr_search_latch, except
during initialization in
btr_search_info_create(). */
/* @{ The following fields are not protected by any latch.
Unfortunately, this means that they must be aligned to
the machine word, i.e., they cannot be turned into bit-fields. */
buf_block_t* root_guess;/*!< the root page frame when it was last
fetched, or NULL */
ulint hash_analysis; /*!< incremented on every call of
btr_search_info_update(); once this reaches
BTR_SEARCH_HASH_ANALYSIS, the hash
analysis starts; this is reset if no
success noticed */
ibool last_hash_succ; /*!< TRUE if the last search would have
succeeded, or did succeed, using the hash
index; NOTE that the value here is not exact:
it is not calculated for every search, and the
calculation itself is not always accurate! */
ulint n_hash_potential;
/*!< number of consecutive searches
which would have succeeded, or did succeed,
using the hash index;
the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
/* @} */
/*---------------------- @{ */
/* Recommended hash prefix parameters */
ulint n_fields; /*!< recommended prefix length for hash search:
number of full fields */
ulint n_bytes; /*!< recommended prefix: number of bytes in
an incomplete field
@see BTR_PAGE_MAX_REC_SIZE */
ibool left_side; /*!< TRUE or FALSE, depending on whether
the leftmost record of several records with
the same prefix should be indexed in the
hash index */
/*---------------------- @} */
#ifdef UNIV_SEARCH_PERF_STAT
/* Counters compiled in only with UNIV_SEARCH_PERF_STAT */
ulint n_hash_succ; /*!< number of successful hash searches thus
far */
ulint n_hash_fail; /*!< number of failed hash searches */
ulint n_patt_succ; /*!< number of successful pattern searches thus
far */
ulint n_searches; /*!< number of searches */
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
/** value of btr_search_struct::magic_n, used in assertions */
# define BTR_SEARCH_MAGIC_N 1112765
#endif /* UNIV_DEBUG */
};
/** Type of the hash index system */
typedef struct btr_search_sys_struct btr_search_sys_t;
/** The hash index system: a wrapper around the single hash table that
implements the adaptive hash index */
struct btr_search_sys_struct{
hash_table_t* hash_index; /*!< the adaptive hash index,
mapping dtuple_fold values
to rec_t pointers on index pages */
};
/** The adaptive hash index */
extern btr_search_sys_t* btr_search_sys;
/** @brief The latch protecting the adaptive search system
This latch protects the
(1) hash index;
(2) columns of a record to which we have a pointer in the hash index;
but does NOT protect:
(3) next record offset field in a record;
(4) next or previous records on the same page.
Bear in mind (3) and (4) when using the hash index.
*/
extern rw_lock_t* btr_search_latch_temp;
/** The latch protecting the adaptive search system */
#define btr_search_latch (*btr_search_latch_temp)
#ifdef UNIV_SEARCH_PERF_STAT
/** Number of successful adaptive hash index lookups */
extern ulint btr_search_n_succ;
/** Number of failed adaptive hash index lookups */
extern ulint btr_search_n_hash_fail;
#endif /* UNIV_SEARCH_PERF_STAT */
/** After change in n_fields or n_bytes in info, this many rounds are waited
before starting the hash analysis again: this is to save CPU time when there
is no hope in building a hash index. */
#define BTR_SEARCH_HASH_ANALYSIS 17
/** Limit of consecutive searches for trying a search shortcut on the search
pattern */
#define BTR_SEARCH_ON_PATTERN_LIMIT 3
/** Limit of consecutive searches for trying a search shortcut using
the hash index */
#define BTR_SEARCH_ON_HASH_LIMIT 3
/** We do this many searches before trying to keep the search latch
over calls from MySQL. If we notice someone waiting for the latch, we
again set this much timeout. This is to reduce contention. */
#define BTR_SEA_TIMEOUT 10000
#ifndef UNIV_NONINL
#include "btr0sea.ic"
#endif
#endif

View file

@ -0,0 +1,84 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file include/btr0sea.ic
The index tree adaptive search
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#include "dict0mem.h"
#include "btr0cur.h"
#include "buf0buf.h"
/*********************************************************************//**
Updates the search info. */
UNIV_INTERN
void
btr_search_info_update_slow(
/*========================*/
btr_search_t* info, /*!< in/out: search info */
btr_cur_t* cursor);/*!< in: cursor which was just positioned */
/********************************************************************//**
Fetches the adaptive hash search info attached to an index. This accessor
only reads index->search_info; it does not acquire any latch itself.
@return search info of the index */
UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
	dict_index_t*	index)	/*!< in: index */
{
	btr_search_t*	search_info;

	ut_ad(index);

	search_info = index->search_info;

	return(search_info);
}
/*********************************************************************//**
Updates the search info of an index after a cursor was positioned.
The hash analysis counter is incremented on every call; only once it has
reached BTR_SEARCH_HASH_ANALYSIS is the actual update delegated to
btr_search_info_update_slow(). */
UNIV_INLINE
void
btr_search_info_update(
/*===================*/
	dict_index_t*	index,	/*!< in: index of the cursor */
	btr_cur_t*	cursor)	/*!< in: cursor which was just positioned */
{
	btr_search_t*	search_info;

#ifdef UNIV_SYNC_DEBUG
	/* The caller must not hold btr_search_latch in any mode. */
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	search_info = btr_search_get_info(index);

	search_info->hash_analysis++;

	if (search_info->hash_analysis >= BTR_SEARCH_HASH_ANALYSIS) {
		/* Enough searches have been seen: run the slow path. */
		ut_ad(cursor->flag != BTR_CUR_HASH);

		btr_search_info_update_slow(search_info, cursor);
	}
}

View file

@ -0,0 +1,51 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file include/btr0types.h
The index tree general types
Created 2/17/1996 Heikki Tuuri
*************************************************************************/
#ifndef btr0types_h
#define btr0types_h
#include "univ.i"
#include "rem0types.h"
#include "page0types.h"
/** Persistent cursor */
typedef struct btr_pcur_struct btr_pcur_t;
/** B-tree cursor */
typedef struct btr_cur_struct btr_cur_t;
/** B-tree search information for the adaptive hash index */
typedef struct btr_search_struct btr_search_t;
/** The size of a reference to data stored on a different page.
The reference is stored at the end of the prefix of the field
in the index record. */
#define BTR_EXTERN_FIELD_REF_SIZE 20
/** A BLOB field reference full of zero, for use in assertions and tests.
Initially, BLOB field references are set to zero, in
dtuple_convert_big_rec(). */
extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
#endif

View file

@ -0,0 +1,90 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0buddy.h
Binary buddy allocator for compressed pages
Created December 2006 by Marko Makela
*******************************************************/
#ifndef buf0buddy_h
#define buf0buddy_h
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "univ.i"
#include "buf0types.h"
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
/**********************************************************************//**
Release a block. */
UNIV_INLINE
void
buf_buddy_free(
/*===========*/
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
__attribute__((nonnull));
/** Statistics of buddy blocks of a given size. One element of the
buf_buddy_stat[] array, which is indexed by block size. */
struct buf_buddy_stat_struct {
/** Number of blocks allocated from the buddy system. */
ulint used;
/** Number of blocks relocated by the buddy system. */
ib_uint64_t relocated;
/** Total duration of block relocations, in microseconds. */
ib_uint64_t relocated_usec;
};
/** Type of the statistics of buddy blocks of a given size. */
typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
/** Statistics of the buddy system, indexed by block size.
Protected by buf_pool_mutex. */
extern buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
#ifndef UNIV_NONINL
# include "buf0buddy.ic"
#endif
#endif /* buf0buddy_h */

View file

@ -0,0 +1,127 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0buddy.ic
Binary buddy allocator for compressed pages
Created December 2006 by Marko Makela
*******************************************************/
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
#include "buf0buf.h"
#include "buf0buddy.h"
#include "ut0ut.h"
#include "sync0sync.h"
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
/**********************************************************************//**
Deallocate a block. */
UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint i) /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
__attribute__((nonnull));
/**********************************************************************//**
Maps a block size to its slot in buf_pool->zip_free[]. Starting from
BUF_BUDDY_LOW in slot 0, the candidate block size is doubled until it is
at least the requested size; the number of doublings is the slot.
@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
UNIV_INLINE
ulint
buf_buddy_get_slot(
/*===============*/
	ulint	size)	/*!< in: block size */
{
	ulint	slot = 0;
	ulint	slot_size = BUF_BUDDY_LOW;

	while (slot_size < size) {
		slot_size <<= 1;
		slot++;
	}

	ut_ad(slot <= BUF_BUDDY_SIZES);

	return(slot);
}
/**********************************************************************//**
Allocates a block from the buddy system. The calling thread must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
Use this only for compressed page frames or control blocks (buf_page_t);
an allocated control block must be initialized right after this function
returns, before buf_pool_mutex is released. The size is converted to a
zip_free[] slot and the request is passed on to buf_buddy_alloc_low().
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
	ulint	size,	/*!< in: block size, up to UNIV_PAGE_SIZE */
	ibool*	lru)	/*!< in: pointer to a variable that will be assigned
			TRUE if storage was allocated from the LRU list
			and buf_pool_mutex was temporarily released,
			or NULL if the LRU list should not be used */
{
	ulint	slot;

	ut_ad(buf_pool_mutex_own());

	slot = buf_buddy_get_slot(size);

	return(buf_buddy_alloc_low(slot, lru));
}
/**********************************************************************//**
Returns a block to the buddy system. The caller must hold buf_pool_mutex.
The size is converted to a zip_free[] slot and the request is passed on
to buf_buddy_free_low(). */
UNIV_INLINE
void
buf_buddy_free(
/*===========*/
	void*	buf,	/*!< in: block to be freed, must not be
			pointed to by the buffer pool */
	ulint	size)	/*!< in: block size, up to UNIV_PAGE_SIZE */
{
	ulint	slot;

	ut_ad(buf_pool_mutex_own());

	slot = buf_buddy_get_slot(size);

	buf_buddy_free_low(buf, slot);
}
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
#endif

1633
perfschema/include/buf0buf.h Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,217 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0flu.h
The database buffer pool flush algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0flu_h
#define buf0flu_h
#include "univ.i"
#include "ut0byte.h"
#ifndef UNIV_HOTBACKUP
#include "mtr0types.h"
#include "buf0types.h"
/********************************************************************//**
Remove a block from the flush list of modified blocks. */
UNIV_INTERN
void
buf_flush_remove(
/*=============*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage has already been
copied to dpage. */
UNIV_INTERN
void
buf_flush_relocate_on_flush_list(
/*=============================*/
buf_page_t* bpage, /*!< in/out: control block being moved */
buf_page_t* dpage); /*!< in/out: destination block */
/********************************************************************//**
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
buf_flush_write_complete(
/*=====================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small
a margin of replaceable pages there. */
UNIV_INTERN
void
buf_flush_free_margin(void);
/*=======================*/
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Initializes a page for writing to the tablespace. */
UNIV_INTERN
void
buf_flush_init_for_writing(
/*=======================*/
byte* page, /*!< in/out: page */
void* page_zip_, /*!< in/out: compressed page, or NULL */
ib_uint64_t newest_lsn); /*!< in: newest modification lsn
to the page */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_batch(
/*============*/
enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
then the caller must not own any
latches on pages */
ulint min_n, /*!< in: wished minimum number of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
ib_uint64_t lsn_limit); /*!< in: in the case of BUF_FLUSH_LIST, all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not
already in it. */
UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
buf_block_t* block, /*!< in: block which is modified */
mtr_t* mtr); /*!< in: mtr */
/********************************************************************//**
This function should be called when recovery has modified a buffer page. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
buf_block_t* block, /*!< in: block which is modified */
ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a
set of mtr's */
ib_uint64_t end_lsn); /*!< in: end lsn of the last mtr in the
set of mtr's */
/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., transition FILE_PAGE => NOT_USED allowed.
@return TRUE if can replace immediately */
UNIV_INTERN
ibool
buf_flush_ready_for_replace(
/*========================*/
buf_page_t* bpage); /*!< in: buffer control block, must be
buf_page_in_file(bpage) and in the LRU list */
/** @brief Statistics for selecting flush rate based on redo log
generation speed.
These statistics are generated for heuristics used in estimating the
rate at which we should flush the dirty blocks to avoid bursty IO
activity. Note that the rate of flushing not only depends on how many
dirty pages we have in the buffer pool but it is also a function of
how much redo the workload is generating and at what rate. */
struct buf_flush_stat_struct
{
ib_uint64_t redo; /**< amount of redo generated. */
ulint n_flushed; /**< number of pages flushed. */
};
/** Type of the statistics for selecting flush rate of dirty pages. */
typedef struct buf_flush_stat_struct buf_flush_stat_t;
/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval. */
UNIV_INTERN
void
buf_flush_stat_update(void);
/*=======================*/
/*********************************************************************
Determines the fraction of dirty pages that need to be flushed based
on the speed at which we generate redo log. Note that if redo log
is generated at significant rate without a corresponding increase
in the number of dirty pages (for example, an in-memory workload)
it can cause IO bursts of flushing. This function implements heuristics
to avoid this burstiness.
@return number of dirty pages to be flushed / second */
UNIV_INTERN
ulint
buf_flush_get_desired_flush_rate(void);
/*==================================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
UNIV_INTERN
ibool
buf_flush_validate(void);
/*====================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
UNIV_INTERN
void
buf_flush_init_flush_rbt(void);
/*==========================*/
/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
buf_flush_free_flush_rbt(void);
/*==========================*/
/** When buf_flush_free_margin is called, it tries to make this many blocks
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single
sweep). */
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "buf0flu.ic"
#endif
#endif

View file

@ -0,0 +1,126 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0flu.ic
The database buffer pool flush algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef UNIV_HOTBACKUP
#include "buf0buf.h"
#include "mtr0mtr.h"
/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
necessarily come in the order of lsn's. */
UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
buf_block_t* block, /*!< in/out: block which is modified */
ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Records, at mini-transaction commit, that the mtr modified a page.
Under block->mutex the newest modification lsn of the page is advanced to
mtr->end_lsn, and a page that has no oldest modification lsn yet is
inserted into the flush list with mtr->start_lsn. Finally the
srv_buf_pool_write_requests counter is incremented. */
UNIV_INLINE
void
buf_flush_note_modification(
/*========================*/
	buf_block_t*	block,	/*!< in: block which is modified */
	mtr_t*		mtr)	/*!< in: mtr */
{
	/* Preconditions: a buffer-fixed file page, x-latched by us, and
	neither the buffer pool mutex nor the flush list mutex held. */
	ut_ad(block);
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(!buf_pool_mutex_own());
	ut_ad(!buf_flush_list_mutex_own());

	/* The mtr must have a start lsn and must have modified something. */
	ut_ad(mtr->start_lsn != 0);
	ut_ad(mtr->modifications);

	mutex_enter(&block->mutex);

	/* The newest modification lsn may never move backwards. */
	ut_ad(block->page.newest_modification <= mtr->end_lsn);
	block->page.newest_modification = mtr->end_lsn;

	if (block->page.oldest_modification != 0) {
		/* Already dirty: the recorded oldest lsn must not be
		newer than this mtr. */
		ut_ad(block->page.oldest_modification <= mtr->start_lsn);
	} else {
		/* First modification since the page was last clean. */
		buf_flush_insert_into_flush_list(block, mtr->start_lsn);
	}

	mutex_exit(&block->mutex);

	srv_buf_pool_write_requests++;
}
/********************************************************************//**
Notes that recovery has modified a buffer page. The newest modification lsn
of the block is set to end_lsn and, unless the block already carries an
oldest modification lsn, the block is handed to
buf_flush_insert_sorted_into_flush_list() together with start_lsn. */
UNIV_INLINE
void
buf_flush_recv_note_modification(
/*=============================*/
	buf_block_t*	block,		/*!< in/out: block which is
					modified */
	ib_uint64_t	start_lsn,	/*!< in: start lsn of the first
					mtr in a set of mtr's */
	ib_uint64_t	end_lsn)	/*!< in: end lsn of the last mtr
					in the set of mtr's */
{
	/* Debug-only preconditions on the block: it holds a file page and
	is buffer-fixed. */
	ut_ad(block);
	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
	/* The caller owns block->lock in RW_LOCK_EX mode. */
	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	/* Neither the buffer pool mutex nor the flush list mutex may be
	held by the caller. */
	ut_ad(!buf_pool_mutex_own());
	ut_ad(!buf_flush_list_mutex_own());

	/* Debug-only checks on the lsn range; note that the second one
	is evaluated before block->mutex is acquired. */
	ut_ad(start_lsn != 0);
	ut_ad(block->page.newest_modification <= end_lsn);

	mutex_enter(&block->mutex);

	block->page.newest_modification = end_lsn;

	if (block->page.oldest_modification != 0) {
		/* An oldest modification lsn is already recorded; it must
		not be later than start_lsn. */
		ut_ad(block->page.oldest_modification <= start_lsn);
	} else {
		/* No oldest modification lsn recorded yet: use the sorted
		insert. */
		buf_flush_insert_sorted_into_flush_list(block, start_lsn);
	}

	mutex_exit(&block->mutex);
}
#endif /* !UNIV_HOTBACKUP */

View file

@ -0,0 +1,295 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0lru.h
The database buffer pool LRU replacement algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0lru_h
#define buf0lru_h
#include "univ.i"
#include "ut0byte.h"
#include "buf0types.h"
/** The return type of buf_LRU_free_block() */
enum buf_lru_free_block_status {
/** freed */
BUF_LRU_FREED = 0,
/** not freed because the caller asked to remove the
uncompressed frame but the control block cannot be
relocated */
BUF_LRU_CANNOT_RELOCATE,
/** not freed because of some other reason */
BUF_LRU_NOT_FREED
};
/******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
operation, as flushed pages from non-unique non-clustered indexes are here
taken out of the buffer pool, and their inserts redirected to the insert
buffer. Otherwise, the flushed blocks could get modified again before read
operations need new buffer blocks, and the i/o work done in flushing would be
wasted. */
UNIV_INTERN
void
buf_LRU_try_free_flushed_blocks(void);
/*==================================*/
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
pool for their locks.
@return TRUE if less than 25 % of buffer pool left */
UNIV_INTERN
ibool
buf_LRU_buf_pool_running_out(void);
/*==============================*/
/*#######################################################################
These are low-level functions
#########################################################################*/
/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
/******************************************************************//**
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
what guarantees that it will not try to read in pages after this operation has
completed? */
UNIV_INTERN
void
buf_LRU_invalidate_tablespace(
/*==========================*/
ulint id); /*!< in: space id */
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
void
buf_LRU_insert_zip_clean(
/*=====================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
/******************************************************************//**
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will not temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed; BUF_LRU_CANNOT_RELOCATE if not freed
because the caller asked to remove the uncompressed frame but the control
block cannot be relocated; BUF_LRU_NOT_FREED if not freed because of some
other reason */
UNIV_INTERN
enum buf_lru_free_block_status
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
ibool zip, /*!< in: TRUE if should remove also the
compressed page of an uncompressed page */
ibool* buf_pool_mutex_released);
/*!< out: the variable pointed to will
be assigned TRUE if buf_pool_mutex
was temporarily released; the pointer
itself may be NULL */
/******************************************************************//**
Try to free a replaceable block.
@return TRUE if found and freed */
UNIV_INTERN
ibool
buf_LRU_search_and_free_block(
/*==========================*/
ulint n_iterations); /*!< in: how many times this has been called
repeatedly without result: a high value means
that we should search farther; if
n_iterations < 10, then we search
n_iterations / 10 * buf_pool->curr_size
pages from the end of the LRU list; if
n_iterations < 5, then we will also search
n_iterations / 5 of the unzip_LRU list. */
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL.
@return a free control block, or NULL if the free list of the buf_pool is empty */
UNIV_INTERN
buf_block_t*
buf_LRU_get_free_only(void);
/*=======================*/
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, blocks are moved from the end of the
LRU list to the free list.
@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
UNIV_INTERN
buf_block_t*
buf_LRU_get_free_block(
/*===================*/
ulint zip_size); /*!< in: compressed page size in bytes,
or 0 if uncompressed tablespace */
/******************************************************************//**
Puts a block back to the free list. */
UNIV_INTERN
void
buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block); /*!< in: block, must not contain a file page */
/******************************************************************//**
Adds a block to the LRU list. */
UNIV_INTERN
void
buf_LRU_add_block(
/*==============*/
buf_page_t* bpage, /*!< in: control block */
ibool old); /*!< in: TRUE if should be put to the old
blocks in the LRU list, else put to the
start; if the LRU list is very short, added to
the start regardless of this parameter */
/******************************************************************//**
Adds a block to the LRU list of decompressed zip pages. */
UNIV_INTERN
void
buf_unzip_LRU_add_block(
/*====================*/
buf_block_t* block, /*!< in: control block */
ibool old); /*!< in: TRUE if should be put to the end
of the list, else put to the start */
/******************************************************************//**
Moves a block to the start of the LRU list. */
UNIV_INTERN
void
buf_LRU_make_block_young(
/*=====================*/
buf_page_t* bpage); /*!< in: control block */
/******************************************************************//**
Moves a block to the end of the LRU list. */
UNIV_INTERN
void
buf_LRU_make_block_old(
/*===================*/
buf_page_t* bpage); /*!< in: control block */
/**********************************************************************//**
Updates buf_LRU_old_ratio.
@return updated old_pct */
UNIV_INTERN
uint
buf_LRU_old_ratio_update(
/*=====================*/
uint old_pct,/*!< in: Reserve this percentage of
the buffer pool for "old" blocks. */
ibool adjust);/*!< in: TRUE=adjust the LRU list;
FALSE=just assign buf_LRU_old_ratio
during the initialization of InnoDB */
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
UNIV_INTERN
void
buf_LRU_stat_update(void);
/*=====================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Validates the LRU list.
@return TRUE */
UNIV_INTERN
ibool
buf_LRU_validate(void);
/*==================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Prints the LRU list. */
UNIV_INTERN
void
buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
/** @name Heuristics for detecting index scan @{ */
/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
"old" blocks. Protected by buf_pool_mutex. */
extern uint buf_LRU_old_ratio;
/** The denominator of buf_LRU_old_ratio. */
#define BUF_LRU_OLD_RATIO_DIV 1024
/** Maximum value of buf_LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_LRU_old_ratio_update */
#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV
/** Minimum value of buf_LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_LRU_old_ratio_update
The minimum must exceed
(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
#define BUF_LRU_OLD_RATIO_MIN 51
#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
#endif
#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
#endif
/** Move blocks to "new" LRU list only if the first access was at
least this many milliseconds ago. Not protected by any mutex or latch. */
extern uint buf_LRU_old_threshold_ms;
/* @} */
/** @brief Statistics for selecting the LRU list for eviction.
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics we decide
if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
struct buf_LRU_stat_struct
{
ulint io; /**< Counter of buffer pool I/O operations. */
ulint unzip; /**< Counter of page_zip_decompress operations. */
};
/** Statistics for selecting the LRU list for eviction. */
typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
extern buf_LRU_stat_t buf_LRU_stat_sum;
/********************************************************************//**
Increments the I/O counter in buf_LRU_stat_cur.
Expands to a plain post-increment expression on the global counter; the
macro itself does not acquire any mutex. */
#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
/********************************************************************//**
Increments the page_zip_decompress() counter in buf_LRU_stat_cur.
Expands to a plain post-increment expression on the global counter; the
macro itself does not acquire any mutex. */
#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
#ifndef UNIV_NONINL
#include "buf0lru.ic"
#endif
#endif

View file

@ -0,0 +1,25 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0lru.ic
The database buffer replacement algorithm
Created 11/5/1995 Heikki Tuuri
*******************************************************/

View file

@ -0,0 +1,137 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0rea.h
The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0rea_h
#define buf0rea_h
#include "univ.i"
#include "buf0types.h"
/********************************************************************//**
High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
@return TRUE if page has been read in, FALSE in case of failure */
UNIV_INTERN
ibool
buf_read_page(
/*==========*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
ulint offset);/*!< in: page number */
/********************************************************************//**
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed for the first time, just after the page has been buffer-fixed.
NOTE 1: this function looks at the natural predecessor and successor
fields on the page. What happens if these are not initialized to any
sensible value? No problem: before applying read-ahead we check that the
area to read is within the span of the space; if it is not, read-ahead is
not applied. An uninitialized value may result in a useless read operation,
but only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
@return number of page read requests issued */
UNIV_INTERN
ulint
buf_read_ahead_linear(
/*==================*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
ulint offset);/*!< in: page number of a page; NOTE: the current thread
must want access to this page (see NOTE 3 above) */
/********************************************************************//**
Issues read requests for pages which the ibuf module wants to read in, in
order to contract the insert buffer tree. Technically, this function is like
a read-ahead function. */
UNIV_INTERN
void
buf_read_ibuf_merge_pages(
/*======================*/
ibool sync, /*!< in: TRUE if the caller
wants this function to wait
for the highest address page
to get read in, before this
function returns */
const ulint* space_ids, /*!< in: array of space ids */
const ib_int64_t* space_versions,/*!< in: the spaces must have
this version number
(timestamp), otherwise we
discard the read; we use this
to cancel reads if DISCARD +
IMPORT may have changed the
tablespace size */
const ulint* page_nos, /*!< in: array of page numbers
to read, with the highest page
number the last in the
array */
ulint n_stored); /*!< in: number of elements
in the arrays */
/********************************************************************//**
Issues read requests for pages which recovery wants to read in. */
UNIV_INTERN
void
buf_read_recv_pages(
/*================*/
ibool sync, /*!< in: TRUE if the caller
wants this function to wait
for the highest address page
to get read in, before this
function returns */
ulint space, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in
bytes, or 0 */
const ulint* page_nos, /*!< in: array of page numbers
to read, with the highest page
number the last in the
array */
ulint n_stored); /*!< in: number of page numbers
in the array */
/** The size in pages of the area which the read-ahead algorithms read if
invoked. This is an expression, not a constant: it is recomputed from
buf_pool->curr_size at every use, and ut_min() caps the result at 64.
NOTE(review): ut_2_power_up() presumably rounds its argument up to a power
of two -- confirm against its definition. */
#define BUF_READ_AHEAD_AREA \
ut_min(64, ut_2_power_up(buf_pool->curr_size / 32))
/** @name Modes used in read-ahead @{ */
/** read only pages belonging to the insert buffer tree */
#define BUF_READ_IBUF_PAGES_ONLY 131
/** read any page */
#define BUF_READ_ANY_PAGE 132
/* @} */
#endif

View file

@ -0,0 +1,82 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/buf0types.h
The database buffer pool global types for the directory
Created 11/17/1995 Heikki Tuuri
*******************************************************/
#ifndef buf0types_h
#define buf0types_h
/** Buffer page (uncompressed or compressed) */
typedef struct buf_page_struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
typedef struct buf_block_struct buf_block_t;
/** Buffer pool chunk comprising buf_block_t */
typedef struct buf_chunk_struct buf_chunk_t;
/** Buffer pool comprising buf_chunk_t */
typedef struct buf_pool_struct buf_pool_t;
/** Buffer pool statistics struct */
typedef struct buf_pool_stat_struct buf_pool_stat_t;
/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
/** Flags for flush types */
enum buf_flush {
BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */
BUF_FLUSH_SINGLE_PAGE, /*!< flush a single page */
BUF_FLUSH_LIST, /*!< flush via the flush list
of dirty blocks */
BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
};
/** Flags for io_fix types */
enum buf_io_fix {
BUF_IO_NONE = 0, /**< no pending I/O */
BUF_IO_READ, /**< read pending */
BUF_IO_WRITE /**< write pending */
};
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 6
#else /* 64-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 7
#endif
#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
/*!< minimum block size in the binary
buddy system; must be at least
sizeof(buf_page_t) */
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
/*!< number of buddy sizes */
/** twice the maximum block size of the buddy system;
the underlying memory is aligned by this amount:
this must be equal to UNIV_PAGE_SIZE */
#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
/* @} */
#endif

View file

@ -0,0 +1,483 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file include/data0data.h
SQL data field and tuple
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#ifndef data0data_h
#define data0data_h
#include "univ.i"
#include "data0types.h"
#include "data0type.h"
#include "mem0mem.h"
#include "dict0types.h"
/** Storage for overflow data in a big record, that is, a clustered
index record which needs external storage of data fields */
typedef struct big_rec_struct big_rec_t;
#ifdef UNIV_DEBUG
/*********************************************************************//**
Gets pointer to the type struct of SQL data field.
@return pointer to the type struct */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
const dfield_t* field); /*!< in: SQL data field */
/*********************************************************************//**
Gets pointer to the data in a field.
@return pointer to data */
UNIV_INLINE
void*
dfield_get_data(
/*============*/
const dfield_t* field); /*!< in: field */
#else /* UNIV_DEBUG */
# define dfield_get_type(field) (&(field)->type)
# define dfield_get_data(field) ((field)->data)
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Sets the type struct of SQL data field. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
dtype_t* type); /*!< in: pointer to data type struct */
/*********************************************************************//**
Gets length of field data.
@return length of data; UNIV_SQL_NULL if SQL null data */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
const dfield_t* field); /*!< in: field */
/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
dfield_t* field, /*!< in: field */
ulint len); /*!< in: length or UNIV_SQL_NULL */
/*********************************************************************//**
Determines if a field is SQL NULL
@return nonzero if SQL null data */
UNIV_INLINE
ulint
dfield_is_null(
/*===========*/
const dfield_t* field); /*!< in: field */
/*********************************************************************//**
Determines if a field is externally stored
@return nonzero if externally stored */
UNIV_INLINE
ulint
dfield_is_ext(
/*==========*/
const dfield_t* field); /*!< in: field */
/*********************************************************************//**
Sets the "external storage" flag */
UNIV_INLINE
void
dfield_set_ext(
/*===========*/
dfield_t* field); /*!< in/out: field */
/*********************************************************************//**
Sets pointer to the data and length in a field. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
dfield_t* field, /*!< in: field */
const void* data, /*!< in: data */
ulint len); /*!< in: length or UNIV_SQL_NULL */
/*********************************************************************//**
Sets a data field to SQL NULL. */
UNIV_INLINE
void
dfield_set_null(
/*============*/
dfield_t* field); /*!< in/out: field */
/**********************************************************************//**
Writes an SQL null field full of zeros. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
byte* data, /*!< in: pointer to a buffer of size len */
ulint len); /*!< in: SQL null size in bytes */
/*********************************************************************//**
Copies the data and len fields. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
dfield_t* field1, /*!< out: field to copy to */
const dfield_t* field2);/*!< in: field to copy from */
/*********************************************************************//**
Copies a data field to another. */
UNIV_INLINE
void
dfield_copy(
/*========*/
dfield_t* field1, /*!< out: field to copy to */
const dfield_t* field2);/*!< in: field to copy from */
/*********************************************************************//**
Copies the data pointed to by a data field. */
UNIV_INLINE
void
dfield_dup(
/*=======*/
dfield_t* field, /*!< in/out: data field */
mem_heap_t* heap); /*!< in: memory heap where allocated */
/*********************************************************************//**
Tests if data length and content is equal for two dfields.
@return TRUE if equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
const dfield_t* field1, /*!< in: field */
const dfield_t* field2);/*!< in: field */
/*********************************************************************//**
Tests if dfield data length and content is equal to the given.
@return TRUE if equal */
UNIV_INTERN
ibool
dfield_data_is_binary_equal(
/*========================*/
const dfield_t* field, /*!< in: field */
ulint len, /*!< in: data length or UNIV_SQL_NULL */
const byte* data); /*!< in: data */
/*********************************************************************//**
Gets number of fields in a data tuple.
@return number of fields */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
const dtuple_t* tuple); /*!< in: tuple */
#ifdef UNIV_DEBUG
/*********************************************************************//**
Gets nth field of a tuple.
@return nth field */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
const dtuple_t* tuple, /*!< in: tuple */
ulint n); /*!< in: index of field */
#else /* UNIV_DEBUG */
# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Gets info bits in a data tuple.
@return info bits */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
const dtuple_t* tuple); /*!< in: tuple */
/*********************************************************************//**
Sets info bits in a data tuple. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /*!< in: tuple */
ulint info_bits); /*!< in: info bits */
/*********************************************************************//**
Gets number of fields used in record comparisons.
@return number of fields used in comparisons in rem0cmp.* */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
const dtuple_t* tuple); /*!< in: tuple */
/*********************************************************************//**
Sets number of fields used in record comparisons. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
dtuple_t* tuple, /*!< in: tuple */
ulint n_fields_cmp); /*!< in: number of fields used in
comparisons in rem0cmp.* */
/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
of fields used in record comparisons for this tuple is n_fields.
@return own: created tuple */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
mem_heap_t* heap, /*!< in: memory heap where the tuple
is created */
ulint n_fields); /*!< in: number of fields */
/**********************************************************//**
Wrap data fields in a tuple. The default value for number
of fields used in record comparisons for this tuple is n_fields.
@return data tuple */
UNIV_INLINE
const dtuple_t*
dtuple_from_fields(
/*===============*/
dtuple_t* tuple, /*!< in: storage for data tuple */
const dfield_t* fields, /*!< in: fields */
ulint n_fields); /*!< in: number of fields */
/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
dtuple_create, but if you want later to set it smaller, you can use this. */
UNIV_INTERN
void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /*!< in: tuple */
ulint n_fields); /*!< in: number of fields */
/*********************************************************************//**
Copies a data tuple to another. This is a shallow copy; if a deep copy
is desired, dfield_dup() will have to be invoked on each field.
@return own: copy of tuple */
UNIV_INLINE
dtuple_t*
dtuple_copy(
/*========*/
const dtuple_t* tuple, /*!< in: tuple to copy from */
mem_heap_t* heap); /*!< in: memory heap
where the tuple is created */
/**********************************************************//**
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted.
@return sum of data lens */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
const dtuple_t* tuple, /*!< in: typed data tuple */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
/*********************************************************************//**
Computes the number of externally stored fields in a data tuple.
@return number of fields */
UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
const dtuple_t* tuple); /*!< in: tuple */
/************************************************************//**
Compare two data tuples, respecting the collation of character fields.
@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
than tuple2 */
UNIV_INTERN
int
dtuple_coll_cmp(
/*============*/
const dtuple_t* tuple1, /*!< in: tuple 1 */
const dtuple_t* tuple2);/*!< in: tuple 2 */
/************************************************************//**
Folds a prefix given as the number of fields of a tuple.
@return the folded value */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
const dtuple_t* tuple, /*!< in: the tuple */
ulint n_fields,/*!< in: number of complete fields to fold */
ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
dulint tree_id)/*!< in: index tree id */
__attribute__((pure));
/*******************************************************************//**
Sets types of fields binary in a tuple. */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /*!< in: data tuple */
ulint n); /*!< in: number of fields to set */
/**********************************************************************//**
Checks if a dtuple contains an SQL null value.
@return TRUE if some field is SQL null */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
const dtuple_t* tuple); /*!< in: dtuple */
/**********************************************************//**
Checks that a data field is typed. Asserts an error if not.
@return TRUE if ok */
UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
const dfield_t* field); /*!< in: data field */
/**********************************************************//**
Checks that a data tuple is typed. Asserts an error if not.
@return TRUE if ok */
UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
const dtuple_t* tuple); /*!< in: tuple */
/**********************************************************//**
Checks that a data tuple is typed.
@return TRUE if ok */
UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
const dtuple_t* tuple); /*!< in: tuple */
#ifdef UNIV_DEBUG
/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e.,
all fields must have been set.
@return TRUE if ok */
UNIV_INTERN
ibool
dtuple_validate(
/*============*/
const dtuple_t* tuple); /*!< in: tuple */
#endif /* UNIV_DEBUG */
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
UNIV_INTERN
void
dfield_print(
/*=========*/
const dfield_t* dfield);/*!< in: dfield */
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
const dfield_t* dfield); /*!< in: dfield */
/**********************************************************//**
The following function prints the contents of a tuple. */
UNIV_INTERN
void
dtuple_print(
/*=========*/
FILE* f, /*!< in: output stream */
const dtuple_t* tuple); /*!< in: tuple */
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry or the index is clustered */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in/out: index entry */
ulint* n_ext); /*!< in/out: number of
externally stored columns */
/**************************************************************//**
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
UNIV_INTERN
void
dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: entry whose data was put to vector */
big_rec_t* vector);/*!< in, own: big rec vector; it is
freed in this function */
/**************************************************************//**
Frees the memory in a big rec vector. */
UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
big_rec_t* vector); /*!< in, own: big rec vector; it is
freed in this function */
/*######################################################################*/
/** Structure for an SQL data field: a pointer to the value, its length,
an "externally stored" flag and the data type of the value. */
struct dfield_struct{
void* data; /*!< pointer to data; in debug builds
dtuple_create() points this at the dummy
variable data_error until a value is set */
unsigned ext:1; /*!< TRUE=externally stored, FALSE=local;
set by dfield_set_ext(), cleared by
dfield_set_len() and dfield_set_data() */
unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */
dtype_t type; /*!< type of data */
};
/** Structure for an SQL data tuple of fields (logical record) */
struct dtuple_struct {
ulint info_bits; /*!< info bits of an index record:
the default is 0; this field is used
if an index record is built from
a data tuple */
ulint n_fields; /*!< number of fields in dtuple */
ulint n_fields_cmp; /*!< number of fields which should
be used in comparison services
of rem0cmp.*; the index search
is performed by comparing only these
fields, others are ignored; the
default value in dtuple creation is
the same value as n_fields;
dtuple_set_n_fields_cmp() asserts that
the value does not exceed n_fields */
dfield_t* fields; /*!< array of n_fields fields;
dtuple_create() places the array
directly after this struct in the same
allocation, dtuple_from_fields() points
it at storage supplied by the caller */
UT_LIST_NODE_T(dtuple_t) tuple_list;
/*!< data tuples can be linked into a
list using this field; note that
dtuple_create() and dtuple_from_fields()
do not initialize it */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number, used in
debug assertions; set to
DATA_TUPLE_MAGIC_N when the tuple
is created */
/** Value of dtuple_struct::magic_n */
# define DATA_TUPLE_MAGIC_N 65478679
#endif /* UNIV_DEBUG */
};
/** Type name for big_rec_field_struct */
typedef struct big_rec_field_struct big_rec_field_t;
/** A slot for a field in a big rec vector: identifies one field of a
record and the data stored for it */
struct big_rec_field_struct {
ulint field_no; /*!< field number in record */
ulint len; /*!< stored data length, in bytes */
const void* data; /*!< stored data */
};
/** Storage format for overflow data in a big record, that is, a
clustered index record which needs external storage of data fields.
The vector is released with dtuple_big_rec_free(), which frees the
memory heap recorded here. */
struct big_rec_struct {
mem_heap_t* heap; /*!< memory heap from which
allocated */
ulint n_fields; /*!< number of stored fields */
big_rec_field_t*fields; /*!< stored fields */
};
#ifndef UNIV_NONINL
#include "data0data.ic"
#endif
#endif

View file

@ -0,0 +1,612 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file include/data0data.ic
SQL data field and tuple
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
#include "mem0mem.h"
#include "ut0rnd.h"
#ifdef UNIV_DEBUG
/** Dummy variable to catch access to uninitialized fields. In the
debug version, dtuple_create() will make all fields of dtuple_t point
to data_error. */
extern byte data_error;
/*********************************************************************//**
Debug-build accessor for the type descriptor embedded in a data field.
The const qualifier of the argument is cast away, so the caller can
modify the type through the returned pointer.
@return	pointer to the type struct stored inside the field */
UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
	const dfield_t*	field)	/*!< in: SQL data field */
{
	dtype_t*	type;

	ut_ad(field);

	type = (dtype_t*) &field->type;

	return(type);
}
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Assigns a data type to an SQL data field. The type struct is copied by
value, so the field does not keep a reference to the argument. */
UNIV_INLINE
void
dfield_set_type(
/*============*/
	dfield_t*	field,	/*!< in/out: SQL data field */
	dtype_t*	type)	/*!< in: data type struct to copy from */
{
	ut_ad(field && type);

	/* Struct assignment: copies every member of the type. */
	field->type = type[0];
}
#ifdef UNIV_DEBUG
/*********************************************************************//**
Debug-build accessor for the data pointer of a field. Asserts that the
field has been given a value: unless the length says SQL NULL, the data
pointer must no longer refer to the dummy variable data_error.
@return	pointer to data */
UNIV_INLINE
void*
dfield_get_data(
/*============*/
	const dfield_t*	field)	/*!< in: field */
{
	void*	data;

	ut_ad(field);
	ut_ad((field->len == UNIV_SQL_NULL)
	      || (field->data != &data_error));

	data = (void*) field->data;

	return(data);
}
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Reads the data length stored in a field. In debug builds this asserts
that a field with a non-NULL length does not still point at data_error.
@return	length of data; UNIV_SQL_NULL if SQL null data */
UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
	const dfield_t*	field)	/*!< in: field */
{
	ulint	len;

	ut_ad(field);
	ut_ad((field->len == UNIV_SQL_NULL)
	      || (field->data != &data_error));

	len = field->len;

	return(len);
}
/*********************************************************************//**
Stores a new data length in a field. As a side effect the "externally
stored" flag of the field is cleared. */
UNIV_INLINE
void
dfield_set_len(
/*===========*/
	dfield_t*	field,	/*!< in/out: field */
	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
{
	ut_ad(field);

#ifdef UNIV_VALGRIND_DEBUG
	/* Only a real length describes a readable buffer. */
	if (len != UNIV_SQL_NULL) {
		UNIV_MEM_ASSERT_RW(field->data, len);
	}
#endif /* UNIV_VALGRIND_DEBUG */

	field->len = len;
	field->ext = 0;
}
/*********************************************************************//**
Tests whether the length stored in a field is the SQL NULL marker.
@return	nonzero if SQL null data */
UNIV_INLINE
ulint
dfield_is_null(
/*===========*/
	const dfield_t*	field)	/*!< in: field */
{
	ulint	is_null;

	ut_ad(field);

	is_null = (field->len == UNIV_SQL_NULL);

	return(is_null);
}
/*********************************************************************//**
Reads the "externally stored" flag of a field. The result is wrapped in
UNIV_UNLIKELY, i.e., callers are expected to see mostly locally stored
fields.
@return	nonzero if externally stored */
UNIV_INLINE
ulint
dfield_is_ext(
/*==========*/
	const dfield_t*	field)	/*!< in: field */
{
	ut_ad(field);

	return(UNIV_UNLIKELY(field->ext));
}
/*********************************************************************//**
Marks a field as externally stored. Note that dfield_set_len() and
dfield_set_data() clear the flag again, so this must be called after them. */
UNIV_INLINE
void
dfield_set_ext(
/*===========*/
	dfield_t*	field)	/*!< in/out: field */
{
	ut_ad(field);

	field->ext = 1;
}
/*********************************************************************//**
Points a field at a data buffer and records the length of the data.
The buffer is not copied. The "externally stored" flag of the field
is cleared. */
UNIV_INLINE
void
dfield_set_data(
/*============*/
	dfield_t*	field,	/*!< in/out: field */
	const void*	data,	/*!< in: data */
	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
{
	ut_ad(field);

#ifdef UNIV_VALGRIND_DEBUG
	/* Only a real length describes a readable buffer. */
	if (len != UNIV_SQL_NULL) {
		UNIV_MEM_ASSERT_RW(data, len);
	}
#endif /* UNIV_VALGRIND_DEBUG */

	field->len = len;
	field->ext = 0;
	field->data = (void*) data;
}
/*********************************************************************//**
Turns a data field into SQL NULL: the data pointer becomes NULL, the
length becomes UNIV_SQL_NULL and the "externally stored" flag is cleared. */
UNIV_INLINE
void
dfield_set_null(
/*============*/
	dfield_t*	field)	/*!< in/out: field */
{
	/* Same effect as dfield_set_data(field, NULL, UNIV_SQL_NULL);
	no Valgrind check is needed because there is no buffer. */
	ut_ad(field);

	field->data = NULL;
	field->ext = 0;
	field->len = UNIV_SQL_NULL;
}
/*********************************************************************//**
Copies the data pointer, the length and the "externally stored" flag
from one field to another. The type of the target field is left as it
is, and the data buffer itself is shared, not duplicated. */
UNIV_INLINE
void
dfield_copy_data(
/*=============*/
	dfield_t*	field1,	/*!< out: field to copy to */
	const dfield_t*	field2)	/*!< in: field to copy from */
{
	ut_ad(field1 && field2);

	field1->ext = field2->ext;
	field1->len = field2->len;
	field1->data = field2->data;
}
/*********************************************************************//**
Copies a whole data field, including its type, to another field by
struct assignment. The data buffer is shared between the two fields;
use dfield_dup() afterwards if a private copy of the data is needed. */
UNIV_INLINE
void
dfield_copy(
/*========*/
	dfield_t*	field1,	/*!< out: field to copy to */
	const dfield_t*	field2)	/*!< in: field to copy from */
{
	field1[0] = field2[0];
}
/*********************************************************************//**
Replaces the data pointer of a field with a copy of the data allocated
from the given memory heap. An SQL NULL field is left untouched. */
UNIV_INLINE
void
dfield_dup(
/*=======*/
	dfield_t*	field,	/*!< in/out: data field */
	mem_heap_t*	heap)	/*!< in: memory heap where allocated */
{
	if (dfield_is_null(field)) {
		/* Nothing to duplicate. */
		return;
	}

	UNIV_MEM_ASSERT_RW(field->data, field->len);

	field->data = mem_heap_dup(heap, field->data, field->len);
}
/*********************************************************************//**
Compares two data fields byte by byte. The fields are equal if their
lengths match and either both are SQL NULL or the data buffers have
identical contents. The types of the fields are not looked at.
@return	TRUE if equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
	const dfield_t*	field1,	/*!< in: field */
	const dfield_t*	field2)	/*!< in: field */
{
	ulint	len	= field1->len;
	ibool	equal	= (len == field2->len);

	/* The buffers are only inspected when both fields hold
	non-NULL data of the same length. */
	if (equal && len != UNIV_SQL_NULL) {
		equal = (memcmp(field1->data, field2->data, len) == 0);
	}

	return(equal);
}
/*********************************************************************//**
Reads the info bits stored in a data tuple.
@return	info bits */
UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	ulint	info_bits;

	ut_ad(tuple);

	info_bits = tuple->info_bits;

	return(info_bits);
}
/*********************************************************************//**
Stores new info bits in a data tuple, replacing the previous value. */
UNIV_INLINE
void
dtuple_set_info_bits(
/*=================*/
	dtuple_t*	tuple,		/*!< in/out: tuple */
	ulint		info_bits)	/*!< in: info bits */
{
	ut_ad(tuple);

	tuple->info_bits = info_bits;
}
/*********************************************************************//**
Reads how many leading fields of the tuple take part in record
comparisons.
@return	number of fields used in comparisons in rem0cmp.* */
UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	ulint	n_fields_cmp;

	ut_ad(tuple);

	n_fields_cmp = tuple->n_fields_cmp;

	return(n_fields_cmp);
}
/*********************************************************************//**
Sets how many fields of the tuple take part in record comparisons.
The value may not exceed the number of fields in the tuple; this is
checked by a debug assertion only. */
UNIV_INLINE
void
dtuple_set_n_fields_cmp(
/*====================*/
	dtuple_t*	tuple,		/*!< in/out: tuple */
	ulint		n_fields_cmp)	/*!< in: number of fields used in
					comparisons in rem0cmp.* */
{
	ut_ad(tuple);
	ut_ad(n_fields_cmp <= tuple->n_fields);

	tuple->n_fields_cmp = n_fields_cmp;
}
/*********************************************************************//**
Reads the total number of fields in a data tuple.
@return	number of fields */
UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	ulint	n_fields;

	ut_ad(tuple);

	n_fields = tuple->n_fields;

	return(n_fields);
}
#ifdef UNIV_DEBUG
/*********************************************************************//**
Debug-build accessor for one field of a tuple. Asserts that the index
is within the number of fields of the tuple.
@return	nth field */
UNIV_INLINE
dfield_t*
dtuple_get_nth_field(
/*=================*/
	const dtuple_t*	tuple,	/*!< in: tuple */
	ulint		n)	/*!< in: index of field */
{
	dfield_t*	fields;

	ut_ad(tuple);
	ut_ad(n < tuple->n_fields);

	fields = (dfield_t*) tuple->fields;

	return(&fields[n]);
}
#endif /* UNIV_DEBUG */
/**********************************************************//**
Allocates a data tuple from a memory heap. The tuple header and the
array of n_fields fields are carved out of a single allocation, the
field array following the header directly. Info bits start out as 0 and
the number of fields used in record comparisons as n_fields.
In debug builds every field is first set to SQL NULL with its data
pointer at data_error and its main type DATA_ERROR, and the field array
is then declared invalid for the memory checker, so that use of a field
before it has been given a value can be detected.
@return	own: created tuple */
UNIV_INLINE
dtuple_t*
dtuple_create(
/*==========*/
	mem_heap_t*	heap,		/*!< in: memory heap where the tuple
					is created */
	ulint		n_fields)	/*!< in: number of fields */
{
	dtuple_t*	tuple;
	ulint		size;

	ut_ad(heap);

	size = sizeof(dtuple_t) + n_fields * sizeof(dfield_t);

	tuple = (dtuple_t*) mem_heap_alloc(heap, size);

	tuple->info_bits = 0;
	tuple->n_fields = n_fields;
	tuple->n_fields_cmp = n_fields;

	/* The field array lives right behind the tuple header. */
	tuple->fields = (dfield_t*) (tuple + 1);

#ifdef UNIV_DEBUG
	tuple->magic_n = DATA_TUPLE_MAGIC_N;

	{	/* In the debug version, initialize fields to an error value */
		ulint	i;

		for (i = 0; i < n_fields; i++) {
			dfield_t*	field = dtuple_get_nth_field(tuple, i);

			dfield_set_len(field, UNIV_SQL_NULL);
			field->data = &data_error;
			dfield_get_type(field)->mtype = DATA_ERROR;
		}
	}

	UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
#endif

	return(tuple);
}
/**********************************************************//**
Initializes caller-provided tuple storage so that it wraps an existing
array of data fields. Nothing is allocated or copied: the tuple simply
points at the given array. Info bits are set to 0 and the number of
fields used in record comparisons to n_fields.
@return	data tuple, i.e., the storage that was passed in */
UNIV_INLINE
const dtuple_t*
dtuple_from_fields(
/*===============*/
	dtuple_t*	tuple,		/*!< out: storage for data tuple */
	const dfield_t*	fields,		/*!< in: fields */
	ulint		n_fields)	/*!< in: number of fields */
{
	tuple->info_bits = 0;
	tuple->n_fields_cmp = n_fields;
	tuple->n_fields = n_fields;

	/* The const qualifier is dropped only to fit the member type. */
	tuple->fields = (dfield_t*) fields;

	ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);

	return(tuple);
}
/*********************************************************************//**
Creates a new tuple with the same number of fields as the given one and
copies every field to it. This is a shallow copy: the data buffers are
shared with the original, and dfield_dup() has to be invoked on each
field if a deep copy is desired. Only the fields are copied; the new
tuple keeps the info bits and the number of comparison fields that
dtuple_create() gave it.
@return	own: copy of tuple */
UNIV_INLINE
dtuple_t*
dtuple_copy(
/*========*/
	const dtuple_t*	tuple,	/*!< in: tuple to copy from */
	mem_heap_t*	heap)	/*!< in: memory heap
				where the tuple is created */
{
	ulint		n_fields;
	dtuple_t*	new_tuple;
	ulint		i;

	n_fields = dtuple_get_n_fields(tuple);
	new_tuple = dtuple_create(heap, n_fields);

	for (i = 0; i < n_fields; i++) {
		dfield_t*	dst = dtuple_get_nth_field(new_tuple, i);
		const dfield_t*	src = dtuple_get_nth_field(tuple, i);

		dfield_copy(dst, src);
	}

	return(new_tuple);
}
/**********************************************************//**
Adds up the data lengths of all fields of a tuple. For an SQL NULL
field the size that dtype_get_sql_null_size() reports for its type is
used instead of a length. The space occupied by the field structs or
the tuple struct is not counted, and neither is possible space in
externally stored parts of a field.
@return	sum of data lengths */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
	const dtuple_t*	tuple,	/*!< in: typed data tuple */
	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT */
{
	ulint	n_fields;
	ulint	i;
	ulint	sum	= 0;

	ut_ad(tuple);
	ut_ad(dtuple_check_typed(tuple));
	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	n_fields = tuple->n_fields;

	for (i = 0; i < n_fields; i++) {
		const dfield_t*	field	= dtuple_get_nth_field(tuple, i);
		ulint		len	= dfield_get_len(field);

		sum += (len == UNIV_SQL_NULL)
			? dtype_get_sql_null_size(dfield_get_type(field),
						  comp)
			: len;
	}

	return(sum);
}
/*********************************************************************//**
Computes the number of externally stored fields in a data tuple by
summing the "externally stored" flags of all its fields.
@return	number of externally stored fields */
UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	ulint	n_ext	= 0;
	ulint	n_fields;
	ulint	i;

	ut_ad(tuple);
	ut_ad(dtuple_check_typed(tuple));
	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	/* Read the field count only after the tuple pointer has been
	validated; dereferencing it in the declaration would make the
	NULL assertion above meaningless. */
	n_fields = tuple->n_fields;

	for (i = 0; i < n_fields; i++) {
		/* ext is a 1-bit flag, so it contributes 0 or 1. */
		n_ext += dtuple_get_nth_field(tuple, i)->ext;
	}

	return(n_ext);
}
/*******************************************************************//**
Sets the type of the first n fields of a tuple to DATA_BINARY, with
precise type 0 and length 0. */
UNIV_INLINE
void
dtuple_set_types_binary(
/*====================*/
	dtuple_t*	tuple,	/*!< in/out: data tuple */
	ulint		n)	/*!< in: number of fields to set */
{
	ulint	i;

	for (i = 0; i < n; i++) {
		dfield_t*	dfield = dtuple_get_nth_field(tuple, i);

		dtype_set(dfield_get_type(dfield), DATA_BINARY, 0, 0);
	}
}
/************************************************************//**
Folds a prefix of a tuple to a single value. The fold starts from the
index tree id; the data of each of the first n_fields fields is then
folded in, in field order, and finally at most n_bytes bytes of the
field that follows them. SQL NULL fields do not contribute to the fold.
@return	the folded value */
UNIV_INLINE
ulint
dtuple_fold(
/*========*/
	const dtuple_t*	tuple,	/*!< in: the tuple */
	ulint		n_fields,/*!< in: number of complete fields to fold */
	ulint		n_bytes,/*!< in: number of bytes to fold in an
				incomplete last field */
	dulint		tree_id)/*!< in: index tree id */
{
	const dfield_t*	field;
	const byte*	data;
	ulint		len;
	ulint		fold;
	ulint		i;

	ut_ad(tuple);
	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(dtuple_check_typed(tuple));

	fold = ut_fold_dulint(tree_id);

	/* The complete fields. */
	for (i = 0; i < n_fields; i++) {
		field = dtuple_get_nth_field(tuple, i);
		data = (const byte*) dfield_get_data(field);
		len = dfield_get_len(field);

		if (len == UNIV_SQL_NULL) {
			continue;
		}

		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
	}

	if (n_bytes == 0) {
		/* No incomplete last field was requested. */
		return(fold);
	}

	/* The incomplete field: i == n_fields here, i.e., the field
	right after the complete ones. */
	field = dtuple_get_nth_field(tuple, i);
	data = (const byte*) dfield_get_data(field);
	len = dfield_get_len(field);

	if (len != UNIV_SQL_NULL) {
		/* Fold at most n_bytes bytes of the field. */
		if (n_bytes < len) {
			len = n_bytes;
		}

		fold = ut_fold_ulint_pair(fold, ut_fold_binary(data, len));
	}

	return(fold);
}
/**********************************************************************//**
Writes the representation of an SQL NULL value, which is len zero
bytes, to a buffer. */
UNIV_INLINE
void
data_write_sql_null(
/*================*/
	byte*	data,	/*!< out: pointer to a buffer of size len */
	ulint	len)	/*!< in: SQL null size in bytes */
{
	memset(data, '\0', len);
}
/**********************************************************************//**
Scans the fields of a tuple in order and stops at the first one that
is SQL NULL.
@return	TRUE if some field is SQL null */
UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
	const dtuple_t*	tuple)	/*!< in: dtuple */
{
	ibool	found	= FALSE;
	ulint	n_fields;
	ulint	i;

	n_fields = dtuple_get_n_fields(tuple);

	for (i = 0; i < n_fields; i++) {
		if (dfield_is_null(dtuple_get_nth_field(tuple, i))) {
			found = TRUE;
			break;
		}
	}

	return(found);
}
/**************************************************************//**
Releases a big rec vector by freeing the memory heap recorded in it.
The vector must not be used after this call. */
UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
				freed in this function */
{
	mem_heap_t*	heap = vector->heap;

	mem_heap_free(heap);
}

View file

@ -0,0 +1,486 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/data0type.h
Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#ifndef data0type_h
#define data0type_h
#include "univ.i"
extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
typedef struct dtype_struct dtype_t;
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column */
#define DATA_VARCHAR 1 /* character varying of the
latin1_swedish_ci charset-collation; note
that the MySQL format for this, DATA_BINARY,
DATA_VARMYSQL, is also affected by whether the
'precise type' contains
DATA_MYSQL_TRUE_VARCHAR */
#define DATA_CHAR 2 /* fixed length character of the
latin1_swedish_ci charset-collation */
#define DATA_FIXBINARY 3 /* binary string of fixed length */
#define DATA_BINARY 4 /* binary string */
#define DATA_BLOB 5 /* binary large object, or a TEXT type;
if prtype & DATA_BINARY_TYPE == 0, then this is
actually a TEXT column (or a BLOB created
with < 4.0.14; since column prefix indexes
came only in 4.0.14, the missing flag in BLOBs
created before that does not cause any harm) */
#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
#define DATA_SYS 8 /* system column */
/* Data types >= DATA_FLOAT must be compared using the whole field, not as
binary strings */
#define DATA_FLOAT 9
#define DATA_DOUBLE 10
#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
#define DATA_VARMYSQL 12 /* any charset varying length char */
#define DATA_MYSQL 13 /* any charset fixed length char */
/* NOTE that 4.1.1 used DATA_MYSQL and
DATA_VARMYSQL for all character sets, and the
charset-collation for tables created with it
can also be latin1_swedish_ci */
#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
requires the values are <= 63 */
/*-------------------------------------------*/
/* The 'PRECISE TYPE' of a column */
/*
Tables created by a MySQL user have the following convention:
- In the least significant byte in the precise type we store the MySQL type
code (not applicable for system columns).
- In the second least significant byte we OR flags DATA_NOT_NULL,
DATA_UNSIGNED, DATA_BINARY_TYPE.
- In the third least significant byte of the precise type of string types we
store the MySQL charset-collation code. In DATA_BLOB columns created with
< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
problem, though.
Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
precise type, since the charset was always the default charset of the MySQL
installation. If the stored charset code is 0 in the system table SYS_COLUMNS
of InnoDB, that means that the default charset of this MySQL installation
should be used.
When loading a table definition from the system tables to the InnoDB data
dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
if the stored charset-collation is 0, and if that is the case and the type is
a non-binary string, replace that 0 by the default charset-collation code of
this MySQL installation. In short, in old tables, the charset-collation code
in the system tables on disk can be 0, but in in-memory data structures
(dtype_t), the charset-collation code is always != 0 for non-binary string
types.
In new tables, in binary string types, the charset-collation code is the
MySQL code for the 'binary charset', that is, != 0.
For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
InnoDB performs all comparisons internally, without resorting to the MySQL
comparison functions. This is to save CPU time.
InnoDB's own internal system tables have different precise types for their
columns, and for them the precise type is usually not used at all.
*/
#define DATA_ENGLISH 4 /* English language character string: this
is a relic from pre-MySQL time and only used
for InnoDB's own system tables */
#define DATA_ERROR 111 /* another relic from pre-MySQL time */
#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
type from the precise type */
#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
format true VARCHAR */
/* Precise data types for system columns and the length of those columns;
NOTE: the values must run from 0 up in the order given! All codes must
be less than 256 */
#define DATA_ROW_ID 0 /* row id: a dulint */
#define DATA_ROW_ID_LEN 6 /* stored length for row id */
#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
#define DATA_TRX_ID_LEN 6
#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
#define DATA_ROLL_PTR_LEN 7
#define DATA_N_SYS_COLS 3 /* number of system columns defined above */
#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
/* Flags ORed to the precise data type */
#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
the column is declared as NOT NULL */
#define DATA_UNSIGNED 512 /* this is ORed to the precise type when
we have an unsigned integer type */
#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
string, this is ORed to the precise type:
this only holds for tables created with
>= MySQL-4.0.14 */
/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1.
In earlier versions this was set for some
BLOB columns.
*/
#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
type when the column is true VARCHAR where
MySQL uses 2 bytes to store the data len;
for shorter VARCHARs MySQL uses only 1 byte */
/*-------------------------------------------*/
/* This many bytes we need to store the type information affecting the
alphabetical order for a single field and decide the storage size of an
SQL null */
#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Gets the MySQL type code from a dtype.
@return MySQL type code; this is NOT an InnoDB type code! */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
const dtype_t* type); /*!< in: type struct */
/*********************************************************************//**
Determine how many bytes the first n characters of the given string occupy.
If the string is shorter than n characters, returns the number of bytes
the characters in the string occupy.
@return length of the prefix, in bytes */
UNIV_INTERN
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
ulint prtype, /*!< in: precise type */
ulint mbminlen, /*!< in: minimum length of a
multi-byte character */
ulint mbmaxlen, /*!< in: maximum length of a
multi-byte character */
ulint prefix_len, /*!< in: length of the requested
prefix, in characters, multiplied by
dtype_get_mbmaxlen(dtype) */
ulint data_len, /*!< in: length of str (in bytes) */
const char* str); /*!< in: the string whose prefix
length is being determined */
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Checks if a data main type is a string type. Also a BLOB is considered a
string type.
@return TRUE if string type */
UNIV_INTERN
ibool
dtype_is_string_type(
/*=================*/
ulint mtype); /*!< in: InnoDB main data type code: DATA_CHAR, ... */
/*********************************************************************//**
Checks if a type is a binary string type. Note that for tables created with
< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
those DATA_BLOB columns this function currently returns FALSE.
@return TRUE if binary string type */
UNIV_INTERN
ibool
dtype_is_binary_string_type(
/*========================*/
ulint mtype, /*!< in: main data type */
ulint prtype);/*!< in: precise type */
/*********************************************************************//**
Checks if a type is a non-binary string type. That is, dtype_is_string_type is
TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
For those DATA_BLOB columns this function currently returns TRUE.
@return TRUE if non-binary string type */
UNIV_INTERN
ibool
dtype_is_non_binary_string_type(
/*============================*/
ulint mtype, /*!< in: main data type */
ulint prtype);/*!< in: precise type */
/*********************************************************************//**
Sets a data type structure. */
UNIV_INLINE
void
dtype_set(
/*======*/
dtype_t* type, /*!< in: type struct to init */
ulint mtype, /*!< in: main data type */
ulint prtype, /*!< in: precise type */
ulint len); /*!< in: precision of type */
/*********************************************************************//**
Copies a data type structure. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
dtype_t* type1, /*!< in: type struct to copy to */
const dtype_t* type2); /*!< in: type struct to copy from */
/*********************************************************************//**
Gets the SQL main data type.
@return SQL main data type */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
const dtype_t* type); /*!< in: data type */
/*********************************************************************//**
Gets the precise data type.
@return precise data type */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
const dtype_t* type); /*!< in: data type */
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Compute the mbminlen and mbmaxlen members of a data type structure. */
UNIV_INLINE
void
dtype_get_mblen(
/*============*/
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type (and collation) */
ulint* mbminlen, /*!< out: minimum length of a
multi-byte character */
ulint* mbmaxlen); /*!< out: maximum length of a
multi-byte character */
/*********************************************************************//**
Gets the MySQL charset-collation code for MySQL string types.
@return MySQL charset-collation code */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
ulint prtype);/*!< in: precise data type */
/*********************************************************************//**
Forms a precise type from the < 4.1.2 format precise type plus the
charset-collation code.
@return precise type, including the charset-collation code */
UNIV_INTERN
ulint
dtype_form_prtype(
/*==============*/
ulint old_prtype, /*!< in: the MySQL type code and the flags
DATA_BINARY_TYPE etc. */
ulint charset_coll); /*!< in: MySQL charset-collation code */
/*********************************************************************//**
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later.
@return TRUE if a subset of UTF-8 */
UNIV_INLINE
ibool
dtype_is_utf8(
/*==========*/
ulint prtype);/*!< in: precise data type */
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets the type length.
@return fixed length of the type, in bytes, or 0 if variable-length */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
const dtype_t* type); /*!< in: data type */
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Gets the minimum length of a character, in bytes.
@return minimum length of a char, in bytes, or 0 if this is not a
character type */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
const dtype_t* type); /*!< in: type */
/*********************************************************************//**
Gets the maximum length of a character, in bytes.
@return maximum length of a char, in bytes, or 0 if this is not a
character type */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
const dtype_t* type); /*!< in: type */
/*********************************************************************//**
Gets the padding character code for the type.
@return padding character code, or ULINT_UNDEFINED if no padding specified */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
ulint mtype, /*!< in: main type */
ulint prtype); /*!< in: precise type */
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the size of a fixed size data type, 0 if not a fixed size type.
@return fixed size, or 0 */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
/*=====================*/
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
ulint mbminlen, /*!< in: minimum length of a multibyte char */
ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Returns the minimum size of a data type.
@return minimum size */
UNIV_INLINE
ulint
dtype_get_min_size_low(
/*===================*/
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
ulint mbminlen, /*!< in: minimum length of a multibyte char */
ulint mbmaxlen); /*!< in: maximum length of a multibyte char */
/***********************************************************************//**
Returns the maximum size of a data type. Note: types in system tables may be
incomplete and return incorrect information.
@return maximum size */
UNIV_INLINE
ulint
dtype_get_max_size_low(
/*===================*/
ulint mtype, /*!< in: main type */
ulint len); /*!< in: length */
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
For fixed length types it is the fixed length of the type, otherwise 0.
@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
const dtype_t* type, /*!< in: type */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the < 4.1.x
storage format. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
dtype_t* type, /*!< in: type struct */
const byte* buf); /*!< in: buffer for the stored order info */
/**********************************************************************//**
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
byte* buf, /*!< in: buffer for
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
bytes where we store the info */
const dtype_t* type, /*!< in: type struct */
ulint prefix_len);/*!< in: prefix length to
replace type->len, or 0 */
/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
storage format. */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
dtype_t* type, /*!< in: type struct */
const byte* buf); /*!< in: buffer for stored type order info */
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Validates a data type structure.
@return TRUE if ok */
UNIV_INTERN
ibool
dtype_validate(
/*===========*/
const dtype_t* type); /*!< in: type struct to validate */
/*********************************************************************//**
Prints a data type structure. */
UNIV_INTERN
void
dtype_print(
/*========*/
const dtype_t* type); /*!< in: type */
/* Structure for an SQL data type.
All members are bit-fields, so the values that can be stored are bounded by
the declared widths (8, 24, 16, 2 and 3 bits respectively).
If you add fields to this structure, be sure to initialize them everywhere.
This structure is initialized in the following functions:
dtype_set()
dtype_read_for_order_and_null_size()
dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
struct dtype_struct{
unsigned mtype:8; /*!< main data type */
unsigned prtype:24; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
binary string, whether this is
a true VARCHAR where MySQL
uses 2 bytes to store the length */
/* the remaining fields do not affect alphabetical ordering: */
unsigned len:16; /*!< length; for MySQL data this
is field->pack_length(),
except that for a >= 5.0.3
type true VARCHAR this is the
maximum byte length of the
string data (in addition to
the string, MySQL uses 1 or 2
bytes to store the string length) */
#ifndef UNIV_HOTBACKUP
/* The character-length members exist only when UNIV_HOTBACKUP is not
defined; code that touches them must be guarded the same way. */
unsigned mbminlen:2; /*!< minimum length of a
character, in bytes
(2-bit field: 0..3) */
unsigned mbmaxlen:3; /*!< maximum length of a
character, in bytes
(3-bit field: 0..7) */
#endif /* !UNIV_HOTBACKUP */
};
#ifndef UNIV_NONINL
#include "data0type.ic"
#endif
#endif

View file

@ -0,0 +1,599 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/data0type.ic
Data types
Created 1/16/1996 Heikki Tuuri
*******************************************************/
#include "mach0data.h"
#ifndef UNIV_HOTBACKUP
# include "ha_prototypes.h"
/*********************************************************************//**
Extracts the MySQL charset-collation code of a MySQL string type. The code
is the byte stored in bits 16..23 of the precise type.
@return MySQL charset-collation code */
UNIV_INLINE
ulint
dtype_get_charset_coll(
/*===================*/
	ulint	prtype)	/*!< in: precise data type */
{
	ulint	coll;

	/* Drop the low 16 bits, then keep a single byte. */
	coll = prtype >> 16;
	coll &= 0xFFUL;

	return(coll);
}
/*********************************************************************//**
Tells whether the charset of a MySQL string type is a subset of UTF-8.
Only the collation codes listed below are recognized, so the function
may return false negatives if further character-set collation codes are
introduced in MySQL later.
@return TRUE if a subset of UTF-8 */
UNIV_INLINE
ibool
dtype_is_utf8(
/*==========*/
	ulint	prtype)	/*!< in: precise data type */
{
	ibool	is_utf8;

	/* These codes have been copied from strings/ctype-extra.c
	and strings/ctype-utf8.c. */
	switch (dtype_get_charset_coll(prtype)) {
	case 11:	/* ascii_general_ci */
	case 65:	/* ascii_bin */
	case 33:	/* utf8_general_ci */
	case 83:	/* utf8_bin */
	case 254:	/* utf8_general_cs */
		is_utf8 = TRUE;
		break;
	default:
		is_utf8 = FALSE;
		break;
	}

	return(is_utf8);
}
/*********************************************************************//**
Gets the MySQL type code from a dtype: the lowest byte of the precise type.
@return MySQL type code; this is NOT an InnoDB type code! */
UNIV_INLINE
ulint
dtype_get_mysql_type(
/*=================*/
	const dtype_t*	type)	/*!< in: type struct */
{
	ulint	mysql_type;

	mysql_type = type->prtype;
	mysql_type &= 0xFFUL;

	return(mysql_type);
}
/*********************************************************************//**
Computes the minimum and maximum character lengths for a data type.
For string types the widths are obtained from innobase_get_cset_width()
using the collation code in prtype; for every other type both are 0. */
UNIV_INLINE
void
dtype_get_mblen(
/*============*/
	ulint	mtype,		/*!< in: main type */
	ulint	prtype,		/*!< in: precise type (and collation) */
	ulint*	mbminlen,	/*!< out: minimum length of a
				multi-byte character */
	ulint*	mbmaxlen)	/*!< out: maximum length of a
				multi-byte character */
{
	if (!dtype_is_string_type(mtype)) {
		/* Not a string type: character lengths do not apply. */
		*mbmaxlen = 0;
		*mbminlen = 0;

		return;
	}

	innobase_get_cset_width(dtype_get_charset_coll(prtype),
				mbminlen, mbmaxlen);

	/* Debug-only sanity checks; dtype_t keeps these values in
	2-bit (mbminlen) and 3-bit (mbmaxlen) fields. */
	ut_ad(*mbminlen <= *mbmaxlen);
	ut_ad(*mbminlen <= 2);
	ut_ad(*mbmaxlen < 1 << 3);
}
/*********************************************************************//**
Fills in the mbminlen and mbmaxlen members of a data type structure from
its mtype and prtype members, then validates the structure in debug
builds. */
UNIV_INLINE
void
dtype_set_mblen(
/*============*/
	dtype_t*	type)	/*!< in/out: type */
{
	ulint	min_len;
	ulint	max_len;

	dtype_get_mblen(type->mtype, type->prtype, &min_len, &max_len);

	type->mbmaxlen = max_len;
	type->mbminlen = min_len;

	ut_ad(dtype_validate(type));
}
#else /* !UNIV_HOTBACKUP */
# define dtype_set_mblen(type) (void) 0
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Initializes a data type structure from its main type, precise type and
length, and then derives the character-length members through
dtype_set_mblen(). */
UNIV_INLINE
void
dtype_set(
/*======*/
	dtype_t*	type,	/*!< out: type struct to init */
	ulint		mtype,	/*!< in: main data type */
	ulint		prtype,	/*!< in: precise type */
	ulint		len)	/*!< in: precision of type */
{
	ut_ad(type);
	ut_ad(mtype <= DATA_MTYPE_MAX);

	type->len = len;
	type->prtype = prtype;
	type->mtype = mtype;

	/* Must come last: it reads the mtype and prtype set above. */
	dtype_set_mblen(type);
}
/*********************************************************************//**
Copies a data type structure by plain struct assignment and validates the
destination in debug builds. */
UNIV_INLINE
void
dtype_copy(
/*=======*/
	dtype_t*	type1,	/*!< out: type struct to copy to */
	const dtype_t*	type2)	/*!< in: type struct to copy from */
{
	/* All members are copied at once. */
	*type1 = *type2;

	ut_ad(dtype_validate(type1));
}
/*********************************************************************//**
Reads the mtype member (the SQL main data type) of a data type.
@return SQL main data type */
UNIV_INLINE
ulint
dtype_get_mtype(
/*============*/
	const dtype_t*	type)	/*!< in: data type */
{
	ulint	mtype;

	ut_ad(type);

	mtype = type->mtype;

	return(mtype);
}
/*********************************************************************//**
Reads the prtype member (the precise data type) of a data type.
@return precise data type */
UNIV_INLINE
ulint
dtype_get_prtype(
/*=============*/
	const dtype_t*	type)	/*!< in: data type */
{
	ulint	prtype;

	ut_ad(type);

	prtype = type->prtype;

	return(prtype);
}
/*********************************************************************//**
Reads the len member of a data type.
@return fixed length of the type, in bytes, or 0 if variable-length */
UNIV_INLINE
ulint
dtype_get_len(
/*==========*/
	const dtype_t*	type)	/*!< in: data type */
{
	ulint	len;

	ut_ad(type);

	len = type->len;

	return(len);
}
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Reads the mbminlen member of a data type: the minimum length of a
character, in bytes.
@return minimum length of a char, in bytes, or 0 if this is not a
character type */
UNIV_INLINE
ulint
dtype_get_mbminlen(
/*===============*/
	const dtype_t*	type)	/*!< in: type */
{
	ulint	mbminlen;

	ut_ad(type);

	mbminlen = type->mbminlen;

	return(mbminlen);
}
/*********************************************************************//**
Reads the mbmaxlen member of a data type: the maximum length of a
character, in bytes.
@return maximum length of a char, in bytes, or 0 if this is not a
character type */
UNIV_INLINE
ulint
dtype_get_mbmaxlen(
/*===============*/
	const dtype_t*	type)	/*!< in: type */
{
	ulint	mbmaxlen;

	ut_ad(type);

	mbmaxlen = type->mbmaxlen;

	return(mbmaxlen);
}
/*********************************************************************//**
Gets the padding character code for a type. The only padding character
ever returned is the space (0x20).
@return padding character code, or ULINT_UNDEFINED if no padding specified */
UNIV_INLINE
ulint
dtype_get_pad_char(
/*===============*/
	ulint	mtype,	/*!< in: main type */
	ulint	prtype)	/*!< in: precise type */
{
	ibool	pad_with_space;

	switch (mtype) {
	case DATA_FIXBINARY:
	case DATA_BINARY:
		/* Starting from 5.0.18, do not pad VARBINARY or BINARY
		columns, i.e. those using the binary charset-collation. */
		if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype)
				  == DATA_MYSQL_BINARY_CHARSET_COLL)) {
			pad_with_space = FALSE;
		} else {
			pad_with_space = TRUE;
		}
		break;
	case DATA_CHAR:
	case DATA_VARCHAR:
	case DATA_MYSQL:
	case DATA_VARMYSQL:
		/* Space is the padding character for all char and binary
		strings, and starting from 5.0.3, also for TEXT strings. */
		pad_with_space = TRUE;
		break;
	case DATA_BLOB:
		/* Only BLOB columns without the binary flag are padded. */
		if (prtype & DATA_BINARY_TYPE) {
			pad_with_space = FALSE;
		} else {
			pad_with_space = TRUE;
		}
		break;
	default:
		/* No padding specified */
		pad_with_space = FALSE;
		break;
	}

	if (pad_with_space) {
		return(0x20);
	}

	return(ULINT_UNDEFINED);
}
/**********************************************************************//**
Stores for a type the information which determines its alphabetical ordering
and the storage size of an SQL NULL value. This is the >= 4.1.x storage
format. The 6 bytes written are:
  buf[0]    main type, with bit 128 set for DATA_BINARY_TYPE
  buf[1]    lowest byte of the precise type
  buf[2..3] length (prefix_len if nonzero, else type->len), low 16 bits
  buf[4..5] charset-collation code, with bit 128 of buf[4] set for
            DATA_NOT_NULL */
UNIV_INLINE
void
dtype_new_store_for_order_and_null_size(
/*====================================*/
	byte*		buf,	/*!< out: buffer for
				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
				bytes where we store the info */
	const dtype_t*	type,	/*!< in: type struct */
	ulint		prefix_len)/*!< in: prefix length to
				replace type->len, or 0 */
{
#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
#endif
	ulint	type_byte;
	ulint	stored_len;
	ulint	charset_coll;

	type_byte = type->mtype & 0xFFUL;

	if (type->prtype & DATA_BINARY_TYPE) {
		type_byte |= 128;
	}

	/* In versions < 4.1.2 bit 64 of the first byte was additionally
	set when type->prtype had DATA_NONLATIN1; this is no longer done. */

	buf[0] = (byte) type_byte;
	buf[1] = (byte)(type->prtype & 0xFFUL);

	if (prefix_len) {
		stored_len = prefix_len;
	} else {
		stored_len = type->len;
	}

	mach_write_to_2(buf + 2, stored_len & 0xFFFFUL);

	charset_coll = dtype_get_charset_coll(type->prtype);
	ut_ad(charset_coll < 256);

	mach_write_to_2(buf + 4, charset_coll);

	/* Must follow the 2-byte write above, which overwrites buf[4]. */
	if (type->prtype & DATA_NOT_NULL) {
		buf[4] |= 128;
	}
}
/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the < 4.1.x
storage format: it carries no charset-collation code, so
data_mysql_default_charset_coll is used for the precise type. */
UNIV_INLINE
void
dtype_read_for_order_and_null_size(
/*===============================*/
	dtype_t*	type,	/*!< out: type struct */
	const byte*	buf)	/*!< in: buffer for stored type order info */
{
#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
#endif
	ulint	old_prtype;

	/* Bit 128 of the first byte carries the binary flag; the low
	6 bits are the main type. */
	old_prtype = buf[1];

	if (buf[0] & 128) {
		old_prtype |= DATA_BINARY_TYPE;
	}

	type->mtype = buf[0] & 63;
	type->len = mach_read_from_2(buf + 2);
	type->prtype = dtype_form_prtype(old_prtype,
					 data_mysql_default_charset_coll);

	dtype_set_mblen(type);
}
/**********************************************************************//**
Reads to a type the stored information which determines its alphabetical
ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
storage format, i.e. the 6 bytes written by
dtype_new_store_for_order_and_null_size(). */
UNIV_INLINE
void
dtype_new_read_for_order_and_null_size(
/*===================================*/
	dtype_t*	type,	/*!< out: type struct */
	const byte*	buf)	/*!< in: buffer for stored type order info */
{
	ulint	mtype;
	ulint	prtype;
	ulint	charset_coll;

#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
#endif
	mtype = buf[0] & 63;
	prtype = buf[1];

	/* Flag bits are kept in the top bit of bytes 0 and 4. */
	if (buf[0] & 128) {
		prtype |= DATA_BINARY_TYPE;
	}

	if (buf[4] & 128) {
		prtype |= DATA_NOT_NULL;
	}

	type->mtype = mtype;
	type->len = mach_read_from_2(buf + 2);

	/* Mask off the DATA_NOT_NULL bit to get the collation code. */
	charset_coll = mach_read_from_2(buf + 4) & 0x7fff;

	if (dtype_is_string_type(mtype)) {
		ut_a(charset_coll < 256);

		if (charset_coll == 0) {
			/* This insert buffer record was inserted with MySQL
			version < 4.1.2, and the charset-collation code was not
			explicitly stored to dtype->prtype at that time. It
			must be the default charset-collation of this MySQL
			installation. */
			charset_coll = data_mysql_default_charset_coll;
		}

		prtype = dtype_form_prtype(prtype, charset_coll);
	}

	type->prtype = prtype;

	dtype_set_mblen(type);
}
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the size of a fixed size data type, 0 if not a fixed size type.
DATA_SYS, DATA_CHAR, DATA_FIXBINARY, DATA_INT, DATA_FLOAT and DATA_DOUBLE
are always fixed size. DATA_MYSQL is fixed size when it is binary, when the
row format is not COMPACT, or when mbminlen == mbmaxlen; under
UNIV_HOTBACKUP it is always treated as fixed size. An unknown mtype
triggers ut_error.
@return fixed size, or 0 */
UNIV_INLINE
ulint
dtype_get_fixed_size_low(
/*=====================*/
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
ulint mbminlen, /*!< in: minimum length of a multibyte char */
ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
switch (mtype) {
case DATA_SYS:
#ifdef UNIV_DEBUG
/* Debug builds only: check that len matches the system column named by
prtype. An unrecognized system column asserts and returns 0. */
switch (prtype & DATA_MYSQL_TYPE_MASK) {
case DATA_ROW_ID:
ut_ad(len == DATA_ROW_ID_LEN);
break;
case DATA_TRX_ID:
ut_ad(len == DATA_TRX_ID_LEN);
break;
case DATA_ROLL_PTR:
ut_ad(len == DATA_ROLL_PTR_LEN);
break;
default:
ut_ad(0);
return(0);
}
#endif /* UNIV_DEBUG */
/* DATA_SYS deliberately falls through to the fixed-size types below. */
case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_INT:
case DATA_FLOAT:
case DATA_DOUBLE:
return(len);
case DATA_MYSQL:
#ifndef UNIV_HOTBACKUP
if (prtype & DATA_BINARY_TYPE) {
return(len);
} else if (!comp) {
return(len);
} else {
/* We play it safe here and ask MySQL for
mbminlen and mbmaxlen. Although
mbminlen and mbmaxlen are
initialized if and only if prtype
is (in one of the 3 functions in this file),
it could be that none of these functions
has been called. */
ulint i_mbminlen, i_mbmaxlen;
innobase_get_cset_width(
dtype_get_charset_coll(prtype),
&i_mbminlen, &i_mbmaxlen);
/* A mismatch is only reported on stderr; the decision below still
uses the mbminlen and mbmaxlen passed in by the caller. */
if (UNIV_UNLIKELY(mbminlen != i_mbminlen)
|| UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: "
"mbminlen=%lu, "
"mbmaxlen=%lu, "
"type->mbminlen=%lu, "
"type->mbmaxlen=%lu\n",
(ulong) i_mbminlen,
(ulong) i_mbmaxlen,
(ulong) mbminlen,
(ulong) mbmaxlen);
}
if (mbminlen == mbmaxlen) {
return(len);
}
}
#else /* !UNIV_HOTBACKUP */
return(len);
#endif /* !UNIV_HOTBACKUP */
/* fall through for variable-length charsets */
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
case DATA_VARMYSQL:
case DATA_BLOB:
return(0);
default:
ut_error;
}
/* Reached only after the default branch above. */
return(0);
}
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Returns the minimum size of a data type.
Fixed-size types return len; DATA_MYSQL with a variable-length character
set returns len scaled by mbminlen / mbmaxlen; the variable-length types
(DATA_VARCHAR, DATA_BINARY, DATA_DECIMAL, DATA_VARMYSQL, DATA_BLOB) return 0.
An unknown mtype triggers ut_error.
@return minimum size */
UNIV_INLINE
ulint
dtype_get_min_size_low(
/*===================*/
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
ulint mbminlen, /*!< in: minimum length of a multibyte char */
ulint mbmaxlen) /*!< in: maximum length of a multibyte char */
{
switch (mtype) {
case DATA_SYS:
#ifdef UNIV_DEBUG
/* Debug builds only: check that len matches the system column named by
prtype. An unrecognized system column asserts and returns 0. */
switch (prtype & DATA_MYSQL_TYPE_MASK) {
case DATA_ROW_ID:
ut_ad(len == DATA_ROW_ID_LEN);
break;
case DATA_TRX_ID:
ut_ad(len == DATA_TRX_ID_LEN);
break;
case DATA_ROLL_PTR:
ut_ad(len == DATA_ROLL_PTR_LEN);
break;
default:
ut_ad(0);
return(0);
}
#endif /* UNIV_DEBUG */
/* DATA_SYS deliberately falls through to the fixed-size types below. */
case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_INT:
case DATA_FLOAT:
case DATA_DOUBLE:
return(len);
case DATA_MYSQL:
if ((prtype & DATA_BINARY_TYPE) || mbminlen == mbmaxlen) {
return(len);
}
/* this is a variable-length character set */
ut_a(mbminlen > 0);
ut_a(mbmaxlen > mbminlen);
ut_a(len % mbmaxlen == 0);
/* len is a whole number of maximum-width characters (asserted above),
so this is that character count times the minimum width. */
return(len * mbminlen / mbmaxlen);
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
case DATA_VARMYSQL:
case DATA_BLOB:
return(0);
default:
ut_error;
}
/* Reached only after the default branch above. */
return(0);
}
/***********************************************************************//**
Returns the maximum size of a data type: ULINT_MAX for DATA_BLOB and len
for every other known main type. Note: types in system tables may be
incomplete and return incorrect information.
@return maximum size */
UNIV_INLINE
ulint
dtype_get_max_size_low(
/*===================*/
	ulint	mtype,	/*!< in: main type */
	ulint	len)	/*!< in: length */
{
	switch (mtype) {
	case DATA_BLOB:
		/* No upper bound is known for BLOBs. */
		return(ULINT_MAX);
	case DATA_SYS:
	case DATA_CHAR:
	case DATA_FIXBINARY:
	case DATA_INT:
	case DATA_FLOAT:
	case DATA_DOUBLE:
	case DATA_MYSQL:
	case DATA_VARCHAR:
	case DATA_BINARY:
	case DATA_DECIMAL:
	case DATA_VARMYSQL:
		return(len);
	default:
		/* Unknown main type */
		ut_error;
	}

	return(ULINT_MAX);
}
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
For fixed length types it is the fixed length of the type, otherwise 0.
The value is computed by dtype_get_fixed_size_low(); under UNIV_HOTBACKUP
the character lengths and the comp flag are all passed as 0.
@return SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dtype_get_sql_null_size(
/*====================*/
	const dtype_t*	type,	/*!< in: type */
	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT */
{
#ifndef UNIV_HOTBACKUP
	const ulint	min_char_len = type->mbminlen;
	const ulint	max_char_len = type->mbmaxlen;
	const ulint	compact = comp;
#else /* !UNIV_HOTBACKUP */
	const ulint	min_char_len = 0;
	const ulint	max_char_len = 0;
	const ulint	compact = 0;
#endif /* !UNIV_HOTBACKUP */

	return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
					min_char_len, max_char_len, compact));
}

View file

@ -0,0 +1,36 @@
/*****************************************************************************
Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file include/data0types.h
Some type definitions
Created 9/21/2000 Heikki Tuuri
*************************************************************************/
#ifndef data0types_h
#define data0types_h
/* SQL data field struct. Only the type name is introduced here; struct
dfield_struct itself is not defined in this header. */
typedef struct dfield_struct dfield_t;
/* SQL data tuple struct. Only the type name is introduced here; struct
dtuple_struct itself is not defined in this header. */
typedef struct dtuple_struct dtuple_t;
#endif

106
perfschema/include/db0err.h Normal file
View file

@ -0,0 +1,106 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/db0err.h
Global error codes for the database
Created 5/24/1996 Heikki Tuuri
*******************************************************/
#ifndef db0err_h
#define db0err_h
/* Global error codes. DB_SUCCESS is 10 and the general error codes count up
from it; explicit values restart the numbering at 30 (DB_CLUSTER_NOT_FOUND),
1000 (DB_FAIL, partial failure codes) and 1500 (DB_RECORD_NOT_FOUND).
Because most values are implicit, inserting a new enumerator in the middle
of a run renumbers every enumerator after it in that run. */
enum db_err {
DB_SUCCESS = 10,
/* The following are error codes */
DB_ERROR,
DB_INTERRUPTED,
DB_OUT_OF_MEMORY,
DB_OUT_OF_FILE_SPACE,
DB_LOCK_WAIT,
DB_DEADLOCK,
DB_ROLLBACK,
DB_DUPLICATE_KEY,
DB_QUE_THR_SUSPENDED,
DB_MISSING_HISTORY, /* required history data has been
deleted due to lack of space in
rollback segment */
DB_CLUSTER_NOT_FOUND = 30,
DB_TABLE_NOT_FOUND,
DB_MUST_GET_MORE_FILE_SPACE, /* the database has to be stopped
and restarted with more file space */
DB_TABLE_IS_BEING_USED,
DB_TOO_BIG_RECORD, /* a record in an index would not fit
on a compressed page, or it would
become bigger than 1/2 free space in
an uncompressed page frame */
DB_LOCK_WAIT_TIMEOUT, /* lock wait lasted too long */
DB_NO_REFERENCED_ROW, /* referenced key value not found
for a foreign key in an insert or
update of a row */
DB_ROW_IS_REFERENCED, /* cannot delete or update a row
because it contains a key value
which is referenced */
DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint
to a table failed */
DB_CORRUPTION, /* data structure corruption noticed */
DB_COL_APPEARS_TWICE_IN_INDEX, /* InnoDB cannot handle an index
where same column appears twice */
DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint
from a table failed */
DB_NO_SAVEPOINT, /* no savepoint exists with the given
name */
DB_TABLESPACE_ALREADY_EXISTS, /* we cannot create a new single-table
tablespace because a file of the same
name already exists */
DB_TABLESPACE_DELETED, /* tablespace does not exist or is
being dropped right now */
DB_LOCK_TABLE_FULL, /* lock structs have exhausted the
buffer pool (for big transactions,
InnoDB stores the lock structs in the
buffer pool) */
DB_FOREIGN_DUPLICATE_KEY, /* foreign key constraints
activated by the operation would
lead to a duplicate key in some
table */
DB_TOO_MANY_CONCURRENT_TRXS, /* when InnoDB runs out of the
preconfigured undo slots, this can
only happen when there are too many
concurrent transactions */
DB_UNSUPPORTED, /* when InnoDB sees any artefact or
a feature that it can't recognize or
work with e.g., FT indexes created by
a later version of the engine. */
DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY
was found to be NULL */
/* The following are partial failure codes */
DB_FAIL = 1000,
DB_OVERFLOW,
DB_UNDERFLOW,
DB_STRONG_FAIL,
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
DB_END_OF_INDEX
};
#endif

View file

@ -0,0 +1,151 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0boot.h
Data dictionary creation and booting
Created 4/18/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0boot_h
#define dict0boot_h
#include "univ.i"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "ut0byte.h"
#include "buf0buf.h"
#include "fsp0fsp.h"
#include "dict0dict.h"
typedef byte dict_hdr_t;
/**********************************************************************//**
Gets a pointer to the dictionary header and x-latches its page.
@return pointer to the dictionary header, page x-latched */
UNIV_INTERN
dict_hdr_t*
dict_hdr_get(
/*=========*/
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
Returns a new row, table, index, or tree id.
@return the new id */
UNIV_INTERN
dulint
dict_hdr_get_new_id(
/*================*/
ulint type); /*!< in: DICT_HDR_ROW_ID, ... */
/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void);
/*=========================*/
/**********************************************************************//**
Reads a row id from a record or other 6-byte stored form.
@return row id */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
byte* field); /*!< in: record field */
/**********************************************************************//**
Writes a row id to a record or other 6-byte stored form. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
byte* field, /*!< in: record field */
dulint row_id);/*!< in: row id */
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
UNIV_INTERN
void
dict_boot(void);
/*===========*/
/*****************************************************************//**
Creates and initializes the data dictionary at the database creation. */
UNIV_INTERN
void
dict_create(void);
/*=============*/
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
/* The ids for the basic system tables and their indexes */
#define DICT_TABLES_ID ut_dulint_create(0, 1)
#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
#define DICT_INDEXES_ID ut_dulint_create(0, 3)
#define DICT_FIELDS_ID ut_dulint_create(0, 4)
/* The following is a secondary index on SYS_TABLES */
#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
from this number, except for basic
system tables and their above defined
indexes; ibuf tables and indexes are
assigned as the id the number
DICT_IBUF_ID_MIN plus the space id */
#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0)
/* The offset of the dictionary header on the page */
#define DICT_HDR FSEG_PAGE_DATA
/*-------------------------------------------------------------*/
/* Dictionary header offsets */
#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. */
#define DICT_HDR_TABLES 32 /* Root of the table index tree */
#define DICT_HDR_TABLE_IDS 36 /* Root of the table id index tree
(the secondary index on SYS_TABLES) */
#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */
#define DICT_HDR_INDEXES 44 /* Root of the index index tree */
#define DICT_HDR_FIELDS 48 /* Root of the index field
index tree */
#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
segment into which the dictionary
header is created */
/*-------------------------------------------------------------*/
/* The field number of the page number field in the sys_indexes table
clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
#define DICT_SYS_INDEXES_TYPE_FIELD 6
#define DICT_SYS_INDEXES_NAME_FIELD 3
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
updated */
#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
#ifndef UNIV_NONINL
#include "dict0boot.ic"
#endif
#endif

View file

@ -0,0 +1,93 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0boot.ic
Data dictionary creation and booting
Created 4/18/1996 Heikki Tuuri
*******************************************************/
/**********************************************************************//**
Writes the current value of the row id counter to the dictionary header file
page. Only declared here; dict_sys_get_new_row_id() below calls it each time
the row id being handed out is a multiple of DICT_HDR_ROW_ID_WRITE_MARGIN. */
UNIV_INTERN
void
dict_hdr_flush_row_id(void);
/*=======================*/
/**********************************************************************//**
Hands out the next row id. The counter dict_sys->row_id is read and then
incremented while dict_sys->mutex is held. If the low word of the id being
handed out is a multiple of DICT_HDR_ROW_ID_WRITE_MARGIN, the counter is
flushed with dict_hdr_flush_row_id() before it is incremented.
@return	the new id */
UNIV_INLINE
dulint
dict_sys_get_new_row_id(void)
/*=========================*/
{
	dulint	new_id;

	mutex_enter(&dict_sys->mutex);

	new_id = dict_sys->row_id;

	if (!(ut_dulint_get_low(new_id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) {

		dict_hdr_flush_row_id();
	}

	UT_DULINT_INC(dict_sys->row_id);

	mutex_exit(&dict_sys->mutex);

	return(new_id);
}
/**********************************************************************//**
Decodes a row id from its 6-byte stored form (in a record or elsewhere)
using mach_read_from_6(). Compilation fails if DATA_ROW_ID_LEN is not 6.
@return	row id */
UNIV_INLINE
dulint
dict_sys_read_row_id(
/*=================*/
	byte*	field)	/*!< in: record field holding the row id */
{
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
#endif
	dulint	row_id = mach_read_from_6(field);

	return(row_id);
}
/**********************************************************************//**
Stores a row id in its 6-byte form (in a record or elsewhere) using
mach_write_to_6(). Compilation fails if DATA_ROW_ID_LEN is not 6. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
	byte*	field,	/*!< out: record field that receives
			the row id */
	dulint	row_id)	/*!< in: row id to store */
{
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
#endif

	mach_write_to_6(field, row_id);
}

View file

@ -0,0 +1,197 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0crea.h
Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0crea_h
#define dict0crea_h
#include "univ.i"
#include "dict0types.h"
#include "dict0dict.h"
#include "que0types.h"
#include "row0types.h"
#include "mtr0mtr.h"
/*********************************************************************//**
Creates a table create graph: the node type used is tab_node_t (see
struct tab_node_struct below for the node layout).
@return own: table create node */
UNIV_INTERN
tab_node_t*
tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
mem_heap_t* heap); /*!< in: memory heap where the node is created */
/*********************************************************************//**
Creates an index create graph: the node type used is ind_node_t (see
struct ind_node_struct below for the node layout).
@return own: index create node */
UNIV_INTERN
ind_node_t*
ind_create_graph_create(
/*====================*/
dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
mem_heap_t* heap); /*!< in: memory heap where the node is created */
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
NOTE(review): presumably driven by the TABLE_... node states defined below -
confirm in dict0crea.c.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
dict_create_table_step(
/*===================*/
que_thr_t* thr); /*!< in: query thread */
/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs.
NOTE(review): presumably driven by the INDEX_... node states defined below -
confirm in dict0crea.c.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
dict_create_index_step(
/*===================*/
que_thr_t* thr); /*!< in: query thread */
/*******************************************************************//**
Truncates the index tree associated with a row in SYS_INDEXES table.
Callers must be prepared for both pcur and mtr to change state during the
call: the cursor may be repositioned and the mini-transaction may be
committed and restarted.
@return new root page number, or FIL_NULL on failure */
UNIV_INTERN
ulint
dict_truncate_index_tree(
/*=====================*/
dict_table_t* table, /*!< in: the table the index belongs to */
ulint space, /*!< in: 0=truncate,
nonzero=create the index tree in the
given tablespace */
btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
record in the clustered index of
SYS_INDEXES table. The cursor may be
repositioned in this call. */
mtr_t* mtr); /*!< in: mtr having the latch
on the record page. The mtr may be
committed and restarted in this call. */
/*******************************************************************//**
Drops the index tree associated with a row in SYS_INDEXES table. The
record is an in/out parameter, so it may be modified by the call. */
UNIV_INTERN
void
dict_drop_index_tree(
/*=================*/
rec_t* rec, /*!< in/out: record in the clustered index
of SYS_INDEXES table */
mtr_t* mtr); /*!< in: mtr having the latch on the record page */
/****************************************************************//**
Creates the foreign key constraint system tables inside InnoDB
at database creation or at database start, if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/********************************************************************//**
Adds foreign key definitions to data dictionary tables in the database. We
look at table->foreign_list, and also generate names for constraints that
were not named by the user. A generated constraint has a name of the format
databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and are
given locally for this table; that is, the number is not global, as it used
to be in the old format constraints (< 4.0.18).
@return error code or DB_SUCCESS */
UNIV_INTERN
ulint
dict_create_add_foreigns_to_dictionary(
/*===================================*/
ulint start_id,/*!< in: if we are actually doing ALTER TABLE
ADD CONSTRAINT, we want to generate constraint
numbers which are bigger than in the table so
far; we number the constraints from
start_id + 1 up; start_id should be set to 0 if
we are creating a new table, or if the table
so far has no constraints for which the name
was generated here */
dict_table_t* table, /*!< in: table */
trx_t* trx); /*!< in: transaction */
/** Table create node structure: a query graph node (type
QUE_NODE_TABLE_CREATE) with child nodes for the inserts and the commit,
followed by the node's own working state. */
struct tab_node_struct{
que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
dict_table_t* table; /*!< table to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* tab_def; /*!< child node which does the insert of
the table definition; the row to be inserted
is built by the parent node */
ins_node_t* col_def; /*!< child node which does the inserts of
the column definitions; the row to be inserted
is built by the parent node */
commit_node_t* commit_node;
/*!< child node which performs a commit after
a successful table creation */
/*----------------------*/
/* Local storage for this graph node */
ulint state; /*!< node execution state */
ulint col_no; /*!< next column definition to insert */
mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
};
/* Table create node states (presumably the values stored in
tab_node_struct::state - confirm in dict0crea.c) */
#define TABLE_BUILD_TABLE_DEF 1
#define TABLE_BUILD_COL_DEF 2
#define TABLE_COMMIT_WORK 3
#define TABLE_ADD_TO_CACHE 4
#define TABLE_COMPLETED 5
/** Index create node structure: a query graph node (type
QUE_NODE_INDEX_CREATE) with child nodes for the inserts and the commit,
followed by the node's own working state. */
struct ind_node_struct{
que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
dict_index_t* index; /*!< index to create, built as a memory data
structure with dict_mem_... functions */
ins_node_t* ind_def; /*!< child node which does the insert of
the index definition; the row to be inserted
is built by the parent node */
ins_node_t* field_def; /*!< child node which does the inserts of
the field definitions; the row to be inserted
is built by the parent node */
commit_node_t* commit_node;
/*!< child node which performs a commit after
a successful index creation */
/*----------------------*/
/* Local storage for this graph node */
ulint state; /*!< node execution state */
ulint page_no;/*!< root page number of the index */
dict_table_t* table; /*!< table which owns the index */
dtuple_t* ind_row;/*!< index definition row built */
ulint field_no;/*!< next field definition to insert */
mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
};
/* Index create node states (presumably the values stored in
ind_node_struct::state - confirm in dict0crea.c) */
#define INDEX_BUILD_INDEX_DEF 1
#define INDEX_BUILD_FIELD_DEF 2
#define INDEX_CREATE_INDEX_TREE 3
#define INDEX_COMMIT_WORK 4
#define INDEX_ADD_TO_CACHE 5
#ifndef UNIV_NONINL
#include "dict0crea.ic"
#endif
#endif

View file

@ -0,0 +1,25 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0crea.ic
Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,806 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file include/dict0dict.ic
Data dictionary system
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
#include "data0type.h"
#ifndef UNIV_HOTBACKUP
#include "dict0load.h"
#include "rem0types.h"
/*********************************************************************//**
Copies the type attributes of a column (mtype, prtype, len, mbminlen and
mbmaxlen) into a data type object. */
UNIV_INLINE
void
dict_col_copy_type(
/*===============*/
	const dict_col_t*	col,	/*!< in: column to copy from */
	dtype_t*		type)	/*!< out: data type filled in */
{
	ut_ad(col && type);

	/* The five assignments are independent of each other. */
	type->mbmaxlen	= col->mbmaxlen;
	type->mbminlen	= col->mbminlen;
	type->len	= col->len;
	type->prtype	= col->prtype;
	type->mtype	= col->mtype;
}
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
Debug-only consistency check between a column and a data type. Every
attribute is asserted on its own, so a failing ut_ad() names the attribute
that differs. The multi-byte length attributes are compared only when
UNIV_HOTBACKUP is not defined.
@return	TRUE (a mismatch trips an assertion instead of returning) */
UNIV_INLINE
ibool
dict_col_type_assert_equal(
/*=======================*/
	const dict_col_t*	col,	/*!< in: column */
	const dtype_t*		type)	/*!< in: data type */
{
	/* Both objects must exist before their fields are compared. */
	ut_ad(col);
	ut_ad(type);

	ut_ad(col->mtype == type->mtype);
	ut_ad(col->prtype == type->prtype);
	ut_ad(col->len == type->len);
# ifndef UNIV_HOTBACKUP
	ut_ad(col->mbminlen == type->mbminlen);
	ut_ad(col->mbmaxlen == type->mbmaxlen);
# endif /* !UNIV_HOTBACKUP */

	return(TRUE);
}
#endif /* UNIV_DEBUG */
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Computes the minimum size of a column by handing its type attributes to
dtype_get_min_size_low().
@return	minimum size */
UNIV_INLINE
ulint
dict_col_get_min_size(
/*==================*/
	const dict_col_t*	col)	/*!< in: column */
{
	return(dtype_get_min_size_low(
		       col->mtype, col->prtype, col->len,
		       col->mbminlen, col->mbmaxlen));
}
/***********************************************************************//**
Computes the maximum size of a column by handing its main type and length
to dtype_get_max_size_low().
@return	maximum size */
UNIV_INLINE
ulint
dict_col_get_max_size(
/*==================*/
	const dict_col_t*	col)	/*!< in: column */
{
	return(dtype_get_max_size_low(
		       col->mtype,
		       col->len));
}
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************//**
Computes the size of a fixed size column by handing its type attributes
and the row format flag to dtype_get_fixed_size_low().
@return	fixed size, or 0 if the column is not of fixed size */
UNIV_INLINE
ulint
dict_col_get_fixed_size(
/*====================*/
	const dict_col_t*	col,	/*!< in: column */
	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT */
{
	return(dtype_get_fixed_size_low(
		       col->mtype, col->prtype, col->len,
		       col->mbminlen, col->mbmaxlen,
		       comp));
}
/***********************************************************************//**
Returns the size that an SQL NULL value of a column occupies in
ROW_FORMAT=REDUNDANT. This is simply dict_col_get_fixed_size(): the fixed
length for fixed length types, otherwise 0.
@return	SQL null storage size in ROW_FORMAT=REDUNDANT */
UNIV_INLINE
ulint
dict_col_get_sql_null_size(
/*=======================*/
	const dict_col_t*	col,	/*!< in: column */
	ulint			comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT */
{
	ulint	null_size = dict_col_get_fixed_size(col, comp);

	return(null_size);
}
/*********************************************************************//**
Reads the column number stored in a column object.
@return	col->ind, table column position (starting from 0) */
UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
	const dict_col_t*	col)	/*!< in: column */
{
	ulint	col_no;

	ut_ad(col);

	col_no = col->ind;

	return(col_no);
}
/*********************************************************************//**
Finds the position of a column in the clustered index. The defined fields
of the index are scanned in order; a field matches when it refers to the
column and has no prefix length.
@return	position of the first matching field, or ULINT_UNDEFINED if the
column does not appear as a full (non-prefix) field */
UNIV_INLINE
ulint
dict_col_get_clust_pos(
/*===================*/
	const dict_col_t*	col,		/*!< in: table column */
	const dict_index_t*	clust_index)	/*!< in: clustered index */
{
	ulint	pos;

	ut_ad(col);
	ut_ad(clust_index);
	ut_ad(dict_index_is_clust(clust_index));

	for (pos = 0; pos < clust_index->n_def; pos++) {
		const dict_field_t*	field = clust_index->fields + pos;

		if (field->prefix_len || field->col != col) {
			/* Prefix field or some other column: keep looking */
			continue;
		}

		return(pos);
	}

	return(ULINT_UNDEFINED);
}
#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/********************************************************************//**
Returns the head of the index list of a table, which is the clustered
index. This function form is compiled only in UNIV_DEBUG builds.
@return	index, NULL if none exists */
UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
	const dict_table_t*	table)	/*!< in: table */
{
	dict_index_t*	first;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	/* The cast only removes the const qualifier for the list macro. */
	first = UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes);

	return(first);
}
/********************************************************************//**
Returns the index that follows the given one in the index list of its
table. This function form is compiled only in UNIV_DEBUG builds.
@return	index, NULL if none left */
UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
	const dict_index_t*	index)	/*!< in: index */
{
	dict_index_t*	next;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* The cast only removes the const qualifier for the list macro. */
	next = UT_LIST_GET_NEXT(indexes, (dict_index_t*) index);

	return(next);
}
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Tests the DICT_CLUSTERED bit of the index type.
@return	nonzero for clustered index, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_clust(
/*================*/
	const dict_index_t*	index)	/*!< in: index */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* The result is the masked bit itself, not a normalized 0/1. */
	return(UNIV_UNLIKELY(index->type & DICT_CLUSTERED));
}
/********************************************************************//**
Tests the DICT_UNIQUE bit of the index type.
@return	nonzero for unique index, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_unique(
/*=================*/
	const dict_index_t*	index)	/*!< in: index */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* The result is the masked bit itself, not a normalized 0/1. */
	return(UNIV_UNLIKELY(index->type & DICT_UNIQUE));
}
/********************************************************************//**
Tests the DICT_IBUF bit of the index type, i.e. whether the index is the
insert buffer tree.
@return	nonzero for insert buffer, zero for other indexes */
UNIV_INLINE
ulint
dict_index_is_ibuf(
/*===============*/
	const dict_index_t*	index)	/*!< in: index */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	/* The result is the masked bit itself, not a normalized 0/1. */
	return(UNIV_UNLIKELY(index->type & DICT_IBUF));
}
/********************************************************************//**
Check whether the index is a secondary index or the insert buffer tree:
either the DICT_CLUSTERED bit is clear or the DICT_IBUF bit is set in the
index type.
@return	nonzero for a secondary index or the insert buffer tree, zero for
other (clustered, non-ibuf) indexes */
UNIV_INLINE
ulint
dict_index_is_sec_or_ibuf(
/*======================*/
	const dict_index_t*	index)	/*!< in: index */
{
	ulint	index_type;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	index_type = index->type;

	return(UNIV_LIKELY(!(index_type & DICT_CLUSTERED)
			   || (index_type & DICT_IBUF)));
}
/********************************************************************//**
Returns how many of the columns of a table in the dictionary cache are
user-defined: the total column count less the DATA_N_SYS_COLS system
columns.
@return	number of user-defined (e.g., not ROW_ID) columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_user_cols(
/*=======================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	/* n_cols counts the system columns as well. */
	return(table->n_cols - DATA_N_SYS_COLS);
}
/********************************************************************//**
Returns the number of system columns of a table in the dictionary cache.
The count is the constant DATA_N_SYS_COLS; the table argument is used only
by the debug assertions, hence the unused attribute.
@return	number of system (e.g., ROW_ID) columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_sys_cols(
/*======================*/
	const dict_table_t*	table __attribute__((unused)))
					/*!< in: table */
{
	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
	ut_ad(table->cached);

	return(DATA_N_SYS_COLS);
}
/********************************************************************//**
Returns the total column count (user and system columns) of a table in
the dictionary cache.
@return	number of columns of a table */
UNIV_INLINE
ulint
dict_table_get_n_cols(
/*==================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ulint	n_cols;

	ut_ad(table);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	n_cols = table->n_cols;

	return(n_cols);
}
#ifdef UNIV_DEBUG
/********************************************************************//**
Returns a pointer to the column at a given position in the column array
of a table. This function form is compiled only in UNIV_DEBUG builds; the
position must be below table->n_def.
@return	pointer to column object */
UNIV_INLINE
dict_col_t*
dict_table_get_nth_col(
/*===================*/
	const dict_table_t*	table,	/*!< in: table */
	ulint			pos)	/*!< in: position of column */
{
	dict_col_t*	first_col;

	ut_ad(table);
	ut_ad(pos < table->n_def);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	first_col = (dict_col_t*) (table->cols);

	return(first_col + pos);
}
/********************************************************************//**
Returns a system column of a table. The system columns are the last
DATA_N_SYS_COLS columns of the table, so the wanted one sits at position
n_cols - DATA_N_SYS_COLS + sys. This function form is compiled only in
UNIV_DEBUG builds and also checks the type of the column found.
@return	pointer to column object */
UNIV_INLINE
dict_col_t*
dict_table_get_sys_col(
/*===================*/
	const dict_table_t*	table,	/*!< in: table */
	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
{
	dict_col_t*	col;
	ulint		pos;

	ut_ad(table);
	ut_ad(sys < DATA_N_SYS_COLS);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	pos = table->n_cols - DATA_N_SYS_COLS + sys;
	col = dict_table_get_nth_col(table, pos);

	ut_ad(col->mtype == DATA_SYS);
	ut_ad(col->prtype == (sys | DATA_NOT_NULL));

	return(col);
}
#endif /* UNIV_DEBUG */
/********************************************************************//**
Returns the column number of a system column of a table. The system
columns are numbered after the user columns, so the result is
n_cols - DATA_N_SYS_COLS + sys.
@return	column number */
UNIV_INLINE
ulint
dict_table_get_sys_col_no(
/*======================*/
	const dict_table_t*	table,	/*!< in: table */
	ulint			sys)	/*!< in: DATA_ROW_ID, ... */
{
	ut_ad(table);
	ut_ad(sys < DATA_N_SYS_COLS);
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	/* Same position arithmetic as in dict_table_get_sys_col(). */
	return(table->n_cols - DATA_N_SYS_COLS + sys);
}
/********************************************************************//**
Check whether the table uses the compact page format, by testing the
DICT_TF_COMPACT bit of the table flags.
@return	TRUE if table uses the compact page format */
UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_ad(table);

	/* The masked flag is returned as is. It is a proper ibool only
	if the flag bit has the value TRUE, so this is checked at compile
	time. The directive now carries a message, so a failing build says
	which condition was violated. */
#if DICT_TF_COMPACT != TRUE
# error "DICT_TF_COMPACT != TRUE"
#endif

	return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT));
}
/********************************************************************//**
Extracts the file format version from the table flags: the bits selected
by DICT_TF_FORMAT_MASK, shifted down by DICT_TF_FORMAT_SHIFT.
@return	file format version */
UNIV_INLINE
ulint
dict_table_get_format(
/*==================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_ad(table);

	return((table->flags & DICT_TF_FORMAT_MASK)
	       >> DICT_TF_FORMAT_SHIFT);
}
/********************************************************************//**
Sets the file format of a table: the format bits of the table flags are
cleared and replaced by the given version shifted up by
DICT_TF_FORMAT_SHIFT. The other flag bits are left as they are. The value
is not range-checked here. */
UNIV_INLINE
void
dict_table_set_format(
/*==================*/
	dict_table_t*	table,	/*!< in/out: table */
	ulint		format)	/*!< in: file format version */
{
	ut_ad(table);

	table->flags = (table->flags & ~DICT_TF_FORMAT_MASK)
		| (format << DICT_TF_FORMAT_SHIFT);
}
/********************************************************************//**
Decodes the compressed page size from table flags. If the bits selected by
DICT_TF_ZSSIZE_MASK are all zero the table is not compressed; otherwise
the size is (PAGE_ZIP_MIN_SIZE >> 1) shifted left by the masked bits
moved down by DICT_TF_ZSSIZE_SHIFT.
@return	compressed page size, or 0 if not compressed */
UNIV_INLINE
ulint
dict_table_flags_to_zip_size(
/*=========================*/
	ulint	flags)	/*!< in: flags */
{
	ulint	ssize_bits = flags & DICT_TF_ZSSIZE_MASK;
	ulint	zip_size;

	if (UNIV_LIKELY(!ssize_bits)) {
		/* Not compressed */
		return(0);
	}

	zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
		    << (ssize_bits >> DICT_TF_ZSSIZE_SHIFT));

	ut_ad(zip_size <= UNIV_PAGE_SIZE);

	return(zip_size);
}
/********************************************************************//**
Returns the compressed page size of a table, decoded from its flags with
dict_table_flags_to_zip_size().
@return	compressed page size, or 0 if not compressed */
UNIV_INLINE
ulint
dict_table_zip_size(
/*================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ulint	zip_size;

	ut_ad(table);

	zip_size = dict_table_flags_to_zip_size(table->flags);

	return(zip_size);
}
/********************************************************************//**
Returns the field count of the internal representation of an index. The
count includes the fields added by the dictionary system.
@return	number of fields */
UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
	const dict_index_t*	index)	/*!< in: an internal
					representation of index (in
					the dictionary cache) */
{
	ulint	n_fields;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	n_fields = index->n_fields;

	return(n_fields);
}
/********************************************************************//**
Returns the number of fields in the internal representation of an index
that uniquely determine the position of an index entry, when
multiversioning is not taken into account. For the B-tree, use the value
returned by dict_index_get_n_unique_in_tree() instead. The index must be
in the dictionary cache.
@return	number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
	const dict_index_t*	index)	/*!< in: an internal representation
					of index (in the dictionary cache) */
{
	ulint	n_uniq;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(index->cached);

	n_uniq = index->n_uniq;

	return(n_uniq);
}
/********************************************************************//**
Returns the number of fields in the internal representation of an index
that uniquely determine the position of an index entry, when
multiversioning is also taken into account: dict_index_get_n_unique() for
a clustered index, all the fields for any other index. The index must be
in the dictionary cache.
@return	number of fields */
UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
	const dict_index_t*	index)	/*!< in: an internal representation
					of index (in the dictionary cache) */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(index->cached);

	return(dict_index_is_clust(index)
	       ? dict_index_get_n_unique(index)
	       : dict_index_get_n_fields(index));
}
/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation of clustered indexes we add the row id to the ordering fields
to make a clustered index unique, but this function returns the number of
fields the user defined in the index as ordering fields.
@return	number of fields */
UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
	const dict_index_t*	index)	/*!< in: an internal representation
					of index (in the dictionary cache) */
{
	/* Same debug checks as in the other index accessors of this file;
	they catch a NULL or corrupted index object in debug builds. */
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	return(index->n_user_defined_cols);
}
#ifdef UNIV_DEBUG
/********************************************************************//**
Returns a pointer to the field at a given position in the field array of
an index. This function form is compiled only in UNIV_DEBUG builds; the
position must be below index->n_def.
@return	pointer to field object */
UNIV_INLINE
dict_field_t*
dict_index_get_nth_field(
/*=====================*/
	const dict_index_t*	index,	/*!< in: index */
	ulint			pos)	/*!< in: position of field */
{
	dict_field_t*	first_field;

	ut_ad(index);
	ut_ad(pos < index->n_def);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	first_field = (dict_field_t*) (index->fields);

	return(first_field + pos);
}
#endif /* UNIV_DEBUG */
/********************************************************************//**
Finds the position of a system column in an index. A clustered index is
searched by column object with dict_col_get_clust_pos(); any other index
is searched by column number with dict_index_get_nth_col_pos(). The index
must not be of type DICT_UNIVERSAL.
@return	position, ULINT_UNDEFINED if not contained */
UNIV_INLINE
ulint
dict_index_get_sys_col_pos(
/*=======================*/
	const dict_index_t*	index,	/*!< in: index */
	ulint			type)	/*!< in: DATA_ROW_ID, ... */
{
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(!(index->type & DICT_UNIVERSAL));

	if (!dict_index_is_clust(index)) {

		return(dict_index_get_nth_col_pos(
			       index,
			       dict_table_get_sys_col_no(index->table,
							 type)));
	}

	return(dict_col_get_clust_pos(
		       dict_table_get_sys_col(index->table, type),
		       index));
}
/*********************************************************************//**
Returns the table column that an index field refers to.
@return	field->col, pointer to the table column */
UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
	const dict_field_t*	field)	/*!< in: index field */
{
	const dict_col_t*	col;

	ut_ad(field);

	col = field->col;

	return(col);
}
/********************************************************************//**
Returns the table column behind the field at a given position of an
index.
@return	column */
UNIV_INLINE
const dict_col_t*
dict_index_get_nth_col(
/*===================*/
	const dict_index_t*	index,	/*!< in: index */
	ulint			pos)	/*!< in: position of the field */
{
	const dict_field_t*	field = dict_index_get_nth_field(index, pos);

	return(dict_field_get_col(field));
}
/********************************************************************//**
Returns the column number of the table column behind the field at a
given position of an index.
@return	column number */
UNIV_INLINE
ulint
dict_index_get_nth_col_no(
/*======================*/
	const dict_index_t*	index,	/*!< in: index */
	ulint			pos)	/*!< in: position of the field */
{
	const dict_col_t*	col = dict_index_get_nth_col(index, pos);

	return(dict_col_get_no(col));
}
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Computes the minimum data size of an index record as the sum of the
minimum sizes of the columns of all the fields of the index.
@return	minimum data size in bytes */
UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
	const dict_index_t*	index)	/*!< in: index */
{
	ulint	total	= 0;
	ulint	i;

	/* Visit the fields from the last one down to the first one. */
	for (i = dict_index_get_n_fields(index); i > 0; i--) {
		total += dict_col_get_min_size(
			dict_index_get_nth_col(index, i - 1));
	}

	return(total);
}
/*********************************************************************//**
Reads the space id of the root of the index tree.
@return	space id */
UNIV_INLINE
ulint
dict_index_get_space(
/*=================*/
	const dict_index_t*	index)	/*!< in: index */
{
	ulint	space_id;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	space_id = index->space;

	return(space_id);
}
/*********************************************************************//**
Stores the space id of the root of the index tree in the index object. */
UNIV_INLINE
void
dict_index_set_space(
/*=================*/
	dict_index_t*	index,	/*!< in/out: index */
	ulint		space)	/*!< in: space id */
{
	/* Validate the index object before writing to it. */
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	index->space = space;
}
/*********************************************************************//**
Reads the page number of the root of the index tree.
@return	page number */
UNIV_INLINE
ulint
dict_index_get_page(
/*================*/
	const dict_index_t*	index)	/*!< in: index */
{
	ulint	page_no;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	page_no = index->page;

	return(page_no);
}
/*********************************************************************//**
Stores the page number of the root of the index tree in the index
object. */
UNIV_INLINE
void
dict_index_set_page(
/*================*/
	dict_index_t*	index,	/*!< in/out: index */
	ulint		page)	/*!< in: page number */
{
	/* Validate the index object before writing to it. */
	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	index->page = page;
}
/*********************************************************************//**
Returns the address of the read-write lock embedded in the index object,
i.e. the lock of the index tree.
@return	read-write lock */
UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
/*================*/
	dict_index_t*	index)	/*!< in: index */
{
	rw_lock_t*	lock;

	ut_ad(index);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);

	lock = &index->lock;

	return(lock);
}
/********************************************************************//**
Returns the amount of free space that is reserved on a page for future
updates of records: one sixteenth of UNIV_PAGE_SIZE. The reserve matters
only when there are many consecutive inserts, because updates that make
the records bigger might otherwise fragment the index.
@return	number of free bytes on page, reserved for updates */
UNIV_INLINE
ulint
dict_index_get_space_reserve(void)
/*==============================*/
{
	/* The reserve is a fixed fraction of the page size. */
	return(UNIV_PAGE_SIZE / 16);
}
/**********************************************************************//**
Looks up a table by name in the dictionary cache; nothing is loaded if
the table is not there. The caller must own dict_sys->mutex.
@return	table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_check_if_in_cache_low(
/*=============================*/
	const char*	table_name)	/*!< in: table name */
{
	dict_table_t*	table;
	ulint		name_fold;

	ut_ad(table_name);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	/* The name hash is keyed by the fold value of the table name;
	entries in the same hash chain are told apart by comparing the
	names themselves. */
	name_fold = ut_fold_string(table_name);

	HASH_SEARCH(name_hash, dict_sys->table_hash, name_fold,
		    dict_table_t*, table, ut_ad(table->cached),
		    !strcmp(table->name, table_name));

	return(table);
}
/**********************************************************************//**
Returns a table by name. The dictionary cache is consulted first; if the
table is not cached, dict_load_table() is called to load it. A low-level
function: the caller must own dict_sys->mutex.
@return	table, NULL if not found */
UNIV_INLINE
dict_table_t*
dict_table_get_low(
/*===============*/
	const char*	table_name)	/*!< in: table name */
{
	dict_table_t*	table;

	ut_ad(table_name);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	table = dict_table_check_if_in_cache_low(table_name);

	if (!table) {
		/* Cache miss: try to load the table definition */
		table = dict_load_table(table_name);
	}

	ut_ad(!table || table->cached);

	return(table);
}
/**********************************************************************//**
Returns a table object based on table id. The id hash of the dictionary
cache is searched first; if the table is not cached,
dict_load_table_on_id() is called to load it. The caller must own
dict_sys->mutex.
@return	table, NULL if does not exist */
UNIV_INLINE
dict_table_t*
dict_table_get_on_id_low(
/*=====================*/
	dulint	table_id)	/*!< in: table id */
{
	dict_table_t*	table;
	ulint		id_fold;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	/* Look for the table id in the id hash table */
	id_fold = ut_fold_dulint(table_id);

	HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
		    dict_table_t*, table, ut_ad(table->cached),
		    !ut_dulint_cmp(table->id, table_id));

	if (!table) {
		/* Cache miss: try to load the table definition */
		table = dict_load_table_on_id(table_id);
	}

	ut_ad(!table || table->cached);

	/* TODO: should get the type information from MySQL */

	return(table);
}
#endif /* !UNIV_HOTBACKUP */

View file

@ -0,0 +1,115 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0load.h
Loads to the memory cache database object definitions
from dictionary tables
Created 4/24/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0load_h
#define dict0load_h
#include "univ.i"
#include "dict0types.h"
#include "ut0byte.h"
#include "mem0mem.h"
/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
to what we already read with fil_load_single_table_tablespaces().
In a normal startup, we create the tablespace objects for every table in
InnoDB's data dictionary, if the corresponding .ibd file exists.
We also scan the biggest space id, and store it to fil_system. */
UNIV_INTERN
void
dict_check_tablespaces_and_store_max_id(
/*====================================*/
ibool in_crash_recovery); /*!< in: are we doing a crash recovery */
/********************************************************************//**
Finds the first table name in the given database.
@return own: table name, NULL if no table exists; the caller must free
the memory in the string! */
UNIV_INTERN
char*
dict_get_first_table_name_in_db(
/*============================*/
const char* name); /*!< in: database name which ends in '/' */
/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
a foreign key references columns in this table.
@return table, NULL if does not exist; if the table is stored in an
.ibd file, but the file does not exist, then we set the
ibd_file_missing flag TRUE in the table object we return */
UNIV_INTERN
dict_table_t*
dict_load_table(
/*============*/
const char* name); /*!< in: table name in the
databasename/tablename format */
/***********************************************************************//**
Loads a table object based on the table id.
@return table; NULL if table does not exist */
UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
dulint table_id); /*!< in: table id */
/********************************************************************//**
This function is called when the database is booted.
Loads system table index definitions except for the clustered index which
is added to the dictionary cache at booting before calling this function. */
UNIV_INTERN
void
dict_load_sys_table(
/*================*/
dict_table_t* table); /*!< in: system table */
/***********************************************************************//**
Loads foreign key constraints where the table is either the foreign key
holder or where the table is referenced by a foreign key. Adds these
constraints to the data dictionary. Note that we know that the dictionary
cache already contains all constraints where the other relevant table is
already in the dictionary cache.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
dict_load_foreigns(
/*===============*/
const char* table_name, /*!< in: table name */
ibool check_charsets);/*!< in: TRUE=check charset
compatibility */
/********************************************************************//**
Prints to the standard output information on all tables found in the data
dictionary system table. */
UNIV_INTERN
void
dict_print(void);
/*============*/
#ifndef UNIV_NONINL
#include "dict0load.ic"
#endif
#endif

View file

@ -0,0 +1,26 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0load.ic
Loads to the memory cache database object definitions
from dictionary tables
Created 4/24/1996 Heikki Tuuri
*******************************************************/

View file

@ -0,0 +1,555 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0mem.h
Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0mem_h
#define dict0mem_h
#include "univ.i"
#include "dict0types.h"
#include "data0type.h"
#include "mem0mem.h"
#include "rem0types.h"
#include "btr0types.h"
#ifndef UNIV_HOTBACKUP
# include "lock0types.h"
# include "que0types.h"
# include "sync0rw.h"
#endif /* !UNIV_HOTBACKUP */
#include "ut0mem.h"
#include "ut0lst.h"
#include "ut0rnd.h"
#include "ut0byte.h"
#include "hash0hash.h"
#include "trx0types.h"
/** Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
/* @{ */
#define DICT_CLUSTERED 1 /*!< clustered index */
#define DICT_UNIQUE 2 /*!< unique index */
#define DICT_UNIVERSAL 4 /*!< index which can contain records from any
other index */
#define DICT_IBUF 8 /*!< insert buffer tree */
/* @} */
/** Types for a table object */
#define DICT_TABLE_ORDINARY 1 /*!< ordinary table */
#if 0 /* not implemented */
#define DICT_TABLE_CLUSTER_MEMBER 2
#define DICT_TABLE_CLUSTER 3 /* this means that the table is
really a cluster definition */
#endif
/** Table flags. All unused bits must be 0. */
/* @{ */
#define DICT_TF_COMPACT 1 /* Compact page format.
This must be set for
new file formats
(later than
DICT_TF_FORMAT_51). */
/** Compressed page size (0=uncompressed, up to 15 compressed sizes) */
/* @{ */
#define DICT_TF_ZSSIZE_SHIFT 1
#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT)
#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1)
/* @} */
/** File format */
/* @{ */
#define DICT_TF_FORMAT_SHIFT	5	/* file format */
/** Mask selecting the file format bits of the table flags, i.e. bits
DICT_TF_FORMAT_SHIFT .. DICT_TF_BITS - 1. The mask is built from the
positive constant 1 as (2^width - 1) << shift, because left-shifting
the negative value ~0 would be undefined behavior in C. The result is
a plain int, usable both in code and in #if expressions. */
#define DICT_TF_FORMAT_MASK		\
	(((1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) - 1)	\
	 << DICT_TF_FORMAT_SHIFT)
#define DICT_TF_FORMAT_51	0	/*!< InnoDB/MySQL up to 5.1 */
#define DICT_TF_FORMAT_ZIP	1	/*!< InnoDB plugin for 5.1:
					compressed tables,
					new BLOB treatment */
/** Maximum supported file format */
#define DICT_TF_FORMAT_MAX	DICT_TF_FORMAT_ZIP
/* @} */
#define DICT_TF_BITS		6	/*!< number of flag bits */
/* Compile-time check: the format field must be wide enough to hold
DICT_TF_FORMAT_MAX. */
#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
#endif
/* @} */
/** @brief Additional table flags.
These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags
will be written as 0. The column may contain garbage for tables
created with old versions of InnoDB that only implemented
ROW_FORMAT=REDUNDANT. */
/* @{ */
#define DICT_TF2_SHIFT DICT_TF_BITS
/*!< Shift value for
table->flags. */
#define DICT_TF2_TEMPORARY 1 /*!< TRUE for tables from
CREATE TEMPORARY TABLE. */
#define DICT_TF2_BITS (DICT_TF2_SHIFT + 1)
/*!< Total number of bits
in table->flags. */
/* @} */
/**********************************************************************//**
Creates a table memory object.
@return own: table object */
UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index
of the table is placed; this parameter
is ignored if the table is made
a member of a cluster */
ulint n_cols, /*!< in: number of columns */
ulint flags); /*!< in: table flags */
/****************************************************************//**
Free a table memory object. */
UNIV_INTERN
void
dict_mem_table_free(
/*================*/
dict_table_t* table); /*!< in: table */
/**********************************************************************//**
Adds a column definition to a table. */
UNIV_INTERN
void
dict_mem_table_add_col(
/*===================*/
dict_table_t* table, /*!< in: table */
mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */
const char* name, /*!< in: column name, or NULL */
ulint mtype, /*!< in: main datatype */
ulint prtype, /*!< in: precise type */
ulint len); /*!< in: precision */
/**********************************************************************//**
Creates an index memory object.
@return own: index object */
UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
const char* table_name, /*!< in: table name */
const char* index_name, /*!< in: index name */
ulint space, /*!< in: space where the index tree is
placed, ignored if the index is of
the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields); /*!< in: number of fields */
/**********************************************************************//**
Adds a field definition to an index. NOTE: does not take a copy
of the column name if the field is a column. The memory occupied
by the column name may be released only after publishing the index. */
UNIV_INTERN
void
dict_mem_index_add_field(
/*=====================*/
dict_index_t* index, /*!< in: index */
const char* name, /*!< in: column name */
ulint prefix_len); /*!< in: 0 or the column prefix length
in a MySQL index like
INDEX (textcol(25)) */
/**********************************************************************//**
Frees an index memory object. */
UNIV_INTERN
void
dict_mem_index_free(
/*================*/
dict_index_t* index); /*!< in: index */
/**********************************************************************//**
Creates and initializes a foreign constraint memory object.
@return own: foreign constraint struct */
UNIV_INTERN
dict_foreign_t*
dict_mem_foreign_create(void);
/*=========================*/
/** Data structure for a column in a table. Every member is a bit-field;
the declared widths add up to 64 bits. */
struct dict_col_struct{
/*----------------------*/
/** The following are copied from dtype_t,
so that all bit-fields can be packed tightly. */
/* @{ */
unsigned mtype:8; /*!< main data type */
unsigned prtype:24; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
signedness, whether this is a
binary string, whether this is
a true VARCHAR where MySQL
uses 2 bytes to store the length */
/* the remaining fields do not affect alphabetical ordering: */
unsigned len:16; /*!< length; for MySQL data this
is field->pack_length(),
except that for a >= 5.0.3
type true VARCHAR this is the
maximum byte length of the
string data (in addition to
the string, MySQL uses 1 or 2
bytes to store the string length) */
unsigned mbminlen:2; /*!< minimum length of a
character, in bytes */
unsigned mbmaxlen:3; /*!< maximum length of a
character, in bytes */
/*----------------------*/
/* End of definitions copied from dtype_t */
/* @} */
unsigned ind:10; /*!< table column position
(starting from 0); the 10-bit width
limits positions to 0..1023 */
unsigned ord_part:1; /*!< nonzero if this column
appears in the ordering fields
of an index */
};
/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed column length (or indexed prefix length).
It is set to 3*256, so that one can create a column prefix index on
256 characters of a TEXT or VARCHAR column also in the UTF-8
charset. In that charset, a character may take at most 3 bytes. This
constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN
/** Data structure for a field in an index: a pointer to the table column
plus the column name and two 10-bit length attributes. */
struct dict_field_struct{
dict_col_t* col; /*!< pointer to the table column */
const char* name; /*!< name of the column */
unsigned prefix_len:10; /*!< 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
must be smaller than
DICT_MAX_INDEX_COL_LEN; NOTE that
in the UTF-8 charset, MySQL sets this
to 3 * the prefix len in UTF-8 chars */
unsigned fixed_len:10; /*!< 0 or the fixed length of the
column if smaller than
DICT_MAX_INDEX_COL_LEN */
};
/** Data structure for an index. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create().
The tree location (space, page), the list node, the search info, the
statistics, the lock and trx_id are compiled out when UNIV_HOTBACKUP is
defined; magic_n exists only in UNIV_DEBUG builds. */
struct dict_index_struct{
dulint id; /*!< id of the index */
mem_heap_t* heap; /*!< memory heap */
const char* name; /*!< index name */
const char* table_name;/*!< table name */
dict_table_t* table; /*!< back pointer to table */
#ifndef UNIV_HOTBACKUP
unsigned space:32;
/*!< space where the index tree is placed */
unsigned page:32;/*!< index tree root page number */
#endif /* !UNIV_HOTBACKUP */
unsigned type:4; /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
DICT_UNIVERSAL, DICT_IBUF) */
unsigned trx_id_offset:10;/*!< position of the trx id column
in a clustered index record, if the fields
before it are known to be of a fixed size,
0 otherwise */
unsigned n_user_defined_cols:10;
/*!< number of columns the user defined to
be in the index: in the internal
representation we add more columns */
unsigned n_uniq:10;/*!< number of fields from the beginning
which are enough to determine an index
entry uniquely */
unsigned n_def:10;/*!< number of fields defined so far */
unsigned n_fields:10;/*!< number of fields in the index */
unsigned n_nullable:10;/*!< number of nullable fields */
unsigned cached:1;/*!< TRUE if the index object is in the
dictionary cache */
unsigned to_be_dropped:1;
/*!< TRUE if this index is marked to be
dropped in ha_innobase::prepare_drop_index(),
otherwise FALSE */
dict_field_t* fields; /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t)
indexes;/*!< list of indexes of the table */
btr_search_t* search_info; /*!< info used in optimistic searches */
/*----------------------*/
/** Statistics for query optimization */
/* @{ */
ib_int64_t* stat_n_diff_key_vals;
/*!< approximate number of different
key values for this index, for each
n-column prefix where n <=
dict_get_n_unique(index); we
periodically calculate new
estimates */
ulint stat_index_size;
/*!< approximate index size in
database pages */
ulint stat_n_leaf_pages;
/*!< approximate number of leaf pages in the
index tree */
/* @} */
rw_lock_t lock; /*!< read-write lock protecting the
upper levels of the index tree */
ib_uint64_t trx_id; /*!< id of the transaction that created this
index, or 0 if the index existed
when InnoDB was started up */
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
/** Value of dict_index_struct::magic_n */
# define DICT_INDEX_MAGIC_N 76789786
#endif
};
/** Data structure for a foreign key constraint; an example:
FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
struct dict_foreign_struct{
mem_heap_t* heap; /*!< this object is allocated from
this memory heap */
char* id; /*!< id of the constraint as a
null-terminated string */
unsigned n_fields:10; /*!< number of indexes' first fields
for which the foreign key
constraint is defined: we allow the
indexes to contain more fields than
mentioned in the constraint, as long
as the first fields are as mentioned */
unsigned type:6; /*!< 0 or DICT_FOREIGN_ON_DELETE_CASCADE
or DICT_FOREIGN_ON_DELETE_SET_NULL;
NOTE(review): the 6-bit width matches the
six DICT_FOREIGN_ON_... flags defined in
this file, so presumably any OR of them
may be stored here - confirm */
char* foreign_table_name;/*!< foreign table name */
dict_table_t* foreign_table; /*!< table where the foreign key is */
const char** foreign_col_names;/*!< names of the columns in the
foreign key */
char* referenced_table_name;/*!< referenced table name */
dict_table_t* referenced_table;/*!< table where the referenced key
is */
const char** referenced_col_names;/*!< names of the referenced
columns in the referenced table */
dict_index_t* foreign_index; /*!< foreign index; we require that
both tables contain explicitly defined
indexes for the constraint: InnoDB
does not generate new indexes
implicitly */
dict_index_t* referenced_index;/*!< referenced index */
UT_LIST_NODE_T(dict_foreign_t)
foreign_list; /*!< list node for foreign keys of the
table */
UT_LIST_NODE_T(dict_foreign_t)
referenced_list;/*!< list node for referenced
keys of the table */
};
/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
a foreign key constraint is enforced, therefore RESTRICT just means no flag */
/* @{ */
#define DICT_FOREIGN_ON_DELETE_CASCADE 1 /*!< ON DELETE CASCADE */
#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 /*!< ON DELETE SET NULL */
#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 /*!< ON UPDATE CASCADE */
#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 /*!< ON UPDATE SET NULL */
#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 /*!< ON DELETE NO ACTION */
#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */
/* @} */
/** Data structure for a database table. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_table_create().
Everything from name_hash through autoinc_trx is compiled out when
UNIV_HOTBACKUP is defined; does_not_fit_in_memory and magic_n exist only
in UNIV_DEBUG builds. */
struct dict_table_struct{
dulint id; /*!< id of the table */
mem_heap_t* heap; /*!< memory heap */
const char* name; /*!< table name */
const char* dir_path_of_temp_table;/*!< NULL or the directory path
where a TEMPORARY table that was explicitly
created by a user should be placed if
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
temp\... */
unsigned space:32;
/*!< space where the clustered index of the
table is placed */
unsigned flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */
unsigned ibd_file_missing:1;
/*!< TRUE if this is in a single-table
tablespace and the .ibd file is missing; then
we must return in ha_innodb.cc an error if the
user tries to query such an orphaned table */
unsigned tablespace_discarded:1;
/*!< this flag is set TRUE when the user
calls DISCARD TABLESPACE on this
table, and reset to FALSE in IMPORT
TABLESPACE */
unsigned cached:1;/*!< TRUE if the table object has been added
to the dictionary cache */
unsigned n_def:10;/*!< number of columns defined so far */
unsigned n_cols:10;/*!< number of columns */
dict_col_t* cols; /*!< array of column descriptions */
const char* col_names;
/*!< Column names packed in a character string
"name1\0name2\0...nameN\0". Until
the string contains n_cols, it will be
allocated from a temporary heap. The final
string will be allocated from table->heap. */
#ifndef UNIV_HOTBACKUP
hash_node_t name_hash; /*!< hash chain node */
hash_node_t id_hash; /*!< hash chain node */
UT_LIST_BASE_NODE_T(dict_index_t)
indexes; /*!< list of indexes of the table */
UT_LIST_BASE_NODE_T(dict_foreign_t)
foreign_list;/*!< list of foreign key constraints
in the table; these refer to columns
in other tables */
UT_LIST_BASE_NODE_T(dict_foreign_t)
referenced_list;/*!< list of foreign key constraints
which refer to this table */
UT_LIST_NODE_T(dict_table_t)
table_LRU; /*!< node of the LRU list of tables */
ulint n_mysql_handles_opened;
/*!< count of how many handles MySQL has opened
to this table; dropping of the table is
NOT allowed until this count gets to zero;
MySQL does NOT itself check the number of
open handles at drop */
ulint n_foreign_key_checks_running;
/*!< count of how many foreign key check
operations are currently being performed
on the table: we cannot drop the table while
there are foreign key checks running on
it! */
trx_id_t query_cache_inv_trx_id;
/*!< transactions whose trx id is
smaller than this number are not
allowed to store to the MySQL query
cache or retrieve from it; when a trx
with undo logs commits, it sets this
to the value of the trx id counter for
the tables it had an IX lock on */
UT_LIST_BASE_NODE_T(lock_t)
locks; /*!< list of locks on the table */
#ifdef UNIV_DEBUG
/*----------------------*/
ibool does_not_fit_in_memory;
/*!< this field is used to specify in
simulations tables which are so big
that disk should be accessed: disk
access is simulated by putting the
thread to sleep for a while; NOTE that
this flag is not stored to the data
dictionary on disk, and the database
will forget about value TRUE if it has
to reload the table definition from
disk */
#endif /* UNIV_DEBUG */
/*----------------------*/
unsigned big_rows:1;
/*!< flag: TRUE if the maximum length of
a single row exceeds BIG_ROW_SIZE;
initialized in dict_table_add_to_cache() */
/** Statistics for query optimization */
/* @{ */
unsigned stat_initialized:1; /*!< TRUE if statistics have
been calculated the first time
after database startup or table creation */
ib_int64_t stat_n_rows;
/*!< approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
/*!< approximate clustered index size in
database pages */
ulint stat_sum_of_other_index_sizes;
/*!< other indexes in database pages */
ulint stat_modified_counter;
/*!< when a row is inserted, updated,
or deleted,
we add 1 to this number; we calculate new
estimates for the stat_... values for the
table and the indexes at an interval of 2 GB
or when about 1 / 16 of table has been
modified; also when the estimate operation is
called for MySQL SHOW TABLE STATUS; the
counter is reset to zero at statistics
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
/* @} */
/*----------------------*/
/** The following fields are used by the
AUTOINC code. The actual collection of
tables locked during AUTOINC read/write is
kept in trx_t. In order to quickly determine
whether a transaction has locked the AUTOINC
lock we keep a pointer to the transaction
here in the autoinc_trx variable. This is to
avoid acquiring the kernel mutex and scanning
the vector in trx_t.
When an AUTOINC lock has to wait, the
corresponding lock instance is created on
the trx lock heap rather than use the
pre-allocated instance in autoinc_lock below.*/
/* @{ */
lock_t* autoinc_lock;
/*!< a buffer for an AUTOINC lock
for this table: we allocate the memory here
so that individual transactions can get it
and release it without a need to allocate
space from the lock heap of the trx:
otherwise the lock heap would grow rapidly
if we do a large insert from a select */
mutex_t autoinc_mutex;
/*!< mutex protecting the autoincrement
counter */
ib_uint64_t autoinc;/*!< autoinc counter value to give to the
next inserted row */
ulong n_waiting_or_granted_auto_inc_locks;
/*!< This counter is used to track the number
of granted and pending autoinc locks on this
table. This value is set after acquiring the
kernel mutex but we peek the contents to
determine whether other transactions have
acquired the AUTOINC lock or not. Of course
only one transaction can be granted the
lock but there can be multiple waiters. */
const trx_t* autoinc_trx;
/*!< The transaction that currently holds
the AUTOINC lock on this table. */
/* @} */
/*----------------------*/
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
/** Value of dict_table_struct::magic_n */
# define DICT_TABLE_MAGIC_N 76333786
#endif /* UNIV_DEBUG */
};
#ifndef UNIV_NONINL
#include "dict0mem.ic"
#endif
#endif

View file

@ -0,0 +1,26 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file include/dict0mem.ic
Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
***********************************************************************/

View file

@ -0,0 +1,48 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dict0types.h
Data dictionary global types
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#ifndef dict0types_h
#define dict0types_h
typedef struct dict_sys_struct dict_sys_t;
typedef struct dict_col_struct dict_col_t;
typedef struct dict_field_struct dict_field_t;
typedef struct dict_index_struct dict_index_t;
typedef struct dict_table_struct dict_table_t;
typedef struct dict_foreign_struct dict_foreign_t;
/* A cluster object is a table object with the type field set to
DICT_CLUSTERED */
typedef dict_table_t dict_cluster_t;
typedef struct ind_node_struct ind_node_t;
typedef struct tab_node_struct tab_node_t;
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
#endif

View file

@ -0,0 +1,188 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dyn0dyn.h
The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
#ifndef dyn0dyn_h
#define dyn0dyn_h
#include "univ.i"
#include "ut0lst.h"
#include "mem0mem.h"
/** A block in a dynamically allocated array */
typedef struct dyn_block_struct dyn_block_t;
/** Dynamically allocated array */
typedef dyn_block_t dyn_array_t;
/** This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! */
#define DYN_ARRAY_DATA_SIZE 512
/*********************************************************************//**
Initializes a dynamic array.
@return initialized dyn array */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
dyn_array_t* arr); /*!< in: pointer to a memory buffer of
size sizeof(dyn_array_t) */
/************************************************************//**
Frees a dynamic array. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
dyn_array_t* arr); /*!< in: dyn array */
/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to a buffer in it.
After copying the elements, the caller must close the buffer using
dyn_array_close.
@return pointer to the buffer */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
dyn_array_t* arr, /*!< in: dynamic array */
ulint size); /*!< in: size in bytes of the buffer; MUST be
smaller than DYN_ARRAY_DATA_SIZE! */
/*********************************************************************//**
Closes the buffer returned by dyn_array_open. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
dyn_array_t* arr, /*!< in: dynamic array */
byte* ptr); /*!< in: buffer space from ptr up was not used */
/*********************************************************************//**
Makes room on top of a dyn array and returns a pointer to
the added element. The caller must copy the element to
the pointer returned.
@return pointer to the element */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
dyn_array_t* arr, /*!< in: dynamic array */
ulint size); /*!< in: size in bytes of the element */
/************************************************************//**
Returns pointer to an element in dyn array.
@return pointer to element */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
dyn_array_t* arr, /*!< in: dyn array */
ulint pos); /*!< in: position of element as bytes
from array start */
/************************************************************//**
Returns the size of stored data in a dyn array.
@return data size in bytes */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
dyn_array_t* arr); /*!< in: dyn array */
/************************************************************//**
Gets the first block in a dyn array.
@return pointer to the first block */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
dyn_array_t* arr); /*!< in: dyn array */
/************************************************************//**
Gets the last block in a dyn array.
@return pointer to the last block */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
dyn_array_t* arr); /*!< in: dyn array */
/********************************************************************//**
Gets the next block in a dyn array.
@return pointer to next, NULL if end of list */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
dyn_array_t* arr, /*!< in: dyn array */
dyn_block_t* block); /*!< in: dyn array block */
/********************************************************************//**
Gets the number of used bytes in a dyn array block.
@return number of bytes used */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
dyn_block_t* block); /*!< in: dyn array block */
/********************************************************************//**
Gets pointer to the start of data in a dyn array block.
@return pointer to data */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
dyn_block_t* block); /*!< in: dyn array block */
/********************************************************//**
Pushes n bytes to a dyn array. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
dyn_array_t* arr, /*!< in: dyn array */
const byte* str, /*!< in: string to write */
ulint len); /*!< in: string length */
/*#################################################################*/
/** @brief A block in a dynamically allocated array.
The first block also serves as the array handle: dyn_array_get_first_block()
returns the array pointer itself.
NOTE! Do not access the fields of the struct directly: the definition
appears here only for the compiler to know its size! */
struct dyn_block_struct{
mem_heap_t* heap; /*!< in the first block this is != NULL
if dynamic allocation has been needed;
dyn_array_free() releases it */
ulint used; /*!< number of data bytes used in this block;
DYN_BLOCK_FULL_FLAG is set when the block
becomes full, so read the byte count via
dyn_block_get_used(), which masks the flag */
byte data[DYN_ARRAY_DATA_SIZE];
/*!< storage for array elements */
UT_LIST_BASE_NODE_T(dyn_block_t) base;
/*!< linear list of dyn blocks: this node is
used only in the first block */
UT_LIST_NODE_T(dyn_block_t) list;
/*!< linear list node: used in all blocks */
#ifdef UNIV_DEBUG
ulint buf_end;/*!< only in the debug version: if dyn
array is opened, this is the buffer
end offset, else this is 0; set by
dyn_array_open() and reset by
dyn_array_close() on the array handle */
ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N);
set by dyn_array_create() and zeroed by
dyn_array_free() */
#endif
};
#ifndef UNIV_NONINL
#include "dyn0dyn.ic"
#endif
#endif

View file

@ -0,0 +1,365 @@
/*****************************************************************************
Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/dyn0dyn.ic
The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
/** Value of dyn_block_struct::magic_n */
#define DYN_BLOCK_MAGIC_N 375767
/** Flag for dyn_block_struct::used that indicates a full block */
#define DYN_BLOCK_FULL_FLAG 0x1000000UL
/************************************************************//**
Adds a new block to a dyn array.
@return created block */
UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
dyn_array_t* arr); /*!< in: dyn array */
/************************************************************//**
Returns the first block of a dyn array. The array handle is itself the
first block, so this is a plain pointer conversion.
@return pointer to the first block */
UNIV_INLINE
dyn_block_t*
dyn_array_get_first_block(
/*======================*/
	dyn_array_t*	arr)	/*!< in: dyn array */
{
	dyn_block_t*	first = arr;

	return(first);
}
/************************************************************//**
Returns the last block of a dyn array. While arr->heap is NULL the array
consists of its first block only; otherwise the tail of the block list
anchored in the first block is returned.
@return pointer to the last block */
UNIV_INLINE
dyn_block_t*
dyn_array_get_last_block(
/*=====================*/
	dyn_array_t*	arr)	/*!< in: dyn array */
{
	return((arr->heap != NULL) ? UT_LIST_GET_LAST(arr->base) : arr);
}
/********************************************************************//**
Returns the successor of a block in a dyn array. A single-block array
(arr->heap == NULL) has no successor by definition.
@return pointer to next, NULL if end of list */
UNIV_INLINE
dyn_block_t*
dyn_array_get_next_block(
/*=====================*/
	dyn_array_t*	arr,	/*!< in: dyn array */
	dyn_block_t*	block)	/*!< in: dyn array block */
{
	ut_ad(arr && block);

	if (arr->heap != NULL) {
		/* Multi-block array: follow the list link */
		return(UT_LIST_GET_NEXT(list, block));
	}

	/* Only the first block exists, so it must be the one passed in */
	ut_ad(arr == block);

	return(NULL);
}
/********************************************************************//**
Returns how many data bytes a dyn array block holds. The raw 'used'
field may carry DYN_BLOCK_FULL_FLAG, which is stripped here.
@return number of bytes used */
UNIV_INLINE
ulint
dyn_block_get_used(
/*===============*/
	dyn_block_t*	block)	/*!< in: dyn array block */
{
	ulint	raw_used;

	ut_ad(block);

	raw_used = block->used;

	return(raw_used & ~DYN_BLOCK_FULL_FLAG);
}
/********************************************************************//**
Returns the address of the first data byte stored in a dyn array block.
@return pointer to data */
UNIV_INLINE
byte*
dyn_block_get_data(
/*===============*/
	dyn_block_t*	block)	/*!< in: dyn array block */
{
	ut_ad(block);

	return(&block->data[0]);
}
/*********************************************************************//**
Initializes a dynamic array in caller-provided memory. The array starts
out as a single empty block with no heap; the list fields are not touched
here.
@return initialized dyn array (the same pointer that was passed in) */
UNIV_INLINE
dyn_array_t*
dyn_array_create(
/*=============*/
	dyn_array_t*	arr)	/*!< in: pointer to a memory buffer of
				size sizeof(dyn_array_t) */
{
	/* The full flag shares the 'used' field with the byte count, so it
	must lie above every possible count. */
#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
#endif
	ut_ad(arr);

	arr->used = 0;
	arr->heap = NULL;

#ifdef UNIV_DEBUG
	arr->magic_n = DYN_BLOCK_MAGIC_N;
	arr->buf_end = 0;
#endif
	return(arr);
}
/************************************************************//**
Frees a dynamic array: releases the heap attached to the array, if any.
The memory of the array handle itself is not freed here. In debug builds
the magic number is cleared so that later use trips the assertions. */
UNIV_INLINE
void
dyn_array_free(
/*===========*/
	dyn_array_t*	arr)	/*!< in: dyn array */
{
	mem_heap_t*	heap = arr->heap;

	if (heap != NULL) {
		mem_heap_free(heap);
	}

#ifdef UNIV_DEBUG
	arr->magic_n = 0;
#endif
}
/*********************************************************************//**
Reserves size bytes at the end of a dyn array and returns a pointer to
the reserved space. The caller must copy the element to the pointer
returned. The first block is tried first, then the last block, and only
if neither has room is a new block added.
@return pointer to the element */
UNIV_INLINE
void*
dyn_array_push(
/*===========*/
	dyn_array_t*	arr,	/*!< in: dynamic array */
	ulint		size)	/*!< in: size in bytes of the element;
				nonzero and at most DYN_ARRAY_DATA_SIZE
				(checked in debug builds only) */
{
	dyn_block_t*	target;
	ulint		offset;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
	ut_ad(size);

	/* The raw 'used' field is compared on purpose: a block marked
	with DYN_BLOCK_FULL_FLAG then always fails the room test. */
	target = arr;

	if (target->used + size > DYN_ARRAY_DATA_SIZE) {
		/* No room in the first block: try the last one */
		target = dyn_array_get_last_block(arr);

		if (target->used + size > DYN_ARRAY_DATA_SIZE) {
			target = dyn_array_add_block(arr);
		}
	}

	offset = target->used;
	target->used = offset + size;

	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);

	return(target->data + offset);
}
/*********************************************************************//**
Locates room for a buffer of size bytes at the end of a dyn array and
returns a pointer to it. The block's 'used' count is NOT advanced here;
after copying the elements, the caller must close the buffer using
dyn_array_close, which records how much was actually written.
@return pointer to the buffer */
UNIV_INLINE
byte*
dyn_array_open(
/*===========*/
	dyn_array_t*	arr,	/*!< in: dynamic array */
	ulint		size)	/*!< in: size in bytes of the buffer;
				nonzero and MUST NOT exceed
				DYN_ARRAY_DATA_SIZE! */
{
	dyn_block_t*	target;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
	ut_ad(size <= DYN_ARRAY_DATA_SIZE);
	ut_ad(size);

	target = arr;

	if (target->used + size > DYN_ARRAY_DATA_SIZE) {
		/* No room in the first block: try the last one */
		target = dyn_array_get_last_block(arr);

		if (target->used + size > DYN_ARRAY_DATA_SIZE) {
			target = dyn_array_add_block(arr);

			/* Checked in all builds: a larger request could
			not fit even in the fresh block. */
			ut_a(size <= DYN_ARRAY_DATA_SIZE);
		}
	}

	ut_ad(target->used <= DYN_ARRAY_DATA_SIZE);

#ifdef UNIV_DEBUG
	/* Only one buffer may be open at a time */
	ut_ad(arr->buf_end == 0);
	arr->buf_end = target->used + size;
#endif
	return(target->data + target->used);
}
/*********************************************************************//**
Closes the buffer returned by dyn_array_open by setting the used byte
count of the last block to the offset of ptr within that block. */
UNIV_INLINE
void
dyn_array_close(
/*============*/
	dyn_array_t*	arr,	/*!< in: dynamic array */
	byte*		ptr)	/*!< in: buffer space from ptr up was not used */
{
	dyn_block_t*	last;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	last = dyn_array_get_last_block(arr);

	/* ptr must not lie beyond the end of the opened buffer */
	ut_ad(ptr <= last->data + arr->buf_end);

	last->used = ptr - last->data;

	ut_ad(last->used <= DYN_ARRAY_DATA_SIZE);

#ifdef UNIV_DEBUG
	arr->buf_end = 0;
#endif
}
/************************************************************//**
Translates a byte position counted from the start of a dyn array into a
pointer into the block that holds that position.
@return pointer to element */
UNIV_INLINE
void*
dyn_array_get_element(
/*==================*/
	dyn_array_t*	arr,	/*!< in: dyn array */
	ulint		pos)	/*!< in: position of element as bytes
				from array start */
{
	dyn_block_t*	cur;
	ulint		n_used;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	cur = dyn_array_get_first_block(arr);

	if (arr->heap != NULL) {
		/* Several blocks exist: skip whole blocks, reducing pos by
		each block's byte count, until pos falls inside cur. */
		for (n_used = dyn_block_get_used(cur);
		     pos >= n_used;
		     n_used = dyn_block_get_used(cur)) {

			pos -= n_used;
			cur = UT_LIST_GET_NEXT(list, cur);
			ut_ad(cur);
		}
	}

	ut_ad(cur);
	ut_ad(dyn_block_get_used(cur) >= pos);

	return(cur->data + pos);
}
/************************************************************//**
Computes the total number of data bytes stored in a dyn array. A
single-block array answers directly from its 'used' field; otherwise the
per-block byte counts are summed over the whole block list.
@return data size in bytes */
UNIV_INLINE
ulint
dyn_array_get_data_size(
/*====================*/
	dyn_array_t*	arr)	/*!< in: dyn array */
{
	dyn_block_t*	cur;
	ulint		total;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	if (arr->heap == NULL) {

		return(arr->used);
	}

	total = 0;

	for (cur = dyn_array_get_first_block(arr);
	     cur != NULL;
	     cur = dyn_array_get_next_block(arr, cur)) {

		total += dyn_block_get_used(cur);
	}

	return(total);
}
/********************************************************//**
Appends len bytes to a dyn array. The input is copied in pieces of at
most DYN_ARRAY_DATA_SIZE bytes, each pushed as one element. */
UNIV_INLINE
void
dyn_push_string(
/*============*/
	dyn_array_t*	arr,	/*!< in: dyn array */
	const byte*	str,	/*!< in: string to write */
	ulint		len)	/*!< in: string length */
{
	while (len > 0) {
		/* Size of this piece: whatever is left, capped at the
		largest element a single push accepts. */
		ulint	chunk = (len > DYN_ARRAY_DATA_SIZE)
			? DYN_ARRAY_DATA_SIZE
			: len;

		memcpy(dyn_array_push(arr, chunk), str, chunk);

		len -= chunk;
		str += chunk;
	}
}

View file

@ -0,0 +1,114 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/eval0eval.h
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#ifndef eval0eval_h
#define eval0eval_h
#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
/*****************************************************************//**
Free the buffer from global dynamic memory for a value of a que_node,
if it has been allocated in the above function. The freeing for pushed
column values is done in sel_col_prefetch_buf_free. */
UNIV_INTERN
void
eval_node_free_val_buf(
/*===================*/
que_node_t* node); /*!< in: query graph node */
/*****************************************************************//**
Evaluates a symbol table symbol. */
UNIV_INLINE
void
eval_sym(
/*=====*/
sym_node_t* sym_node); /*!< in: symbol table node */
/*****************************************************************//**
Evaluates an expression. */
UNIV_INLINE
void
eval_exp(
/*=====*/
que_node_t* exp_node); /*!< in: expression */
/*****************************************************************//**
Sets an integer value as the value of an expression node. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
que_node_t* node, /*!< in: expression node */
lint val); /*!< in: value to set */
/*****************************************************************//**
Gets an integer value from an expression node.
@return integer value */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
que_node_t* node); /*!< in: expression node */
/*****************************************************************//**
Copies a binary string value as the value of a query graph node. Allocates a
new buffer if necessary. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
que_node_t* node, /*!< in: query graph node */
const byte* str, /*!< in: binary string */
ulint len); /*!< in: string length or UNIV_SQL_NULL */
/*****************************************************************//**
Copies a query node value to another node. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
que_node_t* node1, /*!< in: node to copy to */
que_node_t* node2); /*!< in: node to copy from */
/*****************************************************************//**
Gets an ibool value from a query node.
@return ibool value */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*====================*/
que_node_t* node); /*!< in: query graph node */
/*****************************************************************//**
Evaluates a comparison node.
@return the result of the comparison */
UNIV_INTERN
ibool
eval_cmp(
/*=====*/
func_node_t* cmp_node); /*!< in: comparison node */
#ifndef UNIV_NONINL
#include "eval0eval.ic"
#endif
#endif

View file

@ -0,0 +1,251 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/eval0eval.ic
SQL evaluator: evaluates simple data structures, like expressions, in
a query graph
Created 12/29/1997 Heikki Tuuri
*******************************************************/
#include "que0que.h"
#include "rem0cmp.h"
#include "pars0grm.h"
/*****************************************************************//**
Evaluates a function node. */
UNIV_INTERN
void
eval_func(
/*======*/
func_node_t* func_node); /*!< in: function node */
/*****************************************************************//**
Allocate a buffer from global dynamic memory for a value of a que_node.
NOTE that this memory must be explicitly freed when the query graph is
freed. If the node already has allocated buffer, that buffer is freed
here. NOTE that this is the only function where dynamic memory should be
allocated for a query node val field.
@return pointer to allocated buffer */
UNIV_INTERN
byte*
eval_node_alloc_val_buf(
/*====================*/
que_node_t* node, /*!< in: query graph node; sets the val field
data field to point to the new buffer, and
len field equal to size */
ulint size); /*!< in: buffer size */
/*****************************************************************//**
Makes sure the value field of a query graph node has a buffer of at
least size bytes. The length of the value field is set to size first;
a new buffer is allocated only when the node has no data pointer or its
recorded buffer size is too small.
@return pointer to buffer */
UNIV_INLINE
byte*
eval_node_ensure_val_buf(
/*=====================*/
	que_node_t*	node,	/*!< in: query graph node; sets the val field
				data field to point to the new buffer, and
				len field equal to size */
	ulint		size)	/*!< in: buffer size */
{
	dfield_t*	field;
	byte*		buf;

	field = que_node_get_val(node);
	dfield_set_len(field, size);

	buf = dfield_get_data(field);

	if (buf != NULL && que_node_get_val_buf_size(node) >= size) {
		/* The existing buffer is big enough: reuse it */
		return(buf);
	}

	return(eval_node_alloc_val_buf(node, size));
}
/*****************************************************************//**
Evaluates a symbol table symbol. Only a node with a non-NULL indirection
needs any work: the value data of the node it refers to is copied into
this node's value field. */
UNIV_INLINE
void
eval_sym(
/*=====*/
	sym_node_t*	sym_node)	/*!< in: symbol table node */
{
	ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);

	if (!sym_node->indirection) {
		/* Not an alias: nothing to evaluate */
		return;
	}

	/* The symbol table node is an alias for a variable or a column */
	dfield_copy_data(que_node_get_val(sym_node),
			 que_node_get_val(sym_node->indirection));
}
/*****************************************************************//**
Evaluates an expression. A node of type QUE_NODE_SYMBOL is handled by
eval_sym; any other node is passed to eval_func. */
UNIV_INLINE
void
eval_exp(
/*=====*/
	que_node_t*	exp_node)	/*!< in: expression */
{
	if (que_node_get_type(exp_node) != QUE_NODE_SYMBOL) {

		eval_func(exp_node);
	} else {
		eval_sym((sym_node_t*)exp_node);
	}
}
/*****************************************************************//**
Stores an integer as the value of an expression node, written as 4 bytes
with mach_write_to_4. A 4-byte buffer is allocated if the node has no
data buffer yet. */
UNIV_INLINE
void
eval_node_set_int_val(
/*==================*/
	que_node_t*	node,	/*!< in: expression node */
	lint		val)	/*!< in: value to set */
{
	dfield_t*	field = que_node_get_val(node);
	byte*		buf = dfield_get_data(field);

	if (!buf) {
		buf = eval_node_alloc_val_buf(node, 4);
	}

	/* An already existing buffer is expected to be a 4-byte one */
	ut_ad(dfield_get_len(field) == 4);

	mach_write_to_4(buf, (ulint)val);
}
/*****************************************************************//**
Reads the integer value of an expression node. The value must not be SQL
NULL: the field length is asserted to be 4 in debug builds. The 4 bytes
read with mach_read_from_4 are converted to int before being returned.
@return integer value */
UNIV_INLINE
lint
eval_node_get_int_val(
/*==================*/
	que_node_t*	node)	/*!< in: expression node */
{
	dfield_t*	field = que_node_get_val(node);
	ulint		raw;

	ut_ad(dfield_get_len(field) == 4);

	raw = mach_read_from_4(dfield_get_data(field));

	return((int)raw);
}
/*****************************************************************//**
Reads an ibool value from a query node: the first byte of the node's
value data, read with mach_read_from_1. The node must already have a
data buffer (asserted in debug builds).
@return ibool value */
UNIV_INLINE
ibool
eval_node_get_ibool_val(
/*====================*/
	que_node_t*	node)	/*!< in: query graph node */
{
	byte*	buf = dfield_get_data(que_node_get_val(node));

	ut_ad(buf != NULL);

	return(mach_read_from_1(buf));
}
/*****************************************************************//**
Stores an ibool as the value of a function node, written as a single
byte with mach_write_to_1. A 1-byte buffer is allocated if the node has
no data buffer yet. */
UNIV_INLINE
void
eval_node_set_ibool_val(
/*====================*/
	func_node_t*	func_node,	/*!< in: function node */
	ibool		val)		/*!< in: value to set */
{
	dfield_t*	field = que_node_get_val(func_node);
	byte*		buf = dfield_get_data(field);

	if (!buf) {
		/* Allocate 1 byte to hold the value */
		buf = eval_node_alloc_val_buf(func_node, 1);
	}

	ut_ad(dfield_get_len(field) == 1);

	mach_write_to_1(buf, val);
}
/*****************************************************************//**
Copies a binary string into the value of a query graph node, obtaining a
buffer through eval_node_ensure_val_buf. For len == UNIV_SQL_NULL only
the length of the value field is set and nothing is copied. */
UNIV_INLINE
void
eval_node_copy_and_alloc_val(
/*=========================*/
	que_node_t*	node,	/*!< in: query graph node */
	const byte*	str,	/*!< in: binary string */
	ulint		len)	/*!< in: string length or UNIV_SQL_NULL */
{
	if (len != UNIV_SQL_NULL) {
		byte*	buf = eval_node_ensure_val_buf(node, len);

		ut_memcpy(buf, str, len);
	} else {
		/* SQL NULL: mark the value as NULL, no data to copy */
		dfield_set_len(que_node_get_val(node), len);
	}
}
/*****************************************************************//**
Copies the value of one query node to another by handing the source
node's data pointer and length to eval_node_copy_and_alloc_val. */
UNIV_INLINE
void
eval_node_copy_val(
/*===============*/
	que_node_t*	node1,	/*!< in: node to copy to */
	que_node_t*	node2)	/*!< in: node to copy from */
{
	dfield_t*	src = que_node_get_val(node2);
	const byte*	src_data = dfield_get_data(src);
	ulint		src_len = dfield_get_len(src);

	eval_node_copy_and_alloc_val(node1, src_data, src_len);
}

View file

@ -0,0 +1,104 @@
/*****************************************************************************
Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/eval0proc.h
Executes SQL stored procedures and their control structures
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#ifndef eval0proc_h
#define eval0proc_h
#include "univ.i"
#include "que0types.h"
#include "pars0sym.h"
#include "pars0pars.h"
/**********************************************************************//**
Performs an execution step of a procedure node.
@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of an if-statement node.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
if_step(
/*====*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a while-statement node.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
while_step(
/*=======*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a for-loop node.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
for_step(
/*=====*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of an assignment statement node.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
assign_step(
/*========*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a procedure call node.
@return query thread to run next or NULL */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of an exit statement node.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
exit_step(
/*======*/
que_thr_t* thr); /*!< in: query thread */
/**********************************************************************//**
Performs an execution step of a return-statement node.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
return_step(
/*========*/
que_thr_t* thr); /*!< in: query thread */
#ifndef UNIV_NONINL
#include "eval0proc.ic"
#endif
#endif

View file

@ -0,0 +1,88 @@
/*****************************************************************************
Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/eval0proc.ic
Executes SQL stored procedures and their control structures
Created 1/20/1998 Heikki Tuuri
*******************************************************/
#include "pars0pars.h"
#include "que0que.h"
#include "eval0eval.h"
/**********************************************************************//**
Performs an execution step of a procedure node. When control arrives
from the parent node, execution continues at the first statement of the
procedure's statement list. Otherwise control is handed back to the
parent; debug builds assert that the previous node had no successor. An
empty statement list also hands control straight back to the parent.
@return the query thread passed in */
UNIV_INLINE
que_thr_t*
proc_step(
/*======*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	proc_node_t*	node;
	que_node_t*	parent;
	que_node_t*	next;

	ut_ad(thr);

	node = thr->run_node;
	ut_ad(que_node_get_type(node) == QUE_NODE_PROC);

	parent = que_node_get_parent(node);

	if (thr->prev_node != parent) {
		/* Coming back from the statement list: it must have been
		run to its end, so there is nothing more to execute here */
		ut_ad(que_node_get_next(thr->prev_node) == NULL);

		next = NULL;
	} else {
		/* Start execution from the first statement in the statement
		list */
		next = node->stat_list;
	}

	thr->run_node = (next != NULL) ? next : parent;

	return(thr);
}
/**********************************************************************//**
Performs an execution step of a procedure call node: evaluates the
function node the thread is currently on, then moves the thread to that
node's parent.
@return the query thread passed in */
UNIV_INLINE
que_thr_t*
proc_eval_step(
/*===========*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	func_node_t*	call_node;

	ut_ad(thr);

	call_node = thr->run_node;
	ut_ad(que_node_get_type(call_node) == QUE_NODE_FUNC);

	/* Evaluate the procedure */
	eval_exp(call_node);

	thr->run_node = que_node_get_parent(call_node);

	return(thr);
}

View file

@ -0,0 +1,724 @@
/*****************************************************************************
Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/fil0fil.h
The low-level file system
Created 10/25/1995 Heikki Tuuri
*******************************************************/
#ifndef fil0fil_h
#define fil0fil_h
#include "dict0types.h"
#include "ut0byte.h"
#include "os0file.h"
#ifndef UNIV_HOTBACKUP
#include "sync0rw.h"
#include "ibuf0types.h"
#endif /* !UNIV_HOTBACKUP */
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
directory, and we must set the base file path explicitly */
extern const char* fil_path_to_mysql_datadir;
/** Initial size of a single-table tablespace in pages */
#define FIL_IBD_FILE_INITIAL_SIZE 4
/** 'null' (undefined) page offset in the context of file spaces */
#define FIL_NULL ULINT32_UNDEFINED
/* Space address data type; this is intended to be used when
addresses accurate to a byte are stored in file pages. If the page part
of the address is FIL_NULL, the address is considered undefined. */
typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
stored in a file page is a string of bytes */
#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
/** A struct for storing a space address FIL_ADDR, when it is used
in C program data structures. */
typedef struct fil_addr_struct fil_addr_t;
/** File space address: the form of a space address FIL_ADDR that is used
in C program data structures, as a (page, byte offset) pair. The form
stored in file pages is fil_faddr_t, laid out per FIL_ADDR_PAGE,
FIL_ADDR_BYTE and FIL_ADDR_SIZE. */
struct fil_addr_struct{
ulint page; /*!< page number within a space */
ulint boffset; /*!< byte offset within the page */
};
/** The null file address */
extern fil_addr_t fil_addr_null;
/** The byte offsets on a file page for various variables @{ */
#define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the
page belongs to (== 0) but in later
versions the 'new' checksum of the
page */
#define FIL_PAGE_OFFSET 4 /*!< page offset inside space */
#define FIL_PAGE_PREV 8 /*!< if there is a 'natural'
predecessor of the page, its
offset. Otherwise FIL_NULL.
This field is not set on BLOB
pages, which are stored as a
singly-linked list. See also
FIL_PAGE_NEXT. */
#define FIL_PAGE_NEXT 12 /*!< if there is a 'natural' successor
of the page, its offset.
Otherwise FIL_NULL.
B-tree index pages
(FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
on the same PAGE_LEVEL are maintained
as a doubly linked list via
FIL_PAGE_PREV and FIL_PAGE_NEXT
in the collation order of the
smallest user record on each page. */
#define FIL_PAGE_LSN 16 /*!< lsn of the end of the newest
modification log record to the page */
#define FIL_PAGE_TYPE 24 /*!< file page type: FIL_PAGE_INDEX,...,
2 bytes.
The contents of this field can only
be trusted in the following case:
if the page is an uncompressed
B-tree index page, then it is
guaranteed that the value is
FIL_PAGE_INDEX.
The opposite does not hold.
In tablespaces created by
MySQL/InnoDB 5.1.7 or later, the
contents of this field is valid
for all uncompressed pages. */
#define FIL_PAGE_FILE_FLUSH_LSN 26 /*!< this is only defined for the
first page in a system tablespace
data file (ibdata*, not *.ibd):
the file has been flushed to disk
at least up to this lsn */
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
contains the space id of the page */
#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
/* @} */
/** File page trailer @{ */
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
to store the page checksum, the
last 4 bytes should be identical
to the last 4 bytes of FIL_PAGE_LSN */
#define FIL_PAGE_DATA_END 8 /*!< size of the page trailer */
/* @} */
/** File page types (values of FIL_PAGE_TYPE) @{ */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
#define FIL_PAGE_INODE 3 /*!< Index node */
#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */
/* File page types introduced in MySQL/InnoDB 5.1.7 */
#define FIL_PAGE_TYPE_ALLOCATED 0 /*!< Freshly allocated page */
#define FIL_PAGE_IBUF_BITMAP 5 /*!< Insert buffer bitmap */
#define FIL_PAGE_TYPE_SYS 6 /*!< System page */
#define FIL_PAGE_TYPE_TRX_SYS 7 /*!< Transaction system data */
#define FIL_PAGE_TYPE_FSP_HDR 8 /*!< File space header */
#define FIL_PAGE_TYPE_XDES 9 /*!< Extent descriptor page */
#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */
#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */
/* @} */
/** Space types @{ */
#define FIL_TABLESPACE 501 /*!< tablespace */
#define FIL_LOG 502 /*!< redo log */
/* @} */
/** The number of fsyncs done to the log */
extern ulint fil_n_log_flushes;
/** Number of pending redo log flushes */
extern ulint fil_n_pending_log_flushes;
/** Number of pending tablespace flushes */
extern ulint fil_n_pending_tablespace_flushes;
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Returns the version number of a tablespace, or -1 if the tablespace is not
found in the memory cache.
@return version number, -1 if the tablespace does not exist in the
memory cache */
UNIV_INTERN
ib_int64_t
fil_space_get_version(
/*==================*/
ulint id); /*!< in: space id */
/*******************************************************************//**
Returns the latch of a file space and reports the compressed page size of
the space through zip_size.
@return latch protecting storage allocation */
UNIV_INTERN
rw_lock_t*
fil_space_get_latch(
/*================*/
ulint id, /*!< in: space id */
ulint* zip_size);/*!< out: compressed page size, or
0 for uncompressed tablespaces */
/*******************************************************************//**
Returns the type (purpose) of a file space.
@return FIL_TABLESPACE or FIL_LOG */
UNIV_INTERN
ulint
fil_space_get_type(
/*===============*/
ulint id); /*!< in: space id */
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Appends a new file to the chain of files of a space. The file must be
closed when this is called. */
UNIV_INTERN
void
fil_node_create(
/*============*/
const char* name, /*!< in: file name (file must be closed) */
ulint size, /*!< in: file size in database blocks, rounded
downwards to an integer */
ulint id, /*!< in: id of the space to append the file to */
ibool is_raw);/*!< in: TRUE if a raw device or
a raw disk partition */
#ifdef UNIV_LOG_ARCHIVE
/****************************************************************//**
Drops files from the start of a file space, so that its size is cut by
the amount given. Only declared when UNIV_LOG_ARCHIVE is defined. */
UNIV_INTERN
void
fil_space_truncate_start(
/*=====================*/
ulint id, /*!< in: space id */
ulint trunc_len); /*!< in: truncate by this much; it is an error
if this does not equal the combined size of
some initial files in the space */
#endif /* UNIV_LOG_ARCHIVE */
/*******************************************************************//**
Creates a space memory object and puts it into the 'fil system' hash table.
If there is an error, prints an error message to the .err log.
@return TRUE if success */
UNIV_INTERN
ibool
fil_space_create(
/*=============*/
const char* name, /*!< in: space name */
ulint id, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or
0 for uncompressed tablespaces */
ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
@return space size in pages, 0 if space not found */
UNIV_INTERN
ulint
fil_space_get_size(
/*===============*/
ulint id); /*!< in: space id */
/*******************************************************************//**
Returns the flags of the space. The tablespace must be cached
in the memory cache.
@return tablespace flags, ULINT_UNDEFINED if space not found */
UNIV_INTERN
ulint
fil_space_get_flags(
/*================*/
ulint id); /*!< in: space id */
/*******************************************************************//**
Returns the compressed page size of the space, or 0 if the space
is not compressed. The tablespace must be cached in the memory cache.
@return compressed page size, 0 if the space is not compressed,
ULINT_UNDEFINED if space not found */
UNIV_INTERN
ulint
fil_space_get_zip_size(
/*===================*/
ulint id); /*!< in: space id */
/*******************************************************************//**
Checks if the pair (space id, page_no) refers to an existing page in a
tablespace file space. The tablespace must be cached in the memory cache.
@return TRUE if the address is meaningful */
UNIV_INTERN
ibool
fil_check_adress_in_tablespace(
/*===========================*/
ulint id, /*!< in: space id */
ulint page_no);/*!< in: page number */
/****************************************************************//**
Initializes the tablespace memory cache with the given hash table size and
limit on the number of open files. */
UNIV_INTERN
void
fil_init(
/*=====*/
ulint hash_size, /*!< in: hash table size */
ulint max_n_open); /*!< in: max number of open files */
/*******************************************************************//**
NOTE(review): the original description here read "Initializes the tablespace
memory cache", which is identical to the description of fil_init(). Judging
by the name, this function presumably shuts down (frees) the tablespace
memory cache instead; confirm against the definition in fil0fil.c. */
UNIV_INTERN
void
fil_close(void);
/*===========*/
/*******************************************************************//**
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at server startup, after the
space objects for the log and the system tablespace have been created. The
purpose of this operation is to make sure we never run out of file descriptors
if we need to read from the insert buffer or to write to the log. */
UNIV_INTERN
void
fil_open_log_and_system_tablespace_files(void);
/*==========================================*/
/*******************************************************************//**
Closes all open files. There must not be any pending i/o's or unflushed
modifications in the files. */
UNIV_INTERN
void
fil_close_all_files(void);
/*=====================*/
/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value; otherwise the counter is left as it is. */
UNIV_INTERN
void
fil_set_max_space_id_if_bigger(
/*===========================*/
ulint max_id);/*!< in: maximum known id */
#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace
(see FIL_PAGE_FILE_FLUSH_LSN).
@return DB_SUCCESS or error number */
UNIV_INTERN
ulint
fil_write_flushed_lsn_to_data_files(
/*================================*/
ib_uint64_t lsn, /*!< in: lsn to write */
ulint arch_log_no); /*!< in: latest archived log
file number */
/*******************************************************************//**
Reads the flushed lsn and arch no fields from a data file at database
startup. The archived log number parameters exist only when
UNIV_LOG_ARCHIVE is defined. */
UNIV_INTERN
void
fil_read_flushed_lsn_and_arch_log_no(
/*=================================*/
os_file_t data_file, /*!< in: open data file */
ibool one_read_already, /*!< in: TRUE if min and max
parameters below already
contain sensible data */
#ifdef UNIV_LOG_ARCHIVE
ulint* min_arch_log_no, /*!< in/out: presumably the smallest
archived log number seen so far;
TODO confirm in fil0fil.c */
ulint* max_arch_log_no, /*!< in/out: presumably the largest
archived log number seen so far;
TODO confirm in fil0fil.c */
#endif /* UNIV_LOG_ARCHIVE */
ib_uint64_t* min_flushed_lsn, /*!< in/out: presumably the smallest
flushed lsn seen so far;
TODO confirm in fil0fil.c */
ib_uint64_t* max_flushed_lsn); /*!< in/out: presumably the largest
flushed lsn seen so far;
TODO confirm in fil0fil.c */
/*******************************************************************//**
Increments the count of pending insert buffer page merges, if the space is
not being deleted. Pairs with fil_decr_pending_ibuf_merges().
@return TRUE if being deleted, and ibuf merges should be skipped */
UNIV_INTERN
ibool
fil_inc_pending_ibuf_merges(
/*========================*/
ulint id); /*!< in: space id */
/*******************************************************************//**
Decrements the count of pending insert buffer page merges. Pairs with
fil_inc_pending_ibuf_merges(). */
UNIV_INTERN
void
fil_decr_pending_ibuf_merges(
/*=========================*/
ulint id); /*!< in: space id */
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Parses the body of a log record written about an .ibd file operation. That is,
the log record part after the standard (type, space id, page no) header of the
log record.
If desired, also replays the delete or rename operation if the .ibd file
exists and the space id in it matches. Replays the create operation if a file
at that path does not exist yet. If the database directory for the file to be
created does not exist, then we create the directory, too.
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
datadir that we should use in replaying the file operations.
@return end of log record, or NULL if the record was not completely
contained between ptr and end_ptr */
UNIV_INTERN
byte*
fil_op_log_parse_or_replay(
/*=======================*/
byte* ptr, /*!< in: buffer containing the log record body,
or an initial segment of it, if the record does
not fit completely between ptr and end_ptr */
byte* end_ptr, /*!< in: buffer end */
ulint type, /*!< in: the type of this log record */
ulint space_id, /*!< in: the space id of the tablespace in
question, or 0 if the log record should
only be parsed but not replayed */
ulint log_flags); /*!< in: redo log flags
(stored in the page number parameter) */
/*******************************************************************//**
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache. See also fil_discard_tablespace().
@return TRUE if success */
UNIV_INTERN
ibool
fil_delete_tablespace(
/*==================*/
ulint id); /*!< in: space id */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, except that
1) we do not drop the table from the data dictionary;
2) we remove all insert buffer entries for the tablespace immediately; in DROP
TABLE they are only removed gradually in the background;
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
as it originally had.
@return TRUE if success */
UNIV_INTERN
ibool
fil_discard_tablespace(
/*===================*/
ulint id); /*!< in: space id */
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
tablespace memory cache.
@return TRUE if success */
UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
const char* old_name, /*!< in: old table name in the standard
databasename/tablename format of
InnoDB, or NULL if we do the rename
based on the space id only */
ulint id, /*!< in: space id */
const char* new_name); /*!< in: new table name in the standard
databasename/tablename format
of InnoDB */
/*******************************************************************//**
Creates a new single-table tablespace in a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
path '.'. Tables created with CREATE TEMPORARY TABLE are placed in the temp
dir of the mysqld server.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
fil_create_new_single_table_tablespace(
/*===================================*/
ulint* space_id, /*!< in/out: space id; if this is != 0,
then this is an input parameter,
otherwise output */
const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
of InnoDB, or a dir path to a temp
table */
ibool is_temp, /*!< in: TRUE if a table created with
CREATE TEMPORARY TABLE */
ulint flags, /*!< in: tablespace flags */
ulint size); /*!< in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Tries to open a single-table tablespace and optionally checks that the space
id in it is right. If it does not succeed, prints an error message to the .err
log. This function is used to open a tablespace when we start up mysqld, and
also in IMPORT TABLESPACE.
NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
tablespace open, but closes it after we have looked at the space id in it.
@return TRUE if success */
UNIV_INTERN
ibool
fil_open_single_table_tablespace(
/*=============================*/
ibool check_space_id, /*!< in: should we check that the space
id in the file is right; we assume
that this function runs much faster
if no check is made, since accessing
the file inode probably is much
faster (the OS caches them) than
accessing the first page of the file */
ulint id, /*!< in: space id */
ulint flags, /*!< in: tablespace flags */
const char* name); /*!< in: table name in the
databasename/tablename format */
/********************************************************************//**
It is possible, though very improbable, that the lsn's in the tablespace to be
imported have risen above the current system lsn, if a lengthy purge, ibuf
merge, or rollback was performed on a backup taken with ibbackup. If that is
the case, reset page lsn's in the file. We assume that mysqld was shut down
after it performed these cleanup operations on the .ibd file, so that at
shutdown it stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
first page of the .ibd file, and we can determine whether we need to reset the
lsn's just by looking at that flush lsn.
@return TRUE if success */
UNIV_INTERN
ibool
fil_reset_too_high_lsns(
/*====================*/
const char* name, /*!< in: table name in the
databasename/tablename format */
ib_uint64_t current_lsn); /*!< in: reset lsn's if the lsn stamped
to FIL_PAGE_FILE_FLUSH_LSN in the
first page is too high */
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
At the server startup, if we need crash recovery, scans the database
directories under the MySQL datadir, looking for .ibd files. Those files are
single-table tablespaces. We need to know the space id in each of them so that
we know which file to look into when checking the contents of a page stored
in the doublewrite buffer, and also where to apply log records in which the
space id is != 0.
@return DB_SUCCESS or error number */
UNIV_INTERN
ulint
fil_load_single_table_tablespaces(void);
/*===================================*/
/********************************************************************//**
If we need crash recovery, and we have called
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
we can call this function to print an error message about orphaned .ibd files
for which there is no data dictionary entry with a matching table name
and space id. */
UNIV_INTERN
void
fil_print_orphaned_tablespaces(void);
/*================================*/
/*******************************************************************//**
Returns TRUE if a single-table tablespace does not exist in the memory cache,
or is being deleted there.
@return TRUE if does not exist or is being deleted */
UNIV_INTERN
ibool
fil_tablespace_deleted_or_being_deleted_in_mem(
/*===========================================*/
ulint id, /*!< in: space id */
ib_int64_t version);/*!< in: tablespace_version should be this; if
you pass -1 as the value of this, then this
parameter is ignored */
/*******************************************************************//**
Returns TRUE if a single-table tablespace with the given id exists in the
memory cache.
@return TRUE if exists */
UNIV_INTERN
ibool
fil_tablespace_exists_in_mem(
/*=========================*/
ulint id); /*!< in: space id */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
there may be many tablespaces which are not yet in the memory cache.
@return TRUE if a matching tablespace exists in the memory cache */
UNIV_INTERN
ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
ulint id, /*!< in: space id */
const char* name, /*!< in: table name in the standard
'databasename/tablename' format or
the dir path to a temp table */
ibool is_temp, /*!< in: TRUE if created with CREATE
TEMPORARY TABLE */
ibool mark_space, /*!< in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces (see
fil_print_orphaned_tablespaces()) */
ibool print_error_if_does_not_exist);
/*!< in: print detailed error
information to the .err log if a
matching tablespace is not found in
memory */
#else /* !UNIV_HOTBACKUP */
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
ibbackup --apply-log phase we extended the spaces on-demand so that log records
could be applied, but that may have left spaces still too small compared to
the size stored in the space header. */
UNIV_INTERN
void
fil_extend_tablespaces_to_stored_len(void);
/*======================================*/
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Tries to extend a data file so that it would accommodate the number of pages
given. The tablespace must be cached in the memory cache. If the space is big
enough already, does nothing.
@return TRUE if success */
UNIV_INTERN
ibool
fil_extend_space_to_desired_size(
/*=============================*/
ulint* actual_size, /*!< out: size of the space after extension;
if we ran out of disk space this may be lower
than the desired size */
ulint space_id, /*!< in: space id */
ulint size_after_extend);/*!< in: desired size in pages after the
extension; if the current space size is bigger
than this already, the function does nothing */
/*******************************************************************//**
Tries to reserve free extents in a file space. A successful reservation is
released with fil_space_release_free_extents().
@return TRUE if succeeded */
UNIV_INTERN
ibool
fil_space_reserve_free_extents(
/*===========================*/
ulint id, /*!< in: space id */
ulint n_free_now, /*!< in: number of free extents now */
ulint n_to_reserve); /*!< in: how many one wants to reserve */
/*******************************************************************//**
Releases free extents in a file space that were reserved earlier. */
UNIV_INTERN
void
fil_space_release_free_extents(
/*===========================*/
ulint id, /*!< in: space id */
ulint n_reserved); /*!< in: how many one reserved */
/*******************************************************************//**
Gets the number of reserved extents. If the database is silent, this number
should be zero.
@return the number of reserved extents */
UNIV_INTERN
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
ulint id); /*!< in: space id */
/********************************************************************//**
Reads or writes data. This operation is asynchronous (aio).
NOTE(review): the sync parameter below also allows a synchronous request, so
"asynchronous" presumably describes the underlying mechanism only; confirm.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INTERN
ulint
fil_io(
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
ORed to OS_FILE_LOG, if a log i/o
and ORed to OS_AIO_SIMULATED_WAKE_LATER
if simulated aio and we want to post a
batch of i/os; NOTE that a simulated batch
may introduce hidden chances of deadlocks,
because i/os are not actually handled until
all have been posted: use with great
caution! */
ibool sync, /*!< in: TRUE if synchronous aio is desired */
ulint space_id, /*!< in: space id */
ulint zip_size, /*!< in: compressed page size in bytes;
0 for uncompressed pages */
ulint block_offset, /*!< in: offset in number of blocks */
ulint byte_offset, /*!< in: remainder of offset in bytes; in
aio this must be divisible by the OS block
size */
ulint len, /*!< in: how many bytes to read or write; this
must not cross a file boundary; in aio this
must be a block size multiple */
void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
void* message); /*!< in: message for aio handler if non-sync
aio used, else ignored */
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
into segments (see os0file.c for more info). The calling thread specifies
which segment it wants to wait for. */
UNIV_INTERN
void
fil_aio_wait(
/*=========*/
ulint segment); /*!< in: the number of the segment in the aio
array to wait for */
/**********************************************************************//**
Flushes to disk any writes that may be cached by the OS. If the space does
not exist or is being dropped, does not do anything. */
UNIV_INTERN
void
fil_flush(
/*======*/
ulint space_id); /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
/**********************************************************************//**
Flushes to disk the writes, possibly cached by the OS, in all file spaces of
the given type. */
UNIV_INTERN
void
fil_flush_file_spaces(
/*==================*/
ulint purpose); /*!< in: FIL_TABLESPACE, FIL_LOG */
/******************************************************************//**
Checks the consistency of the tablespace memory cache.
@return TRUE if ok */
UNIV_INTERN
ibool
fil_validate(void);
/*==============*/
/********************************************************************//**
Returns TRUE if the given file address is undefined (null).
@return TRUE if undefined */
UNIV_INTERN
ibool
fil_addr_is_null(
/*=============*/
fil_addr_t addr); /*!< in: address */
/********************************************************************//**
Gets the predecessor of a file page.
@return the value of the FIL_PAGE_PREV field of the page */
UNIV_INTERN
ulint
fil_page_get_prev(
/*==============*/
const byte* page); /*!< in: file page */
/********************************************************************//**
Gets the successor of a file page.
@return the value of the FIL_PAGE_NEXT field of the page */
UNIV_INTERN
ulint
fil_page_get_next(
/*==============*/
const byte* page); /*!< in: file page */
/*********************************************************************//**
Sets the file page type (a value of FIL_PAGE_TYPE, such as FIL_PAGE_INDEX). */
UNIV_INTERN
void
fil_page_set_type(
/*==============*/
byte* page, /*!< in/out: file page */
ulint type); /*!< in: type */
/*********************************************************************//**
Gets the file page type.
@return type; NOTE that if the type has not been written to the page, the
return value is not defined */
UNIV_INTERN
ulint
fil_page_get_type(
/*==============*/
const byte* page); /*!< in: file page */
/** Tablespace or log data space object; struct fil_space_struct is only
forward-declared here, so the type is opaque to users of this header */
typedef struct fil_space_struct fil_space_t;
#endif

View file

@ -0,0 +1,359 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/fsp0fsp.h
File space management
Created 12/18/1995 Heikki Tuuri
*******************************************************/
#ifndef fsp0fsp_h
#define fsp0fsp_h
#include "univ.i"
#include "mtr0mtr.h"
#include "fut0lst.h"
#include "ut0byte.h"
#include "page0types.h"
#include "fsp0types.h"
/**********************************************************************//**
Initializes the file space system. Takes no arguments and returns nothing. */
UNIV_INTERN
void
fsp_init(void);
/*==========*/
/**********************************************************************//**
Gets the current free limit of the system tablespace. The free limit
means the place of the first page which has never been put on the
free list for allocation. The space above that address is initialized
to zero. Also sets the global variable log_fsp_current_free_limit.
@return free limit in megabytes */
UNIV_INTERN
ulint
fsp_header_get_free_limit(void);
/*===========================*/
/**********************************************************************//**
Gets the size of the system tablespace from the tablespace header. If
we do not have an auto-extending data file, this should be equal to
the size of the data files. If there is an auto-extending data file,
the size in the header can be smaller than the size of the data files.
@return size in pages */
UNIV_INTERN
ulint
fsp_header_get_tablespace_size(void);
/*================================*/
/**********************************************************************//**
Reads the file space size stored in the header page of a tablespace.
@return tablespace size stored in the space header */
UNIV_INTERN
ulint
fsp_get_size_low(
/*=============*/
page_t* page); /*!< in: header page (page 0 in the tablespace) */
/**********************************************************************//**
Reads the space id from the first page of a tablespace.
@return space id, ULINT_UNDEFINED if error */
UNIV_INTERN
ulint
fsp_header_get_space_id(
/*====================*/
const page_t* page); /*!< in: first page of a tablespace */
/**********************************************************************//**
Reads the tablespace flags from the first page of a tablespace.
@return flags */
UNIV_INTERN
ulint
fsp_header_get_flags(
/*=================*/
const page_t* page); /*!< in: first page of a tablespace */
/**********************************************************************//**
Reads the compressed page size from the first page of a tablespace.
@return compressed page size in bytes, or 0 if the tablespace is
uncompressed */
UNIV_INTERN
ulint
fsp_header_get_zip_size(
/*====================*/
const page_t* page); /*!< in: first page of a tablespace */
/**********************************************************************//**
Writes the space id and compressed page size to a tablespace header.
This function is used outside the buffer pool (on a page that is not managed
by it) when fil0fil.c creates a new single-table tablespace.
NOTE(review): the summary mentions the compressed page size, but the
parameter actually passed is the tablespace flags word; presumably the page
size is carried inside the flags -- confirm against fsp0fsp.c. */
UNIV_INTERN
void
fsp_header_init_fields(
/*===================*/
page_t* page, /*!< in/out: first page in the space */
ulint space_id, /*!< in: space id */
ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS):
0, or table->flags if newer than COMPACT */
/**********************************************************************//**
Initializes the space header of a newly created space and also creates the
insert buffer tree root if space == 0. */
UNIV_INTERN
void
fsp_header_init(
/*============*/
ulint space, /*!< in: space id */
ulint size, /*!< in: current size in blocks */
mtr_t* mtr); /*!< in: mini-transaction handle */
/**********************************************************************//**
Increases the space size field of a space by the given number of pages. */
UNIV_INTERN
void
fsp_header_inc_size(
/*================*/
ulint space, /*!< in: space id */
ulint size_inc,/*!< in: size increment in pages */
mtr_t* mtr); /*!< in: mini-transaction handle */
/**********************************************************************//**
Creates a new segment. This variant takes no has_done_reservation argument;
see fseg_create_general() for the variant that does.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
UNIV_INTERN
buf_block_t*
fseg_create(
/*========*/
ulint space, /*!< in: space id */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
Creates a new segment. Unlike fseg_create(), the caller states through
has_done_reservation whether it has already reserved the needed extents.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
UNIV_INTERN
buf_block_t*
fseg_create_general(
/*================*/
ulint space, /*!< in: space id */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
ibool has_done_reservation, /*!< in: TRUE if the caller has already
done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment); then there is
no need to do the check for this individual
operation */
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many of those
pages are currently used.
@return number of reserved pages */
UNIV_INTERN
ulint
fseg_n_reserved_pages(
/*==================*/
fseg_header_t* header, /*!< in: segment header */
ulint* used, /*!< out: number of pages used (<= reserved) */
mtr_t* mtr); /*!< in: mtr handle */
/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize
file space fragmentation. This variant takes no has_done_reservation
argument; see fseg_alloc_free_page_general() for the variant that does.
@return the allocated page offset, FIL_NULL if no page could be allocated */
UNIV_INTERN
ulint
fseg_alloc_free_page(
/*=================*/
fseg_header_t* seg_header, /*!< in: segment header */
ulint hint, /*!< in: hint of which page would be desirable */
byte direction, /*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
mtr_t* mtr); /*!< in: mtr handle */
/**********************************************************************//**
Allocates a single free page from a segment. This function implements
the intelligent allocation strategy which tries to minimize file space
fragmentation. Unlike fseg_alloc_free_page(), the caller states through
has_done_reservation whether it has already reserved space for the page.
@return allocated page offset, FIL_NULL if no page could be allocated */
UNIV_INTERN
ulint
fseg_alloc_free_page_general(
/*=========================*/
fseg_header_t* seg_header,/*!< in: segment header */
ulint hint, /*!< in: hint of which page would be desirable */
byte direction,/*!< in: if the new page is needed because
of an index page split, and records are
inserted there in order, into which
direction they go alphabetically: FSP_DOWN,
FSP_UP, FSP_NO_DIR */
ibool has_done_reservation, /*!< in: TRUE if the caller has
already done the reservation for the page
with fsp_reserve_free_extents; then there
is no need to do the check for this individual
page */
mtr_t* mtr); /*!< in: mtr handle */
/**********************************************************************//**
Reserves free pages from a tablespace. All mini-transactions which may
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
must be released with function fil_space_release_free_extents!
The alloc_type below has the following meaning: FSP_NORMAL means an
operation which will probably result in more space usage, like an
insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
deleting rows, then this allocation will in the long run result in
less space usage (after a purge); FSP_CLEANING means allocation done
in a physical record delete (like in a purge) or other cleaning operation
which will result in less space usage in the long run. We prefer the latter
two types of allocation: when space is scarce, FSP_NORMAL allocations
will not succeed, but the latter two allocations will succeed, if possible.
The purpose is to avoid a dead end where the database is full but the
user cannot free any space because these freeing operations temporarily
reserve some space.
Single-table tablespaces whose size is < 32 pages are a special case. In this
function we would liberally reserve several 64 page extents for every page
split or merge in a B-tree. But we do not want to waste disk space if the table
only occupies < 32 pages. That is why we apply different rules in that special
case, just ensuring that there are 3 free pages available.
NOTE(review): the text above gives the small-tablespace threshold as 32
pages, while the n_reserved comment below says 64 pages; confirm which is
correct against fsp0fsp.c.
@return TRUE if we were able to make the reservation */
UNIV_INTERN
ibool
fsp_reserve_free_extents(
/*=====================*/
ulint* n_reserved,/*!< out: number of extents actually reserved; if we
return TRUE and the tablespace size is < 64 pages,
then this can be 0, otherwise it is n_ext */
ulint space, /*!< in: space id */
ulint n_ext, /*!< in: number of extents to reserve */
ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
This function should be used to get information on how much new data we
will still be able to insert into the database without running out of
tablespace. Only free extents are taken into account and we also subtract
the safety margin required by the function fsp_reserve_free_extents.
@return available space in kB */
UNIV_INTERN
ullint
fsp_get_available_space_in_free_extents(
/*====================================*/
ulint space); /*!< in: space id */
/**********************************************************************//**
Frees a single page of a segment. The page is identified by its space id
and page offset. */
UNIV_INTERN
void
fseg_free_page(
/*===========*/
fseg_header_t* seg_header, /*!< in: segment header */
ulint space, /*!< in: space id */
ulint page, /*!< in: page offset */
mtr_t* mtr); /*!< in: mtr handle */
/**********************************************************************//**
Frees part of a segment. A whole segment can be freed by calling this
function repeatedly, each call in a different mini-transaction, until it
returns TRUE. Doing the freeing in a single mini-transaction might result
in too big a mini-transaction.
@return TRUE if freeing completed */
UNIV_INTERN
ibool
fseg_free_step(
/*===========*/
fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
resides on the first page of the frag list
of the segment, this pointer becomes obsolete
after the last freeing step */
mtr_t* mtr); /*!< in: mtr */
/**********************************************************************//**
Frees part of a segment. Differs from fseg_free_step() in that this
function leaves the header page unfreed.
@return TRUE if freeing completed, except the header page */
UNIV_INTERN
ibool
fseg_free_step_not_header(
/*======================*/
fseg_header_t* header, /*!< in: segment header which must reside on
the first fragment page of the segment */
mtr_t* mtr); /*!< in: mtr */
/***********************************************************************//**
Checks if a page address is an extent descriptor page address, i.e. whether
page_no masked with (size - 1) equals FSP_XDES_OFFSET, where size is
zip_size, or UNIV_PAGE_SIZE when zip_size is 0.
@return TRUE if a descriptor page */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
ulint zip_size,/*!< in: compressed page size in bytes;
0 for uncompressed pages */
ulint page_no);/*!< in: page number */
/***********************************************************//**
Parses a redo log record of a file page init.
@return end of log record or NULL */
UNIV_INTERN
byte*
fsp_parse_init_file_page(
/*=====================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr, /*!< in: buffer end */
buf_block_t* block); /*!< in: block or NULL */
/*******************************************************************//**
Validates the file space system and its segments.
@return TRUE if ok */
UNIV_INTERN
ibool
fsp_validate(
/*=========*/
ulint space); /*!< in: space id */
/*******************************************************************//**
Prints info of a file space. */
UNIV_INTERN
void
fsp_print(
/*======*/
ulint space); /*!< in: space id */
#ifdef UNIV_DEBUG
/*******************************************************************//**
Validates a segment. Declared only when UNIV_DEBUG is defined.
@return TRUE if ok */
UNIV_INTERN
ibool
fseg_validate(
/*==========*/
fseg_header_t* header, /*!< in: segment header */
mtr_t* mtr); /*!< in: mtr */
#endif /* UNIV_DEBUG */
#ifdef UNIV_BTR_PRINT
/*******************************************************************//**
Writes info of a segment. Declared only when UNIV_BTR_PRINT is defined. */
UNIV_INTERN
void
fseg_print(
/*=======*/
fseg_header_t* header, /*!< in: segment header */
mtr_t* mtr); /*!< in: mtr */
#endif /* UNIV_BTR_PRINT */
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
#endif
#endif

View file

@ -0,0 +1,45 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/fsp0fsp.ic
File space management
Created 12/18/1995 Heikki Tuuri
*******************************************************/
/***********************************************************************//**
Tells whether a page number addresses an extent descriptor page. The test
masks page_no with (size - 1), where size is zip_size for compressed pages
and UNIV_PAGE_SIZE otherwise, and compares the result to FSP_XDES_OFFSET.
@return TRUE if a descriptor page */
UNIV_INLINE
ibool
fsp_descr_page(
/*===========*/
	ulint	zip_size,/*!< in: compressed page size in bytes;
			0 for uncompressed pages */
	ulint	page_no)/*!< in: page number */
{
	ulint	size;	/* page size used for the mask */

	ut_ad(ut_is_2pow(zip_size));

	/* Uncompressed pages (zip_size == 0) use the universal page size. */
	size = zip_size ? zip_size : UNIV_PAGE_SIZE;

	return(UNIV_UNLIKELY((page_no & (size - 1)) == FSP_XDES_OFFSET));
}

View file

@ -0,0 +1,110 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/fsp0types.h
File space management types
Created May 26, 2009 Vasil Dimov
*******************************************************/
#ifndef fsp0types_h
#define fsp0types_h
#include "univ.i"
#include "fil0fil.h" /* for FIL_PAGE_DATA */
/** @name Flags for inserting records in order
If records are inserted in order, there are the following
flags to tell this (their type is made byte for the compiler
to warn if direction and hint parameters are switched in
fseg_alloc_free_page) */
/* @{ */
#define FSP_UP ((byte)111) /*!< alphabetically upwards */
#define FSP_DOWN ((byte)112) /*!< alphabetically downwards */
#define FSP_NO_DIR ((byte)113) /*!< no order */
/* @} */
/** File space extent size (one megabyte) in pages: 2^20 bytes divided by
2^UNIV_PAGE_SIZE_SHIFT (presumably the page size in bytes - confirm
in univ.i) */
#define FSP_EXTENT_SIZE (1 << (20 - UNIV_PAGE_SIZE_SHIFT))
/** On a page of any file segment, data may be put starting from this
offset */
#define FSEG_PAGE_DATA FIL_PAGE_DATA
/** @name File segment header
The file segment header points to the inode describing the file segment.
The FSEG_HDR_ constants below are byte offsets of the fields within the
header. */
/* @{ */
/** Data type for file segment header */
typedef byte fseg_header_t;
#define FSEG_HDR_SPACE 0 /*!< space id of the inode */
#define FSEG_HDR_PAGE_NO 4 /*!< page number of the inode */
#define FSEG_HDR_OFFSET 8 /*!< byte offset of the inode */
#define FSEG_HEADER_SIZE 10 /*!< Length of the file segment
header, in bytes */
/* @} */
/** Flags for fsp_reserve_free_extents @{ */
#define FSP_NORMAL 1000000 /*!< operation which will probably result in more space usage, like an insert in a B-tree */
#define FSP_UNDO 2000000 /*!< allocation to undo logs */
#define FSP_CLEANING 3000000 /*!< allocation done in a physical record delete (like in a purge) or other cleaning operation */
/* @} */
/* Number of pages described in a single descriptor page: currently each page
description takes less than 1 byte; a descriptor page is repeated every
this many file pages */
/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */
/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
/** @name The space low address page map
The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
every XDES_DESCRIBED_PER_PAGE pages in every tablespace.
XDES_DESCRIBED_PER_PAGE is no longer defined; it stands for either
UNIV_PAGE_SIZE or page_zip->size. */
/* @{ */
/*--------------------------------------*/
#define FSP_XDES_OFFSET 0 /*!< extent descriptor */
#define FSP_IBUF_BITMAP_OFFSET 1 /*!< insert buffer bitmap */
/* The ibuf bitmap pages are the ones whose
page number is the number above plus a
multiple of XDES_DESCRIBED_PER_PAGE */
#define FSP_FIRST_INODE_PAGE_NO 2 /*!< in every tablespace */
/* The following pages exist
in the system tablespace (space 0). */
#define FSP_IBUF_HEADER_PAGE_NO 3 /*!< insert buffer
header page, in
tablespace 0 */
#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /*!< insert buffer
B-tree root page in
tablespace 0 */
/* The ibuf tree root page number in
tablespace 0; its fseg inode is on the page
number FSP_FIRST_INODE_PAGE_NO */
#define FSP_TRX_SYS_PAGE_NO 5 /*!< transaction
system header, in
tablespace 0 */
#define FSP_FIRST_RSEG_PAGE_NO 6 /*!< first rollback segment
page, in tablespace 0 */
#define FSP_DICT_HDR_PAGE_NO 7 /*!< data dictionary header
page, in tablespace 0 */
/*--------------------------------------*/
/* @} */
#endif /* fsp0types_h */

View file

@ -0,0 +1,55 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file include/fut0fut.h
File-based utilities
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#ifndef fut0fut_h
#define fut0fut_h
#include "univ.i"
#include "fil0fil.h"
#include "mtr0mtr.h"
/********************************************************************//**
Gets a pointer to a file address and latches the page.
@return pointer to a byte in a frame; the file page in the frame is
bufferfixed and latched */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
fil_addr_t addr, /*!< in: file address; addr.boffset must be
less than UNIV_PAGE_SIZE (debug assertion) */
ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
mtr_t* mtr); /*!< in: mtr handle */
#ifndef UNIV_NONINL
#include "fut0fut.ic"
#endif
#endif

View file

@ -0,0 +1,56 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file include/fut0fut.ic
File-based utilities
Created 12/13/1995 Heikki Tuuri
***********************************************************************/
#include "sync0rw.h"
#include "buf0buf.h"
/********************************************************************//**
Resolves a file address to a pointer inside a buffer frame. The page
addr.page of the given space is fetched with buf_page_get() using the
requested latch mode, and the byte offset addr.boffset is applied to the
frame of the returned block.
@return pointer to a byte in a frame; the file page in the frame is
bufferfixed and latched */
UNIV_INLINE
byte*
fut_get_ptr(
/*========*/
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	fil_addr_t	addr,	/*!< in: file address */
	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
	mtr_t*		mtr)	/*!< in: mtr handle */
{
	buf_block_t*	page_block;
	byte*		target;

	/* The offset must lie within one page and the latch mode must be
	one of the two supported ones. */
	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	page_block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);

	target = buf_block_get_frame(page_block) + addr.boffset;

	buf_block_dbg_add_level(page_block, SYNC_NO_ORDER_CHECK);

	return(target);
}

View file

@ -0,0 +1,217 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file include/fut0lst.h
File-based list utilities
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#ifndef fut0lst_h
#define fut0lst_h
#include "univ.i"
#include "fil0fil.h"
#include "mtr0mtr.h"
/* The C 'types' of base node and list node: these should be used to
write self-documenting code. Both are plain typedefs of byte, so the
sizeof operator must not be applied to them: it would not give the
physical size of a node. Use FLST_BASE_NODE_SIZE and FLST_NODE_SIZE
instead. */
typedef byte flst_base_node_t;
typedef byte flst_node_t;
/* The physical size of a list base node in bytes: a 4-byte field followed
by two file addresses */
#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
/* The physical size of a list node in bytes: two file addresses */
#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Initializes a list base node. */
UNIV_INLINE
void
flst_init(
/*======*/
flst_base_node_t* base, /*!< in: pointer to base node */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Adds a node as the last node in a list. */
UNIV_INTERN
void
flst_add_last(
/*==========*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node, /*!< in: node to add */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Adds a node as the first node in a list. */
UNIV_INTERN
void
flst_add_first(
/*===========*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node, /*!< in: node to add */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Inserts a node after another in a list: node2 is added after node1. */
UNIV_INTERN
void
flst_insert_after(
/*==============*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node1, /*!< in: node to insert after */
flst_node_t* node2, /*!< in: node to add */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Inserts a node before another in a list: node2 is added before node3. */
UNIV_INTERN
void
flst_insert_before(
/*===============*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node2, /*!< in: node to insert */
flst_node_t* node3, /*!< in: node to insert before */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Removes a node from a list. */
UNIV_INTERN
void
flst_remove(
/*========*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node2, /*!< in: node to remove */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Cuts off the tail of the list, including the node given. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
UNIV_INTERN
void
flst_cut_end(
/*=========*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node2, /*!< in: first node to remove */
ulint n_nodes,/*!< in: number of nodes to remove,
must be >= 1 */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Cuts off the tail of the list, not including the given node. The number of
nodes which will be removed must be provided by the caller, as this function
does not measure the length of the tail. */
UNIV_INTERN
void
flst_truncate_end(
/*==============*/
flst_base_node_t* base, /*!< in: pointer to base node of list */
flst_node_t* node2, /*!< in: first node not to remove */
ulint n_nodes,/*!< in: number of nodes to remove */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list length, as stored in the base node.
@return length */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
const flst_base_node_t* base, /*!< in: pointer to base node */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list first node address, as stored in the base node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
const flst_base_node_t* base, /*!< in: pointer to base node */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list last node address, as stored in the base node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
const flst_base_node_t* base, /*!< in: pointer to base node */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list next node address, as stored in the given node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
const flst_node_t* node, /*!< in: pointer to node */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Gets list prev node address, as stored in the given node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
const flst_node_t* node, /*!< in: pointer to node */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Writes a file address to the field pointed to by faddr. */
UNIV_INLINE
void
flst_write_addr(
/*============*/
fil_faddr_t* faddr, /*!< in: pointer to file faddress */
fil_addr_t addr, /*!< in: file address */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Reads a file address from the field pointed to by faddr.
@return file address */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
const fil_faddr_t* faddr, /*!< in: pointer to file faddress */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************************//**
Validates a file-based list.
@return TRUE if ok */
UNIV_INTERN
ibool
flst_validate(
/*==========*/
const flst_base_node_t* base, /*!< in: pointer to base node of list */
mtr_t* mtr1); /*!< in: mini-transaction handle */
/********************************************************************//**
Prints info of a file-based list. */
UNIV_INTERN
void
flst_print(
/*=======*/
const flst_base_node_t* base, /*!< in: pointer to base node of list */
mtr_t* mtr); /*!< in: mini-transaction handle */
#ifndef UNIV_NONINL
#include "fut0lst.ic"
#endif
#endif /* !UNIV_HOTBACKUP */
#endif

View file

@ -0,0 +1,167 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/******************************************************************//**
@file include/fut0lst.ic
File-based list utilities
Created 11/28/1995 Heikki Tuuri
***********************************************************************/
#include "fut0fut.h"
#include "mtr0log.h"
#include "buf0buf.h"
/* We define the field offsets of a node for the list. A node consists of
two file addresses: previous, then next. */
#define FLST_PREV 0 /* 6-byte address of the previous list element;
the page part of address is FIL_NULL, if no
previous element */
#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
list element; the page part of address
is FIL_NULL, if no next element */
/* We define the field offsets of a base node for the list. A base node
consists of the list length followed by the first and last addresses. */
#define FLST_LEN 0 /* 32-bit list length field */
#define FLST_FIRST 4 /* 6-byte address of the first element
of the list; undefined if empty list */
#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the
last element of the list; undefined
if empty list */
/********************************************************************//**
Stores a file address into the field at faddr. The page number is written
as a 4-byte value and the byte offset as a 2-byte value, both through
mlog_write_ulint(). */
UNIV_INLINE
void
flst_write_addr(
/*============*/
	fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
	fil_addr_t	addr,	/*!< in: file address */
	mtr_t*		mtr)	/*!< in: mini-transaction handle */
{
	fil_faddr_t*	page_field;
	fil_faddr_t*	boffset_field;

	ut_ad(faddr && mtr);
	ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));

	/* A non-null address must have a byte offset of at least
	FIL_PAGE_DATA, and so must the field that is being written. */
	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);

	page_field = faddr + FIL_ADDR_PAGE;
	boffset_field = faddr + FIL_ADDR_BYTE;

	mlog_write_ulint(page_field, addr.page, MLOG_4BYTES, mtr);
	mlog_write_ulint(boffset_field, addr.boffset, MLOG_2BYTES, mtr);
}
/********************************************************************//**
Loads a file address from the field at faddr. The page number is read as
a 4-byte value and the byte offset as a 2-byte value, both through
mtr_read_ulint(); the result is then sanity-checked.
@return file address */
UNIV_INLINE
fil_addr_t
flst_read_addr(
/*===========*/
	const fil_faddr_t*	faddr,	/*!< in: pointer to file faddress */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	fil_addr_t		addr;
	const fil_faddr_t*	page_field;
	const fil_faddr_t*	boffset_field;

	ut_ad(faddr && mtr);

	page_field = faddr + FIL_ADDR_PAGE;
	boffset_field = faddr + FIL_ADDR_BYTE;

	addr.page = mtr_read_ulint(page_field, MLOG_4BYTES, mtr);
	addr.boffset = mtr_read_ulint(boffset_field, MLOG_2BYTES, mtr);

	/* A non-null address must have a byte offset of at least
	FIL_PAGE_DATA, and so must the field that was read. */
	ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
	ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);

	return(addr);
}
/********************************************************************//**
Sets up an empty list: writes a zero length and the null file address
into both the first and the last address fields of the base node. */
UNIV_INLINE
void
flst_init(
/*======*/
	flst_base_node_t*	base,	/*!< in: pointer to base node */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	flst_base_node_t*	len_field;
	flst_base_node_t*	first_field;
	flst_base_node_t*	last_field;

	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));

	len_field = base + FLST_LEN;
	first_field = base + FLST_FIRST;
	last_field = base + FLST_LAST;

	mlog_write_ulint(len_field, 0, MLOG_4BYTES, mtr);
	flst_write_addr(first_field, fil_addr_null, mtr);
	flst_write_addr(last_field, fil_addr_null, mtr);
}
/********************************************************************//**
Reads the 4-byte length field of a list base node.
@return length */
UNIV_INLINE
ulint
flst_get_len(
/*=========*/
	const flst_base_node_t*	base,	/*!< in: pointer to base node */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint	len;

	len = mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr);

	return(len);
}
/********************************************************************//**
Reads the address of the first node from a list base node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_first(
/*===========*/
	const flst_base_node_t*	base,	/*!< in: pointer to base node */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	fil_addr_t	first;

	first = flst_read_addr(base + FLST_FIRST, mtr);

	return(first);
}
/********************************************************************//**
Reads the address of the last node from a list base node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_last(
/*==========*/
	const flst_base_node_t*	base,	/*!< in: pointer to base node */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	fil_addr_t	last;

	last = flst_read_addr(base + FLST_LAST, mtr);

	return(last);
}
/********************************************************************//**
Reads the address of the following node from a list node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_next_addr(
/*===============*/
	const flst_node_t*	node,	/*!< in: pointer to node */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	fil_addr_t	next;

	next = flst_read_addr(node + FLST_NEXT, mtr);

	return(next);
}
/********************************************************************//**
Reads the address of the preceding node from a list node.
@return file address */
UNIV_INLINE
fil_addr_t
flst_get_prev_addr(
/*===============*/
	const flst_node_t*	node,	/*!< in: pointer to node */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	fil_addr_t	prev;

	prev = flst_read_addr(node + FLST_PREV, mtr);

	return(prev);
}

241
perfschema/include/ha0ha.h Normal file
View file

@ -0,0 +1,241 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/ha0ha.h
The hash table with external chains
Created 8/18/1994 Heikki Tuuri
*******************************************************/
#ifndef ha0ha_h
#define ha0ha_h
#include "univ.i"
#include "hash0hash.h"
#include "page0types.h"
#include "buf0types.h"
/*************************************************************//**
Looks for an element in a hash table.
@return pointer to the data of the first hash table node in chain
having the fold number, NULL if not found */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
hash_table_t* table, /*!< in: hash table */
ulint fold); /*!< in: folded value of the searched data */
/*********************************************************//**
Looks for an element when we know the pointer to the data and updates
the pointer to data if found. The new_block parameter exists only when
UNIV_AHI_DEBUG or UNIV_DEBUG is defined; call this function through the
ha_search_and_update_if_found() macro, which hides the difference. */
UNIV_INTERN
void
ha_search_and_update_if_found_func(
/*===============================*/
hash_table_t* table, /*!< in/out: hash table */
ulint fold, /*!< in: folded value of the searched data */
void* data, /*!< in: pointer to the data */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* new_block,/*!< in: block containing new_data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
void* new_data);/*!< in: new pointer to the data */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/** Looks for an element when we know the pointer to the data and
updates the pointer to data if found.
@param table in/out: hash table
@param fold in: folded value of the searched data
@param data in: pointer to the data
@param new_block in: block containing new_data
@param new_data in: new pointer to the data */
# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/** Looks for an element when we know the pointer to the data and
updates the pointer to data if found.
@param table in/out: hash table
@param fold in: folded value of the searched data
@param data in: pointer to the data
@param new_block ignored: block containing new_data
@param new_data in: new pointer to the data */
# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
ha_search_and_update_if_found_func(table,fold,data,new_data)
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/*************************************************************//**
Creates a hash table with at least n array cells. The actual number
of cells is chosen to be a prime number slightly bigger than n.
The mutex_level parameter exists only when UNIV_SYNC_DEBUG is defined;
call this function through the ha_create() macro, which hides the
difference.
@return own: created table */
UNIV_INTERN
hash_table_t*
ha_create_func(
/*===========*/
ulint n, /*!< in: number of array cells */
#ifdef UNIV_SYNC_DEBUG
ulint mutex_level, /*!< in: level of the mutexes in the latching
order: this is used in the debug version */
#endif /* UNIV_SYNC_DEBUG */
ulint n_mutexes); /*!< in: number of mutexes to protect the
hash table: must be a power of 2, or 0 */
#ifdef UNIV_SYNC_DEBUG
/** Creates a hash table. Note that the macro takes its arguments in the
order (n_c, n_m, level), while ha_create_func() takes (n, mutex_level,
n_mutexes).
@return own: created table
@param n_c in: number of array cells. The actual number of cells is
chosen to be a slightly bigger prime number.
@param level in: level of the mutexes in the latching order
@param n_m in: number of mutexes to protect the hash table;
must be a power of 2, or 0 */
# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m)
#else /* UNIV_SYNC_DEBUG */
/** Creates a hash table.
@return own: created table
@param n_c in: number of array cells. The actual number of cells is
chosen to be a slightly bigger prime number.
@param level ignored: level of the mutexes in the latching order
@param n_m in: number of mutexes to protect the hash table;
must be a power of 2, or 0 */
# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m)
#endif /* UNIV_SYNC_DEBUG */
/*************************************************************//**
Empties a hash table and frees the memory heaps. */
UNIV_INTERN
void
ha_clear(
/*=====*/
hash_table_t* table); /*!< in, own: hash table */
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted. The block parameter exists only when UNIV_AHI_DEBUG or
UNIV_DEBUG is defined; call this function through the
ha_insert_for_fold() macro, which hides the difference.
@return TRUE if succeed, FALSE if no more memory could be allocated */
UNIV_INTERN
ibool
ha_insert_for_fold_func(
/*====================*/
hash_table_t* table, /*!< in: hash table */
ulint fold, /*!< in: folded value of data; if a node with
the same fold value already exists, it is
updated to point to the new data, and no new
node is created! */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block, /*!< in: buffer block containing the data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
void* data); /*!< in: data, must not be NULL */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted.
@return TRUE if succeed, FALSE if no more memory could be allocated
@param t in: hash table
@param f in: folded value of data
@param b in: buffer block containing the data
@param d in: data, must not be NULL */
# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted.
@return TRUE if succeed, FALSE if no more memory could be allocated
@param t in: hash table
@param f in: folded value of data
@param b ignored: buffer block containing the data
@param d in: data, must not be NULL */
# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d)
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/*********************************************************//**
Looks for an element when we know the pointer to the data and deletes
it from the hash table if found.
@return TRUE if found */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
hash_table_t* table, /*!< in: hash table */
ulint fold, /*!< in: folded value of the searched data */
void* data); /*!< in: pointer to the data */
#ifndef UNIV_HOTBACKUP
/* The following three functions are declared only when UNIV_HOTBACKUP
is not defined. */
/*****************************************************************//**
Removes from the chain determined by fold all nodes whose data pointer
points to the page given. */
UNIV_INTERN
void
ha_remove_all_nodes_to_page(
/*========================*/
hash_table_t* table, /*!< in: hash table */
ulint fold, /*!< in: fold value */
const page_t* page); /*!< in: buffer page */
/*************************************************************//**
Validates a given range of the cells in hash table.
@return TRUE if ok */
UNIV_INTERN
ibool
ha_validate(
/*========*/
hash_table_t* table, /*!< in: hash table */
ulint start_index, /*!< in: start index */
ulint end_index); /*!< in: end index */
/*************************************************************//**
Prints info of a hash table. */
UNIV_INTERN
void
ha_print_info(
/*==========*/
FILE* file, /*!< in: file where to print */
hash_table_t* table); /*!< in: hash table */
#endif /* !UNIV_HOTBACKUP */
/** The hash table external chain node */
typedef struct ha_node_struct ha_node_t;
/** The hash table external chain node; the nodes of one chain are linked
through the 'next' field. */
struct ha_node_struct {
ha_node_t* next; /*!< next chain node or NULL if none */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block; /*!< buffer block containing the data, or NULL;
this field exists only when UNIV_AHI_DEBUG or
UNIV_DEBUG is defined */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
void* data; /*!< pointer to the data */
ulint fold; /*!< fold value for the data */
};
#ifndef UNIV_HOTBACKUP
/** Assert that the current thread is holding the mutex protecting a
hash bucket corresponding to a fold value. The condition is trivially
satisfied when the table has no mutexes (table->mutexes is zero/NULL), in
which case hash_get_mutex() is not called at all.
@param table in: hash table
@param fold in: fold value */
# define ASSERT_HASH_MUTEX_OWN(table, fold) \
ut_ad(!(table)->mutexes || mutex_own(hash_get_mutex(table, fold)))
#else /* !UNIV_HOTBACKUP */
/** Assert that the current thread is holding the mutex protecting a
hash bucket corresponding to a fold value.
Hot backup variant: expands to a no-op expression, so nothing is checked
and neither argument is evaluated.
@param table in: hash table
@param fold in: fold value */
# define ASSERT_HASH_MUTEX_OWN(table, fold) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
#include "ha0ha.ic"
#endif
#endif

220
perfschema/include/ha0ha.ic Normal file
View file

@ -0,0 +1,220 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/********************************************************************//**
@file include/ha0ha.ic
The hash table with external chains
Created 8/18/1994 Heikki Tuuri
*************************************************************************/
#include "ut0rnd.h"
#include "mem0mem.h"
/***********************************************************//**
Deletes a hash node. */
UNIV_INTERN
void
ha_delete_hash_node(
/*================*/
hash_table_t* table, /*!< in: hash table */
ha_node_t* del_node); /*!< in: node to be deleted */
/******************************************************************//**
Reads the data pointer stored in a hash chain node.
@return	the node's data pointer */
UNIV_INLINE
void*
ha_node_get_data(
/*=============*/
	ha_node_t*	node)	/*!< in: hash chain node */
{
	void*	payload = node->data;

	return(payload);
}
/******************************************************************//**
Stores a data pointer in a hash chain node. When UNIV_AHI_DEBUG or
UNIV_DEBUG is defined, the buffer block owning the data is recorded in
the node as well. */
UNIV_INLINE
void
ha_node_set_data_func(
/*==================*/
	ha_node_t*	node,	/*!< in: hash chain node */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	buf_block_t*	block,	/*!< in: buffer block containing the data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	void*		data)	/*!< in: pointer to the data */
{
	node->data = data;

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	/* debug builds additionally remember the owning block */
	node->block = block;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
}
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
/** Sets hash node data.
Debug variant: all three arguments are forwarded to
ha_node_set_data_func().
@param n in: hash chain node
@param b in: buffer block containing the data
@param d in: pointer to the data */
# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/** Sets hash node data.
Non-debug variant: the block argument is dropped by the preprocessor, so
the expression passed as b is never evaluated.
@param n in: hash chain node
@param b ignored: buffer block containing the data
@param d in: pointer to the data */
# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
/******************************************************************//**
Follows the 'next' link of a hash chain node.
@return	the following node in the chain, NULL if there is none */
UNIV_INLINE
ha_node_t*
ha_chain_get_next(
/*==============*/
	ha_node_t*	node)	/*!< in: hash chain node */
{
	ha_node_t*	successor = node->next;

	return(successor);
}
/******************************************************************//**
Gets the head of the hash chain that a fold value maps to.
@return	first node of the chain, NULL if the chain is empty */
UNIV_INLINE
ha_node_t*
ha_chain_get_first(
/*===============*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold)	/*!< in: fold value determining the chain */
{
	/* index of the cell the fold value hashes to */
	ulint	cell_no = hash_calc_hash(fold, table);

	return((ha_node_t*) hash_get_nth_cell(table, cell_no)->node);
}
/*************************************************************//**
Searches the chain selected by a fold value for a node carrying that
same fold value.
@return	pointer to the first node in the chain whose fold equals the
given one, NULL if there is no such node */
UNIV_INLINE
ha_node_t*
ha_search(
/*======*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold)	/*!< in: folded value of the searched data */
{
	ha_node_t*	cur;

	ASSERT_HASH_MUTEX_OWN(table, fold);

	/* advance until the fold matches or the chain ends; in the latter
	case cur is NULL, which is exactly the "not found" result */
	for (cur = ha_chain_get_first(table, fold);
	     cur != NULL && cur->fold != fold;
	     cur = ha_chain_get_next(cur)) {
		/* the loop header does all the work */
	}

	return(cur);
}
/*************************************************************//**
Searches the chain selected by a fold value and hands back the data of
the first node carrying that fold value.
@return	data pointer of the first matching node in the chain, NULL if
no node has the given fold value */
UNIV_INLINE
void*
ha_search_and_get_data(
/*===================*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold)	/*!< in: folded value of the searched data */
{
	ha_node_t*	cur;

	ASSERT_HASH_MUTEX_OWN(table, fold);

	for (cur = ha_chain_get_first(table, fold);
	     cur != NULL;
	     cur = ha_chain_get_next(cur)) {

		if (cur->fold == fold) {

			return(cur->data);
		}
	}

	/* chain exhausted without a match */
	return(NULL);
}
/*********************************************************//**
Finds the node that holds a given data pointer. The fold value only
selects the chain to scan; nodes are matched on the data pointer.
@return	pointer to the matching hash table node, NULL if the chain has
no node with that data pointer */
UNIV_INLINE
ha_node_t*
ha_search_with_data(
/*================*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold,	/*!< in: folded value of the searched data */
	void*		data)	/*!< in: pointer to the data */
{
	ha_node_t*	cur;

	ASSERT_HASH_MUTEX_OWN(table, fold);

	cur = ha_chain_get_first(table, fold);

	/* stop on the first node holding 'data', or at the end of the
	chain, where cur becomes NULL */
	while (cur != NULL && cur->data != data) {
		cur = ha_chain_get_next(cur);
	}

	return(cur);
}
/*********************************************************//**
Locates the node holding a given data pointer in the chain selected by
fold and, if there is one, removes it from the hash table.
@return	TRUE if a node was found (and deleted), FALSE otherwise */
UNIV_INLINE
ibool
ha_search_and_delete_if_found(
/*==========================*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold,	/*!< in: folded value of the searched data */
	void*		data)	/*!< in: pointer to the data */
{
	ha_node_t*	victim;

	ASSERT_HASH_MUTEX_OWN(table, fold);

	victim = ha_search_with_data(table, fold, data);

	if (victim == NULL) {
		/* nothing to delete */
		return(FALSE);
	}

	ha_delete_hash_node(table, victim);

	return(TRUE);
}

View file

@ -0,0 +1,140 @@
/*****************************************************************************
Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/ha0storage.h
Hash storage.
Provides a data structure that stores chunks of data in
its own storage, avoiding duplicates.
Created September 22, 2007 Vasil Dimov
*******************************************************/
#ifndef ha0storage_h
#define ha0storage_h
#include "univ.i"
/** This value is used by default by ha_storage_create(). More memory
is allocated later when/if it is needed. */
#define HA_STORAGE_DEFAULT_HEAP_BYTES 1024
/** This value is used by default by ha_storage_create(). It is a
constant per ha_storage's lifetime. */
#define HA_STORAGE_DEFAULT_HASH_CELLS 4096
/** Hash storage */
typedef struct ha_storage_struct ha_storage_t;
/*******************************************************************//**
Creates a hash storage. If any of the parameters is 0, then a default
value is used.
@return own: hash storage */
UNIV_INLINE
ha_storage_t*
ha_storage_create(
/*==============*/
ulint initial_heap_bytes, /*!< in: initial heap's size */
ulint initial_hash_cells); /*!< in: initial number of cells
in the hash table */
/*******************************************************************//**
Copies data into the storage and returns a pointer to the copy. If the
same data chunk is already present, then pointer to it is returned.
Data chunks are considered to be equal if len1 == len2 and
memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
data_len bytes need to be allocated) and the size of storage is going to
become more than "memlim" then "data" is not added and NULL is returned.
To disable this behavior "memlim" can be set to 0, which stands for
"no limit".
@return pointer to the copy, or NULL if adding the data would exceed
"memlim" */
UNIV_INTERN
const void*
ha_storage_put_memlim(
/*==================*/
ha_storage_t* storage, /*!< in/out: hash storage */
const void* data, /*!< in: data to store */
ulint data_len, /*!< in: data length */
ulint memlim); /*!< in: memory limit to obey, or 0 for
no limit */
/*******************************************************************//**
Same as ha_storage_put_memlim() but without memory limit (0 is passed
as the limit).
@param storage in/out: hash storage
@param data in: data to store
@param data_len in: data length
@return pointer to the copy of the data */
#define ha_storage_put(storage, data, data_len) \
ha_storage_put_memlim((storage), (data), (data_len), 0)
/*******************************************************************//**
Copies string into the storage and returns a pointer to the copy. If the
same string is already present, then pointer to it is returned.
Strings are considered to be equal if strcmp(str1, str2) == 0.
The stored length is strlen(str) + 1, i.e. the terminating NUL is stored
too. Note that the macro expands "str" twice, so the argument must not
have side effects.
@param storage in/out: hash storage
@param str in: string to put
@return pointer to the copy of the string */
#define ha_storage_put_str(storage, str) \
((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
/*******************************************************************//**
Copies string into the storage and returns a pointer to the copy obeying
a memory limit.
If the same string is already present, then pointer to it is returned.
Strings are considered to be equal if strcmp(str1, str2) == 0.
The stored length is strlen(str) + 1, i.e. the terminating NUL is stored
too. Note that the macro expands "str" twice, so the argument must not
have side effects.
@param storage in/out: hash storage
@param str in: string to put
@param memlim in: memory limit to obey
@return pointer to the copy of the string; the result of
ha_storage_put_memlim() is passed through, so this is NULL when that
function returns NULL */
#define ha_storage_put_str_memlim(storage, str, memlim) \
((const char*) ha_storage_put_memlim((storage), (str), \
strlen(str) + 1, (memlim)))
/*******************************************************************//**
Empties a hash storage, freeing memory occupied by data chunks.
This invalidates any pointers previously returned by ha_storage_put().
The hash storage is not invalidated itself and can be used again.
The handle is passed by address because the call writes a new value to
*storage; callers must use the updated pointer afterwards. */
UNIV_INLINE
void
ha_storage_empty(
/*=============*/
ha_storage_t** storage); /*!< in/out: hash storage; *storage is
reassigned by the call */
/*******************************************************************//**
Frees a hash storage and everything it contains, it cannot be used after
this call.
This invalidates any pointers previously returned by ha_storage_put(). */
UNIV_INLINE
void
ha_storage_free(
/*============*/
ha_storage_t* storage); /*!< in, own: hash storage */
/*******************************************************************//**
Gets the size of the memory used by a storage.
@return bytes used */
UNIV_INLINE
ulint
ha_storage_get_size(
/*================*/
const ha_storage_t* storage); /*!< in: hash storage */
#ifndef UNIV_NONINL
#include "ha0storage.ic"
#endif
#endif /* ha0storage_h */

View file

@ -0,0 +1,148 @@
/*****************************************************************************
Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/ha0storage.ic
Hash storage.
Provides a data structure that stores chunks of data in
its own storage, avoiding duplicates.
Created September 24, 2007 Vasil Dimov
*******************************************************/
#include "univ.i"
#include "ha0storage.h"
#include "hash0hash.h"
#include "mem0mem.h"
/** Hash storage: a memory heap plus a hash table used to avoid storing
duplicates. ha_storage_create() allocates this struct itself from the
heap it points to. */
struct ha_storage_struct {
mem_heap_t* heap; /*!< memory heap from which memory is
allocated */
hash_table_t* hash; /*!< hash table used to avoid
duplicates */
};
/** Objects of this type are stored in ha_storage_t */
typedef struct ha_storage_node_struct ha_storage_node_t;
/** Objects of this type are stored in ha_storage_struct; each one
describes a single data chunk (pointer plus length) and is linked into a
hash chain through 'next'. */
struct ha_storage_node_struct {
ulint data_len;/*!< length of the data */
const void* data; /*!< pointer to data */
ha_storage_node_t* next; /*!< next node in hash chain */
};
/*******************************************************************//**
Creates a hash storage. A parameter given as 0 is replaced by its
default: HA_STORAGE_DEFAULT_HEAP_BYTES for the heap size and
HA_STORAGE_DEFAULT_HASH_CELLS for the number of hash cells.
@return	own: hash storage */
UNIV_INLINE
ha_storage_t*
ha_storage_create(
/*==============*/
	ulint	initial_heap_bytes,	/*!< in: initial heap's size */
	ulint	initial_hash_cells)	/*!< in: initial number of cells
					in the hash table */
{
	ulint		heap_bytes;
	ulint		n_cells;
	mem_heap_t*	heap;
	ha_storage_t*	storage;

	/* substitute the defaults for zero arguments */
	heap_bytes = (initial_heap_bytes != 0)
		? initial_heap_bytes
		: HA_STORAGE_DEFAULT_HEAP_BYTES;

	n_cells = (initial_hash_cells != 0)
		? initial_hash_cells
		: HA_STORAGE_DEFAULT_HASH_CELLS;

	/* the storage object lives inside its own heap, so the heap is
	sized to hold it in addition to the requested bytes */
	heap = mem_heap_create(sizeof(ha_storage_t) + heap_bytes);

	storage = (ha_storage_t*) mem_heap_alloc(heap, sizeof(ha_storage_t));

	storage->heap = heap;
	storage->hash = hash_create(n_cells);

	return(storage);
}
/*******************************************************************//**
Empties a hash storage, freeing memory occupied by data chunks.
Pointers previously returned by ha_storage_put() become invalid.
The storage remains usable, but *storage is rewritten: the storage
object lives in the heap that gets emptied, so it is allocated anew
from that heap. */
UNIV_INLINE
void
ha_storage_empty(
/*=============*/
	ha_storage_t**	storage)	/*!< in/out: hash storage */
{
	/* keep a copy of both members on the stack; the object they are
	read from is about to disappear together with the heap contents */
	ha_storage_t	saved = **storage;

	hash_table_clear(saved.hash);
	mem_heap_empty(saved.heap);

	*storage = (ha_storage_t*) mem_heap_alloc(saved.heap,
						  sizeof(ha_storage_t));

	/* restore heap and hash in the freshly allocated object */
	**storage = saved;
}
/*******************************************************************//**
Frees a hash storage and everything it contains, it cannot be used after
this call.
This invalidates any pointers previously returned by ha_storage_put().
Both the hash table and the heap are released; the storage object itself
is allocated from that heap (see ha_storage_create()) and goes with it. */
UNIV_INLINE
void
ha_storage_free(
/*============*/
ha_storage_t* storage) /*!< in, own: hash storage */
{
/* order is important because the pointer storage->hash is
within the heap: it must be read (and the hash table freed) before
the heap holding "storage" is released */
hash_table_free(storage->hash);
mem_heap_free(storage->heap);
}
/*******************************************************************//**
Computes the memory footprint of a storage: the size of its heap plus
the hash table header and the hash table's cell array.
@return	bytes used */
UNIV_INLINE
ulint
ha_storage_get_size(
/*================*/
	const ha_storage_t*	storage)	/*!< in: hash storage */
{
	ulint	heap_bytes;
	ulint	table_bytes;

	heap_bytes = mem_heap_get_size(storage->heap);

	/* only the table header and the cell array are counted here; this
	assumes hash->heap and hash->heaps are NULL */
	table_bytes = sizeof(hash_table_t);
	table_bytes += sizeof(hash_cell_t) * hash_get_n_cells(storage->hash);

	return(heap_bytes + table_bytes);
}

View file

@ -0,0 +1,261 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/*******************************************************************//**
@file include/ha_prototypes.h
Prototypes for global functions in ha_innodb.cc that are called by
InnoDB C code
Created 5/11/2006 Osku Salerma
************************************************************************/
#ifndef HA_INNODB_PROTOTYPES_H
#define HA_INNODB_PROTOTYPES_H
#include "trx0types.h"
#include "m_ctype.h" /* CHARSET_INFO */
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@return number of bytes copied to 'to' */
UNIV_INTERN
ulint
innobase_convert_string(
/*====================*/
void* to, /*!< out: converted string */
ulint to_length, /*!< in: number of bytes reserved
for the converted string */
CHARSET_INFO* to_cs, /*!< in: character set to convert to */
const void* from, /*!< in: string to convert */
ulint from_length, /*!< in: number of bytes to convert */
CHARSET_INFO* from_cs, /*!< in: character set to convert from */
uint* errors); /*!< out: number of errors encountered
during the conversion */
/*******************************************************************//**
Formats the raw data in "data" (in InnoDB on-disk format) that is of
type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
the result to "buf". The result is converted to "system_charset_info".
Not more than "buf_size" bytes are written to "buf".
The result is always NUL-terminated (provided buf_size > 0) and the
number of bytes that were written to "buf" is returned (including the
terminating NUL).
@return number of bytes that were written */
UNIV_INTERN
ulint
innobase_raw_format(
/*================*/
const char* data, /*!< in: raw data */
ulint data_len, /*!< in: raw data length
in bytes */
ulint charset_coll, /*!< in: charset collation */
char* buf, /*!< out: output buffer */
ulint buf_size); /*!< in: output buffer size
in bytes */
/*****************************************************************//**
Invalidates the MySQL query cache for the table. */
UNIV_INTERN
void
innobase_invalidate_query_cache(
/*============================*/
trx_t* trx, /*!< in: transaction which
modifies the table */
const char* full_name, /*!< in: concatenation of
database name, null char NUL,
table name, null char NUL;
NOTE that in Windows this is
always in LOWER CASE! */
ulint full_name_len); /*!< in: full name length where
also the null chars count */
/*****************************************************************//**
Convert a table or index name to the MySQL system_charset_info (UTF-8)
and quote it if needed.
@return pointer to the end of buf */
UNIV_INTERN
char*
innobase_convert_name(
/*==================*/
char* buf, /*!< out: buffer for converted identifier */
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
void* thd, /*!< in: MySQL connection thread, or NULL */
ibool table_id);/*!< in: TRUE=id is a table or database name;
FALSE=id is an index name */
/******************************************************************//**
Returns true if the thread is the replication thread on the slave
server. Used in srv_conc_enter_innodb() to determine if the thread
should be allowed to enter InnoDB - the replication thread is treated
differently than other threads. Also used in
srv_conc_force_exit_innodb().
@return true if thd is the replication thread */
UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
void* thd); /*!< in: thread handle (THD*) */
/******************************************************************//**
Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
rolling back transactions that have edited non-transactional tables.
@return true if non-transactional tables have been edited */
UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
void* thd); /*!< in: thread handle (THD*) */
/*************************************************************//**
Prints info of a THD object (== user session thread) to the given file. */
UNIV_INTERN
void
innobase_mysql_print_thd(
/*=====================*/
FILE* f, /*!< in: output stream */
void* thd, /*!< in: pointer to a MySQL THD object */
uint max_query_len); /*!< in: max query length to print, or 0 to
use the default max length */
/**************************************************************//**
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
@return DATA_BINARY, DATA_VARCHAR, ... */
UNIV_INTERN
ulint
get_innobase_type_from_mysql_type(
/*==============================*/
ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
'unsigned type';
at least ENUM and SET,
and unsigned integer
types are 'unsigned types' */
const void* field) /*!< in: MySQL Field */
__attribute__((nonnull));
/******************************************************************//**
Get the variable length bounds of the given character set. */
UNIV_INTERN
void
innobase_get_cset_width(
/*====================*/
ulint cset, /*!< in: MySQL charset-collation code */
ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
ulint* mbmaxlen); /*!< out: maximum length of a char (in bytes) */
/******************************************************************//**
Compares NUL-terminated UTF-8 strings case insensitively.
@return 0 if a=b, <0 if a<b, >0 if a>b */
UNIV_INTERN
int
innobase_strcasecmp(
/*================*/
const char* a, /*!< in: first string to compare */
const char* b); /*!< in: second string to compare */
/******************************************************************//**
Returns true if the thread is executing a SELECT statement.
@return true if thd is executing SELECT */
ibool
thd_is_select(
/*==========*/
const void* thd); /*!< in: thread handle (THD*) */
/******************************************************************//**
Converts an identifier to a table name. */
UNIV_INTERN
void
innobase_convert_from_table_id(
/*===========================*/
struct charset_info_st* cs, /*!< in: the 'from' character set */
char* to, /*!< out: converted identifier */
const char* from, /*!< in: identifier to convert */
ulint len); /*!< in: length of 'to', in bytes; should
be at least 5 * strlen(from) + 1, i.e. sized
from the input, since 'to' is only an output
buffer here (TODO confirm the factor against
the implementation) */
/******************************************************************//**
Converts an identifier to UTF-8. */
UNIV_INTERN
void
innobase_convert_from_id(
/*=====================*/
struct charset_info_st* cs, /*!< in: the 'from' character set */
char* to, /*!< out: converted identifier */
const char* from, /*!< in: identifier to convert */
ulint len); /*!< in: length of 'to', in bytes; should
be at least 3 * strlen(from) + 1, i.e. sized
from the input, since 'to' is only an output
buffer here (TODO confirm the factor against
the implementation) */
/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
UNIV_INTERN
void
innobase_casedn_str(
/*================*/
char* a); /*!< in/out: string to put in lower case */
/**********************************************************************//**
Determines the connection character set.
@return connection character set */
struct charset_info_st*
innobase_get_charset(
/*=================*/
void* mysql_thd); /*!< in: MySQL thread handle */
/******************************************************************//**
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.
@return number of bytes occupied by the first n characters */
UNIV_INTERN
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
ulint charset_id, /*!< in: character set id */
ulint prefix_len, /*!< in: prefix length in bytes of the index
(this has to be divided by mbmaxlen to get the
number of CHARACTERS n in the prefix) */
ulint data_len, /*!< in: length of the string in bytes */
const char* str); /*!< in: character string */
/******************************************************************//**
Returns true if the thread supports XA,
global value of innodb_supports_xa if thd is NULL.
@return true if thd supports XA */
ibool
thd_supports_xa(
/*============*/
void* thd); /*!< in: thread handle (THD*), or NULL to query
the global innodb_supports_xa */
/******************************************************************//**
Returns the lock wait timeout for the current connection.
@return the lock wait timeout, in seconds */
ulong
thd_lock_wait_timeout(
/*==================*/
void* thd); /*!< in: thread handle (THD*), or NULL to query
the global innodb_lock_wait_timeout */
#endif

View file

@ -0,0 +1,42 @@
/*****************************************************************************
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/**************************************************//**
@file include/handler0alter.h
Smart ALTER TABLE
*******************************************************/
/*************************************************************//**
Copies an InnoDB record to table->record[0]. */
UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
struct TABLE* table, /*!< in/out: MySQL table */
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: index */
const ulint* offsets); /*!< in: rec_get_offsets(
rec, index, ...) */
/*************************************************************//**
Resets table->record[0]. */
UNIV_INTERN
void
innobase_rec_reset(
/*===============*/
struct TABLE* table); /*!< in/out: MySQL table */

Some files were not shown because too many files have changed in this diff Show more