mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-26 01:18:31 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			7618 lines
		
	
	
	
		
			251 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			7618 lines
		
	
	
	
		
			251 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* Copyright (C) 2007-2008 Michael Widenius
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
 | |
| 
 | |
| /*
 | |
|   Storage of records in block
 | |
| 
 | |
|   Some clarifications about the abbrev used:
 | |
| 
 | |
|   NULL fields      -> Fields that may contain a NULL value.
 | |
|   Not null fields  -> Fields that may not contain a NULL value.
 | |
|   Critical fields  -> Fields that can't be null and can't be dropped without
 | |
| 		      causing a table reorganization.
 | |
| 
 | |
| 
 | |
|   Maria will have a LSN at start of each page (excluding the bitmap pages)
 | |
| 
 | |
|   The different page types that are in a data file are:
 | |
| 
 | |
|   Bitmap pages     Map of free pages in the next extent (8192 page size
 | |
|                    gives us 256M of mapped pages / bitmap)
 | |
|   Head page        Start of rows are stored on this page.
 | |
|                    A rowid always points to a head page
 | |
|   Blob page        This page is totally filled with data from one blob or by
 | |
|                    a set of long VARCHAR/CHAR fields
 | |
|   Tail page        This contains the last part from different rows, blobs
 | |
|                    or varchar fields.
 | |
| 
 | |
|   The data file starts with a bitmap page, followed by as many data
 | |
|   pages as the bitmap can cover. After this there is a new bitmap page
 | |
|   and more data pages etc.
 | |
| 
 | |
|   For information about the bitmap page, see ma_bitmap.c
 | |
| 
 | |
|   Structure of data and tail page:
 | |
| 
 | |
|   The page has a row directory at end of page to allow us to do deletes
 | |
|   without having to reorganize the page.  It also allows us to later store
 | |
|   some more bytes after each row to allow them to grow without having to move
 | |
|   around other rows.
 | |
| 
 | |
|   Page header:
 | |
| 
 | |
|   LSN            7 bytes   Log position for last page change
 | |
|   PAGE_TYPE      1 uchar   0 unalloced / 1 for head / 2 for tail / 3 for blob
 | |
|   DIR_COUNT      1 uchar   Number of row/tail entries on page
 | |
|   FREE_DIR_LINK  1 uchar   Pointer to first free directory entry or 255 if no
 | |
|   empty space    2 bytes   Bytes of empty space on page
 | |
| 
 | |
|   The most significant bit in PAGE_TYPE is set to 1 if the data on the page
 | |
|   can be compacted to get more space. (PAGE_CAN_BE_COMPACTED)
 | |
| 
 | |
|   Row data
 | |
| 
 | |
|   Row directory of NO entries, that consist of the following for each row
 | |
|   (in reverse order; i.e., first record is stored last):
 | |
| 
 | |
|   Position     2 bytes Position of row on page
 | |
|   Length       2 bytes Length of entry
 | |
| 
 | |
|   For Position and Length, the 1 most significant bit of the position and
 | |
|   the 1 most significant bit of the length could be used for some states of
 | |
|   the row (in other words, we should try to keep these reserved)
 | |
| 
 | |
|   Position is 0 if the entry is not used.  In this case length[0] points
 | |
|   to a previous free entry (255 if no previous entry) and length[1]
 | |
|   to the next free entry (or 255 if last free entry). This works because
 | |
|   the directory entry 255 can never be marked free (if the first directory
 | |
|   entry is freed, the directory is shrunk).
 | |
| 
 | |
|   checksum     4 bytes  Reserved for full page read testing and live backup.
 | |
| 
 | |
|   ----------------
 | |
| 
 | |
|   Structure of blob pages:
 | |
| 
 | |
|   LSN          7 bytes  Log position for last page change
 | |
|   PAGE_TYPE    1 uchar   3
 | |
| 
 | |
|   data
 | |
| 
 | |
|   -----------------
 | |
| 
 | |
|   Row data structure:
 | |
| 
 | |
|   Flag                          1 uchar   Marker of which header field exists
 | |
|   TRANSID                       6 bytes  TRANSID of changing transaction
 | |
|                                          (optional, added on insert and first
 | |
|                                          update/delete)
 | |
|   VER_PTR                       7 bytes  Pointer to older version in log
 | |
|                                          (undo record)
 | |
|                                          (optional, added after first
 | |
|                                          update/delete)
 | |
|   DELETE_TRANSID                6 bytes  (optional). TRANSID of original row.
 | |
|                                          Added on delete.
 | |
|   Nulls_extended                1 uchar   To allow us to add new DEFAULT NULL
 | |
|                                          fields (optional, added after first
 | |
|                                          change of row after alter table)
 | |
|   Number of ROW_EXTENT's        1-3 uchar Length encoded, optional
 | |
|                                          This is the number of extents the
 | |
|                                          row is split into
 | |
|   First row_extent              7 uchar  Pointer to first row extent (optional)
 | |
| 
 | |
|   Total length of length array  1-3 uchar Only used if we have
 | |
|                                          char/varchar/blob fields.
 | |
|   Row checksum		        1 uchar   Only if table created with checksums
 | |
|   Null_bits             ..      One bit for each NULL field (a field that may
 | |
| 				have the value NULL)
 | |
|   Empty_bits            ..      One bit for each field that may be 'empty'.
 | |
| 				(Both for null and not null fields).
 | |
|                                 This bit is 1 if the value for the field is
 | |
|                                 0 or empty string.
 | |
| 
 | |
|   field_offsets                 2 byte/offset
 | |
|                                   For each 32'th field, there is one offset
 | |
|                                   that points to where the field information
 | |
|                                   starts in the block. This is to provide
 | |
|                                   fast access to later field in the row
 | |
|                                   when we only need to return a small
 | |
|                                   set of fields.
 | |
|                                   TODO: Implement this.
 | |
| 
 | |
|   Things marked above as 'optional' will only be present if the
 | |
|   corresponding bit is set in 'Flag' field.  Flag gives us a way to
 | |
|   get more space on a page when doing page compaction as we don't need
 | |
|   to store TRANSID that have committed before the smallest running
 | |
|   transaction we have in memory.
 | |
| 
 | |
|   Data in the following order:
 | |
|   (Field order is precalculated when table is created)
 | |
| 
 | |
|   Critical fixed length, not null, fields. (Note, these can't be dropped)
 | |
|   Fixed length, null fields
 | |
| 
 | |
|   Length array, 1-4 uchar per field for all CHAR/VARCHAR/BLOB fields.
 | |
|   Number of bytes used in length array per entry is depending on max length
 | |
|   for field.
 | |
| 
 | |
|   ROW_EXTENT's
 | |
|   CHAR data (space stripped)
 | |
|   VARCHAR data
 | |
|   BLOB data
 | |
| 
 | |
|   Fields marked in null_bits or empty_bits are not stored in data part or
 | |
|   length array.
 | |
| 
 | |
|   If row doesn't fit into the given block, then the first EXTENT will be
 | |
|   stored last on the row. This is done so that we don't break any field
 | |
|   data in the middle.
 | |
| 
 | |
|   We first try to store the full row into one block. If that's not possible
 | |
|   we move out each big blob into their own extents. If this is not enough we
 | |
|   move out a concatenation of all varchars to their own extent.
 | |
| 
 | |
|   Each blob and the concatenated char/varchar fields are stored the following
 | |
|   way:
 | |
|   - Store the parts in as many full-contiguous pages as possible.
 | |
|   - The last part, that doesn't fill a full page, is stored in tail page.
 | |
| 
 | |
|   When doing an insert of a new row, we don't have to have
 | |
|   VER_PTR in the row. This will make rows that are not changed stored
 | |
|   efficiently. On update and delete we would add TRANSID (if it was an old
 | |
|   committed row) and VER_PTR to
 | |
|   the row. On row page compaction we can easily detect rows where
 | |
|   TRANSID was committed before the longest running transaction
 | |
|   started and we can then delete TRANSID and VER_PTR from the row to
 | |
|   gain more space.
 | |
| 
 | |
|   If a row is deleted in Maria, we change TRANSID to the deleting
 | |
|   transaction's id, change VER_PTR to point to the undo record for the delete,
 | |
|   and add DELETE_TRANSID (the id of the transaction which last
 | |
|   inserted/updated the row before its deletion). DELETE_TRANSID allows an old
 | |
|   transaction to avoid reading the log to know if it can see the last version
 | |
|   before delete (in other words it reduces the probability of having to follow
 | |
|   VER_PTR). TODO: depending on a compilation option, evaluate the performance
 | |
|   impact of not storing DELETE_TRANSID (which would make the row smaller).
 | |
| 
 | |
|   Description of the different parts:
 | |
| 
 | |
|   Flag is coded as:
 | |
| 
 | |
|   Description           bit
 | |
|   TRANS_ID_exists       0
 | |
|   VER_PTR_exists        1
 | |
|   Row is deleted        2       (Means that DELETE_TRANSID exists)
 | |
|   Nulls_extended_exists 3
 | |
|   Row is split          7       This means that 'Number_of_row_extents' exists
 | |
| 
 | |
|   Nulls_extended is the number of new DEFAULT NULL fields in the row
 | |
|   compared to the number of DEFAULT NULL fields when the first version
 | |
|   of the table was created.  If Nulls_extended doesn't exist in the row,
 | |
|   we know it's 0 as this must be one of the original rows from when the
 | |
|   table was created first time.  This coding allows us to add 255*8 =
 | |
|   2040 new fields without requiring a full alter table.
 | |
| 
 | |
|   Empty_bits is used to allow us to store 0, 0.0, empty string, empty
 | |
|   varstring and empty blob efficiently. (This is very good for data
 | |
|   warehousing where NULL's are often regarded as evil). Having this
 | |
|   bitmap also allows us to drop information of a field during a future
 | |
|   delete if field was deleted with ALTER TABLE DROP COLUMN.  To be able
 | |
|   to handle DROP COLUMN, we must store in the index header the fields
 | |
|   that have been dropped. When unpacking a row we will ignore dropped
 | |
|   fields. When storing a row, we will mark a dropped field either with a
 | |
|   null in the null bit map or in the empty_bits and not store any data
 | |
|   for it.
 | |
|   TODO: Add code for handling dropped fields.
 | |
| 
 | |
| 
 | |
|   A ROW EXTENT is range of pages. One ROW_EXTENT is coded as:
 | |
| 
 | |
|   START_PAGE            5 bytes
 | |
|   PAGE_COUNT            2 bytes.  Bit 16 is set if this is a tail page.
 | |
|                                   Bit 15 is to set if this is start of a new
 | |
|                                   blob extent.
 | |
| 
 | |
|   With 8K pages, we can cover 256M in one extent. This coding gives us a
 | |
|   maximum file size of 2^40*8192 = 8192 tera
 | |
| 
 | |
|   As an example of ROW_EXTENT handling, assume a row with one integer
 | |
|   field (value 5), two big VARCHAR fields (size 250 and 8192*3), and 2
 | |
|   big BLOB fields that we have updated.
 | |
| 
 | |
|   The record format for storing this into an empty file would be:
 | |
| 
 | |
|   Page 1:
 | |
| 
 | |
|   00 00 00 00 00 00 00          LSN
 | |
|   01                            Only one row in page
 | |
|   FF                            No free dir entry
 | |
|   xx xx                         Empty space on page
 | |
| 
 | |
|   10                            Flag: row split, VER_PTR exists
 | |
|   01 00 00 00 00 00             TRANSID 1
 | |
|   00 00 00 00 00 01 00          VER_PTR to first block in LOG file 1
 | |
|   5                             Number of row extents
 | |
|   02 00 00 00 00 03 00          VARCHAR's are stored in full pages 2,3,4
 | |
|   0                             No null fields
 | |
|   0                             No empty fields
 | |
|   05 00 00 00 00 00 80          Tail page for VARCHAR, rowid 0
 | |
|   06 00 00 00 00 80 00          First blob, stored at page 6-133
 | |
|   05 00 00 00 00 01 80          Tail of first blob (896 bytes) at page 5
 | |
|   86 00 00 00 00 80 00          Second blob, stored at page 134-262
 | |
|   05 00 00 00 00 02 80          Tail of second blob (896 bytes) at page 5
 | |
|   05 00                         5 integer
 | |
|   FA                            Length of first varchar field (size 250)
 | |
|   00 60                         Length of second varchar field (size 8192*3)
 | |
|   00 60 10                      First medium BLOB, 1M
 | |
|   01 00 10 00                   Second BLOB, 1M
 | |
|   xx xx xx xx xx xx             Varchars are stored here until end of page
 | |
| 
 | |
|   ..... until end of page
 | |
| 
 | |
|   09 00 F4 1F                   Start position 9, length 8180
 | |
|   xx xx xx xx			Checksum
 | |
| 
 | |
|   A data page is allowed to have a wrong CRC and header as long as it is
 | |
|   marked empty in the bitmap and its directory's count is 0.
 | |
| */
 | |
| 
 | |
| #include "maria_def.h"
 | |
| #include "ma_blockrec.h"
 | |
| #include "trnman.h"
 | |
| #include "ma_trnman.h"
 | |
| #include "ma_key_recover.h"
 | |
| #include "ma_recovery_util.h"
 | |
| #include <lf.h>
 | |
| 
 | |
| /*
 | |
|   Struct for having a cursor over a set of extent.
 | |
|   This is used to loop over all extents for a row when reading
 | |
|   the row data. It's also used to store the tail positions for
 | |
|   a read row to be used by a later update/delete command.
 | |
| */
 | |
| 
 | |
| typedef struct st_maria_extent_cursor
 | |
| {
 | |
|   /*
 | |
|     Pointer to packed uchar array of extents for the row.
 | |
|     Format is described above in the header
 | |
|   */
 | |
|   uchar *extent;
 | |
|   /* Where data starts on page; Only for debugging */
 | |
|   uchar *data_start;
 | |
|   /* Position to all tails in the row. Updated when reading a row */
 | |
|   MARIA_RECORD_POS *tail_positions;
 | |
|   /* Current page */
 | |
|   pgcache_page_no_t page;
 | |
|   /* How many pages in the page region */
 | |
|   uint page_count;
 | |
|   /* What kind of lock to use for tail pages */
 | |
|   enum pagecache_page_lock lock_for_tail_pages;
 | |
|   /* Total number of extents (i.e., entries in the 'extent' slot) */
 | |
|   uint extent_count;
 | |
|   /* <> 0 if current extent is a tail page; Set while using cursor */
 | |
|   uint tail;
 | |
|   /* Position for tail on tail page */
 | |
|   uint tail_row_nr;
 | |
|   /*
 | |
|     == 1 if we are working on the first extent (i.e., the one that is stored in
 | |
|     the row header, not an extent that is stored as part of the row data).
 | |
|   */
 | |
|   my_bool first_extent;
 | |
| } MARIA_EXTENT_CURSOR;
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Structure for passing down info to write_hook_for_clr_end().
 | |
|    This hook needs to know the variation of the live checksum caused by the
 | |
|    current operation to update state.checksum under log's mutex,
 | |
|    needs to know the transaction's previous undo_lsn to set
 | |
|    trn->undo_lsn under log mutex, and needs to know the type of UNDO being
 | |
|    undone now to modify state.records under log mutex.
 | |
| */
 | |
| 
 | |
/**
   Compute a row checksum and store it in the record, if checksums are used.

   S: share, D: checksum_delta (lvalue), E: expression giving the checksum,
   P: pointer_into_record, L: length (lvalue)

   D is always reset to 0 first.  Only when (S)->calc_checksum is set is E
   evaluated, assigned to D, stored at P with ha_checksum_store(), and L
   increased by HA_CHECKSUM_STORE_SIZE.

   All parameters are parenthesized in the expansion so that compound
   argument expressions can not change the meaning of the statements.
*/
#define store_checksum_in_rec(S,D,E,P,L)        do      \
  {                                                     \
    (D)= 0;                                             \
    if ((S)->calc_checksum != NULL)                     \
    {                                                   \
      (D)= (E);                                         \
      ha_checksum_store((P), (D));                      \
      (L)+= HA_CHECKSUM_STORE_SIZE;                     \
    }                                                   \
  } while (0)
 | |
| 
 | |
| 
 | |
| static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails);
 | |
| static my_bool delete_head_or_tail(MARIA_HA *info,
 | |
|                                    pgcache_page_no_t page, uint record_number,
 | |
|                                    my_bool head, my_bool from_update);
 | |
| #ifndef DBUG_OFF
 | |
| static void _ma_print_directory(MARIA_SHARE *share,
 | |
|                                 FILE *file, uchar *buff, uint block_size);
 | |
| #endif
 | |
| static uchar *store_page_range(MARIA_SHARE *share,
 | |
|                                uchar *to, MARIA_BITMAP_BLOCK *block,
 | |
|                                ulong length,
 | |
|                                uint *tot_ranges);
 | |
| static size_t fill_insert_undo_parts(MARIA_HA *info, const uchar *record,
 | |
|                                      LEX_CUSTRING *log_parts,
 | |
|                                      uint *log_parts_count);
 | |
| static size_t fill_update_undo_parts(MARIA_HA *info, const uchar *oldrec,
 | |
|                                      const uchar *newrec,
 | |
|                                      LEX_CUSTRING *log_parts,
 | |
|                                      uint *log_parts_count);
 | |
| 
 | |
| /****************************************************************************
 | |
|   Initialization
 | |
| ****************************************************************************/
 | |
| 
 | |
| /*
 | |
|   Initialize data needed for block structures
 | |
| */
 | |
| 
 | |
| 
 | |
| /* Size of the different header elements for a row */
 | |
| 
 | |
| static uchar header_sizes[]=
 | |
| {
 | |
|   TRANSID_SIZE,
 | |
|   VERPTR_SIZE,
 | |
|   TRANSID_SIZE,                                 /* Delete transid */
 | |
|   1                                             /* Null extends */
 | |
| };
 | |
| 
 | |
| /*
 | |
|   Calculate array of all used headers
 | |
| 
 | |
|   Used to speed up:
 | |
| 
 | |
|   size= 1;
 | |
|   if (flag & 1)
 | |
|     size+= TRANSID_SIZE;
 | |
|   if (flag & 2)
 | |
|     size+= VERPTR_SIZE;
 | |
|   if (flag & 4)
 | |
|     size+= TRANSID_SIZE
 | |
|   if (flag & 8)
 | |
|     size+= 1;
 | |
| 
 | |
|    NOTES
 | |
|      This is called only once at startup of Maria
 | |
| */
 | |
| 
 | |
| static uchar total_header_size[1 << array_elements(header_sizes)];
 | |
| #define PRECALC_HEADER_BITMASK (array_elements(total_header_size) -1)
 | |
| 
 | |
| void _ma_init_block_record_data(void)
 | |
| {
 | |
|   uint i;
 | |
|   bzero(total_header_size, sizeof(total_header_size));
 | |
|   total_header_size[0]= FLAG_SIZE;              /* Flag uchar */
 | |
|   for (i= 1; i < array_elements(total_header_size); i++)
 | |
|   {
 | |
|     uint size= FLAG_SIZE, j, bit;
 | |
|     for (j= 0; (bit= (1 << j)) <= i; j++)
 | |
|     {
 | |
|       if (i & bit)
 | |
|         size+= header_sizes[j];
 | |
|     }
 | |
|     total_header_size[i]= size;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| my_bool _ma_once_init_block_record(MARIA_SHARE *share, File data_file)
 | |
| {
 | |
|   my_bool res;
 | |
|   pgcache_page_no_t last_page; 
 | |
| 
 | |
|   /*
 | |
|     First calculate the max file length with can have with a pointer of size
 | |
|     rec_reflength.
 | |
| 
 | |
|     The 'rec_reflength - 1' is because one byte is used for row
 | |
|     position withing the page.
 | |
|     The /2 comes from _ma_transaction_recpos_to_keypos() where we use
 | |
|     the lowest bit to mark if there is a transid following the rownr.
 | |
|   */
 | |
|   last_page= ((ulonglong) 1 << ((share->base.rec_reflength-1)*8))/2;
 | |
|   if (!last_page)                                  /* Overflow; set max size */
 | |
|     last_page= ~(pgcache_page_no_t) 0;
 | |
| 
 | |
|   res= _ma_bitmap_init(share, data_file, &last_page);
 | |
|   share->base.max_data_file_length= _ma_safe_mul(last_page + 1,
 | |
|                                                  share->block_size);
 | |
| #if SIZEOF_OFF_T == 4
 | |
|   set_if_smaller(share->base.max_data_file_length, INT_MAX32);
 | |
| #endif
 | |
|   return res;
 | |
| }
 | |
| 
 | |
| 
 | |
| my_bool _ma_once_end_block_record(MARIA_SHARE *share)
 | |
| {
 | |
|   int res= _ma_bitmap_end(share);
 | |
|   if (share->bitmap.file.file >= 0)
 | |
|   {
 | |
|     if (flush_pagecache_blocks(share->pagecache, &share->bitmap.file,
 | |
|                        share->deleting ? FLUSH_IGNORE_CHANGED : FLUSH_RELEASE))
 | |
|       res= 1;
 | |
|     /*
 | |
|       File must be synced as it is going out of the maria_open_list and so
 | |
|       becoming unknown to Checkpoint.
 | |
|     */
 | |
|     if (!share->s3_path)
 | |
|     {
 | |
|       if (share->now_transactional &&
 | |
|           mysql_file_sync(share->bitmap.file.file, MYF(MY_WME)))
 | |
|         res= 1;
 | |
|       if (mysql_file_close(share->bitmap.file.file, MYF(MY_WME)))
 | |
|         res= 1;
 | |
|     }
 | |
|     /*
 | |
|       Trivial assignment to guard against multiple invocations
 | |
|       (May happen if file are closed but we want to keep the maria object
 | |
|       around a bit longer)
 | |
|     */
 | |
|     share->bitmap.file.file= -1;
 | |
|   }
 | |
|   if (share->id != 0)
 | |
|   {
 | |
|     /*
 | |
|       We de-assign the id even though index has not been flushed, this is ok
 | |
|       as close_lock serializes us with a Checkpoint looking at our share.
 | |
|     */
 | |
|     translog_deassign_id_from_share(share);
 | |
|   }
 | |
|   return res;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Init info->cur_row structure */
 | |
| 
 | |
| my_bool _ma_init_block_record(MARIA_HA *info)
 | |
| {
 | |
|   MARIA_ROW *row= &info->cur_row, *new_row= &info->new_row;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   myf flag= MY_WME | share->malloc_flag;
 | |
|   uint default_extents;
 | |
|   DBUG_ENTER("_ma_init_block_record");
 | |
| 
 | |
|   if (!my_multi_malloc(PSI_INSTRUMENT_ME, flag,
 | |
|                        &row->empty_bits, share->base.pack_bytes,
 | |
|                        &row->field_lengths,
 | |
|                        share->base.max_field_lengths + 2,
 | |
|                        &row->blob_lengths, sizeof(ulong) * share->base.blobs,
 | |
|                        &row->null_field_lengths, (sizeof(uint) *
 | |
|                                                   (share->base.fields -
 | |
|                                                    share->base.blobs +
 | |
|                                                    EXTRA_LENGTH_FIELDS)),
 | |
|                        &row->tail_positions, (sizeof(MARIA_RECORD_POS) *
 | |
|                                               (share->base.blobs + 2)),
 | |
|                        &new_row->empty_bits, share->base.pack_bytes,
 | |
|                        &new_row->field_lengths,
 | |
|                        share->base.max_field_lengths + 2,
 | |
|                        &new_row->blob_lengths,
 | |
|                        sizeof(ulong) * share->base.blobs,
 | |
|                        &new_row->null_field_lengths, (sizeof(uint) *
 | |
|                                                       (share->base.fields -
 | |
|                                                        share->base.blobs +
 | |
|                                                        EXTRA_LENGTH_FIELDS)),
 | |
|                        &info->log_row_parts,
 | |
|                        sizeof(*info->log_row_parts) *
 | |
|                        (TRANSLOG_INTERNAL_PARTS + 3 +
 | |
|                         share->base.fields + 3),
 | |
|                        &info->update_field_data,
 | |
|                        (share->base.fields * 4 +
 | |
|                         share->base.max_field_lengths + 1 + 4),
 | |
|                        NullS, 0))
 | |
|     DBUG_RETURN(1);
 | |
|   /* Skip over bytes used to store length of field length for logging */
 | |
|   row->field_lengths+= 2;
 | |
|   new_row->field_lengths+= 2;
 | |
| 
 | |
|   /* Reserve some initial space to avoid mallocs during execution */
 | |
|   default_extents= (ELEMENTS_RESERVED_FOR_MAIN_PART + 1 +
 | |
|                     (AVERAGE_BLOB_SIZE /
 | |
|                      FULL_PAGE_SIZE(share) /
 | |
|                      BLOB_SEGMENT_MIN_SIZE));
 | |
| 
 | |
|   if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &info->bitmap_blocks,
 | |
|                             sizeof(MARIA_BITMAP_BLOCK),
 | |
|                             default_extents, 64, flag))
 | |
|     goto err;
 | |
|   info->cur_row.extents_buffer_length= default_extents * ROW_EXTENT_SIZE;
 | |
|   if (!(info->cur_row.extents= my_malloc(PSI_INSTRUMENT_ME,
 | |
|                                          info->cur_row.extents_buffer_length,
 | |
|                                          flag)))
 | |
|     goto err;
 | |
| 
 | |
|   info->row_base_length= share->base_length;
 | |
|   info->row_flag= share->base.default_row_flag;
 | |
| 
 | |
|   /*
 | |
|     We need to reserve 'EXTRA_LENGTH_FIELDS' number of parts in
 | |
|     null_field_lengths to allow splitting of rows in 'find_where_to_split_row'
 | |
|   */
 | |
|   row->null_field_lengths+= EXTRA_LENGTH_FIELDS;
 | |
|   new_row->null_field_lengths+= EXTRA_LENGTH_FIELDS;
 | |
| 
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   _ma_end_block_record(info);
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| void _ma_end_block_record(MARIA_HA *info)
 | |
| {
 | |
|   DBUG_ENTER("_ma_end_block_record");
 | |
|   my_free(info->cur_row.empty_bits);
 | |
|   delete_dynamic(&info->bitmap_blocks);
 | |
|   my_free(info->cur_row.extents);
 | |
|   my_free(info->blob_buff);
 | |
|   /*
 | |
|     The data file is closed, when needed, in ma_once_end_block_record().
 | |
|     The following protects us from doing an extra, not allowed, close
 | |
|     in maria_close()
 | |
|   */
 | |
|   info->dfile.file= -1;
 | |
|   DBUG_VOID_RETURN;
 | |
| }
 | |
| 
 | |
| 
 | |
| /****************************************************************************
 | |
|   Helper functions
 | |
| ****************************************************************************/
 | |
| 
 | |
| /*
 | |
|   Return the next unused postion on the page after a directory entry.
 | |
| 
 | |
|   SYNOPSIS
 | |
|     start_of_next_entry()
 | |
|     dir		Directory entry to be used. This can not be the
 | |
|                 the last entry on the page!
 | |
| 
 | |
|   RETURN
 | |
|     #   Position in page where next entry starts.
 | |
|         Everything between the '*dir' and this are free to be used.
 | |
| */
 | |
| 
 | |
| static inline uint start_of_next_entry(uchar *dir)
 | |
| {
 | |
|   uchar *prev;
 | |
|   /*
 | |
|      Find previous used entry. (There is always a previous entry as
 | |
|      the directory never starts with a deleted entry)
 | |
|   */
 | |
|   for (prev= dir - DIR_ENTRY_SIZE ;
 | |
|        prev[0] == 0 && prev[1] == 0 ;
 | |
|        prev-= DIR_ENTRY_SIZE)
 | |
|   {}
 | |
|   return (uint) uint2korr(prev);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Return the offset where the previous entry ends (before on page)
 | |
| 
 | |
|   SYNOPSIS
 | |
|     end_of_previous_entry()
 | |
|     dir		Address for current directory entry
 | |
|     end         Address to last directory entry
 | |
| 
 | |
|   RETURN
 | |
|     #   Position where previous entry ends (smallest address on page)
 | |
|         Everything between # and current entry are free to be used.
 | |
| */
 | |
| 
 | |
| 
 | |
| static inline uint end_of_previous_entry(MARIA_SHARE *share,
 | |
|                                          uchar *dir, uchar *end)
 | |
| {
 | |
|   uchar *pos;
 | |
|   for (pos= dir + DIR_ENTRY_SIZE ; pos < end ; pos+= DIR_ENTRY_SIZE)
 | |
|   {
 | |
|     uint offset;
 | |
|     if ((offset= uint2korr(pos)))
 | |
|       return offset + uint2korr(pos+2);
 | |
|   }
 | |
|   return PAGE_HEADER_SIZE(share);
 | |
| }
 | |
| 
 | |
| 
 | |
| #ifndef DBUG_OFF
 | |
| 
 | |
| static void _ma_print_directory(MARIA_SHARE *share,
 | |
|                                 FILE *file, uchar *buff, uint block_size)
 | |
| {
 | |
|   uint max_entry= (uint) ((uchar *) buff)[DIR_COUNT_OFFSET], row= 0;
 | |
|   uint end_of_prev_row= PAGE_HEADER_SIZE(share);
 | |
|   uchar *dir, *end;
 | |
| 
 | |
|   dir= dir_entry_pos(buff, block_size, max_entry-1);
 | |
|   end= dir_entry_pos(buff, block_size, 0);
 | |
| 
 | |
|   DBUG_LOCK_FILE;                               /* If using DBUG_FILE */
 | |
|   fprintf(file,"Directory dump (pos:length):\n");
 | |
| 
 | |
|   for (row= 1; dir <= end ; end-= DIR_ENTRY_SIZE, row++)
 | |
|   {
 | |
|     uint offset= uint2korr(end);
 | |
|     uint length= uint2korr(end+2);
 | |
|     fprintf(file, "   %4u:%4u", offset, offset ? length : 0);
 | |
|     if (!(row % (80/12)))
 | |
|       fputc('\n', file);
 | |
|     if (offset)
 | |
|     {
 | |
|       DBUG_ASSERT(offset >= end_of_prev_row);
 | |
|       end_of_prev_row= offset + length;
 | |
|     }
 | |
|   }
 | |
|   fputc('\n', file);
 | |
|   fflush(file);
 | |
|   DBUG_UNLOCK_FILE;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void check_directory(MARIA_SHARE *share,
 | |
|                             uchar *buff, uint block_size, uint min_row_length,
 | |
|                             uint real_empty_size)
 | |
| {
 | |
|   uchar *dir, *end;
 | |
|   uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
 | |
|   uint start_of_dir, deleted;
 | |
|   uint end_of_prev_row= PAGE_HEADER_SIZE(share);
 | |
|   uint empty_size_on_page;
 | |
|   uint empty_size;
 | |
|   uchar free_entry, prev_free_entry;
 | |
| 
 | |
|   dir= dir_entry_pos(buff, block_size, max_entry-1);
 | |
|   start_of_dir= (uint) (dir - buff);
 | |
|   end= dir_entry_pos(buff, block_size, 0);
 | |
|   deleted= empty_size= 0;
 | |
| 
 | |
|   empty_size_on_page= (real_empty_size != (uint) -1 ? real_empty_size :
 | |
|                        uint2korr(buff + EMPTY_SPACE_OFFSET));
 | |
| 
 | |
|   /* Ensure that all rows are in increasing order and no overlaps */
 | |
|   for (; dir <= end ; end-= DIR_ENTRY_SIZE)
 | |
|   {
 | |
|     uint offset= uint2korr(end);
 | |
|     uint length= uint2korr(end+2);
 | |
|     if (offset)
 | |
|     {
 | |
|       DBUG_ASSERT(offset >= end_of_prev_row);
 | |
|       DBUG_ASSERT(!length || length >= min_row_length);
 | |
|       empty_size+= offset - end_of_prev_row;
 | |
|       end_of_prev_row= offset + length;
 | |
|     }
 | |
|     else
 | |
|       deleted++;
 | |
|   }
 | |
|   empty_size+= start_of_dir - end_of_prev_row;
 | |
|   DBUG_ASSERT(end_of_prev_row <= start_of_dir);
 | |
|   DBUG_ASSERT(empty_size == empty_size_on_page);
 | |
| 
 | |
|   /* check free links */
 | |
|   free_entry= buff[DIR_FREE_OFFSET];
 | |
|   prev_free_entry= END_OF_DIR_FREE_LIST;
 | |
|   while (free_entry != END_OF_DIR_FREE_LIST)
 | |
|   {
 | |
|     uchar *dir= dir_entry_pos(buff, block_size, free_entry);
 | |
|     DBUG_ASSERT(dir[0] == 0 && dir[1] == 0);
 | |
|     DBUG_ASSERT(dir[2] == prev_free_entry);
 | |
|     prev_free_entry= free_entry;
 | |
|     free_entry= dir[3];
 | |
|     deleted--;
 | |
|   }
 | |
|   DBUG_ASSERT(deleted == 0);
 | |
| }
 | |
| #else
 | |
| #define check_directory(A,B,C,D,E)            /* Expands to nothing: calls compile away in this branch */
 | |
| #endif /* DBUG_OFF */
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Calculate if there is enough entries on the page
 | |
| */
 | |
| 
 | |
| static my_bool enough_free_entries(uchar *buff, uint block_size,
 | |
|                                    uint wanted_entries)
 | |
| {
 | |
|   uint entries= (uint) buff[DIR_COUNT_OFFSET];
 | |
|   uint needed_free_entries, free_entry;
 | |
| 
 | |
|   if (entries + wanted_entries <= MAX_ROWS_PER_PAGE)
 | |
|     return 1;
 | |
| 
 | |
|   /* Check if enough free entries in free list */
 | |
|   needed_free_entries= entries + wanted_entries - MAX_ROWS_PER_PAGE;
 | |
| 
 | |
|   free_entry= (uint) buff[DIR_FREE_OFFSET];
 | |
|   while (free_entry != END_OF_DIR_FREE_LIST)
 | |
|   {
 | |
|     uchar *dir;
 | |
|     if (!--needed_free_entries)
 | |
|       return 1;
 | |
|     dir= dir_entry_pos(buff, block_size, free_entry);
 | |
|     free_entry= dir[3];
 | |
|   }
 | |
|   return 0;                                     /* Not enough entries */
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Check if there is room for more rows on page
 | |
| 
 | |
|    @fn enough_free_entries_on_page
 | |
| 
 | |
|    @return 0    Directory is full
 | |
|    @return 1	There is room for more entries on the page
 | |
| */
 | |
| 
 | |
| my_bool enough_free_entries_on_page(MARIA_SHARE *share,
 | |
|                                     uchar *page_buff)
 | |
| {
 | |
|   enum en_page_type page_type;
 | |
|   page_type= (enum en_page_type) (page_buff[PAGE_TYPE_OFFSET] &
 | |
|                                   ~(uchar) PAGE_CAN_BE_COMPACTED);
 | |
| 
 | |
|   if (page_type == HEAD_PAGE)
 | |
|   {
 | |
|     uint row_count= (uint) page_buff[DIR_COUNT_OFFSET];
 | |
|     return !(row_count == MAX_ROWS_PER_PAGE &&
 | |
|              page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST);
 | |
|   }
 | |
|   return enough_free_entries(page_buff, share->block_size,
 | |
|                              1 + share->base.blobs);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Extend a record area to fit a given size block
 | |
| 
 | |
|    @fn extend_area_on_page()
 | |
|    @param info                  Handler
 | |
|    @param buff			Page buffer
 | |
|    @param dir			Pointer to dir entry in buffer
 | |
|    @param rownr			Row number we working on
 | |
|    @param block_size		Block size of buffer
 | |
|    @param request_length	How much data we want to put at [dir]
 | |
|    @param empty_space		Total empty space in buffer
 | |
| 			        This is updated with length after dir
 | |
|                                 is allocated and current block freed
 | |
|    @param head_page		1 if head page, 0 for tail page
 | |
| 
 | |
|   @implementation
 | |
|     The logic is as follows (same as in _ma_update_block_record())
 | |
|     - If new data fits in old block, use old block.
 | |
|     - Extend block with empty space before block. If enough, use it.
 | |
|     - Extend block with empty space after block. If enough, use it.
 | |
|     - Use _ma_compact_block_page() to get all empty space at dir.
 | |
| 
 | |
|   @note
 | |
|     The given directory entry is set to rec length.
 | |
|     empty_space doesn't include the new directory entry
 | |
| 
 | |
| 
 | |
|   @return
 | |
|   @retval 0   ok
 | |
|   @retval ret_offset		Pointer to store offset to found area
 | |
|   @retval ret_length		Pointer to store length of found area
 | |
|   @retval [dir]                 rec_offset is store here too
 | |
| 
 | |
|   @retval 1   error (wrong info in block)
 | |
| */
 | |
| 
 | |
| static my_bool extend_area_on_page(MARIA_HA *info,
 | |
|                                    uchar *buff, uchar *dir,
 | |
|                                    uint rownr,
 | |
|                                    uint request_length,
 | |
|                                    uint *empty_space, uint *ret_offset,
 | |
|                                    uint *ret_length,
 | |
|                                    my_bool head_page)
 | |
| {
 | |
|   uint rec_offset, length, org_rec_length;
 | |
|   uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uint block_size= share->block_size;
 | |
|   DBUG_ENTER("extend_area_on_page");
 | |
| 
 | |
|   /*
 | |
|     We can't check for min length here as we may have called
 | |
|     extend_directory() to create a new (empty) entry just before
 | |
|   */
 | |
|   check_directory(share, buff, block_size, 0, *empty_space);
 | |
| 
 | |
|   rec_offset= uint2korr(dir);
 | |
|   if (rec_offset)
 | |
|   {
 | |
|     /* Extending old row;  Mark current space as 'free' */
 | |
|     length= org_rec_length= uint2korr(dir + 2);
 | |
|     DBUG_PRINT("info", ("rec_offset: %u  length: %u  request_length: %u  "
 | |
|                         "empty_space: %u",
 | |
|                         rec_offset, org_rec_length, request_length,
 | |
|                         *empty_space));
 | |
| 
 | |
|     *empty_space+= org_rec_length;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     /* Reusing free directory entry; Free it from the directory list */
 | |
|     if (dir[2] == END_OF_DIR_FREE_LIST)
 | |
|       buff[DIR_FREE_OFFSET]= dir[3];
 | |
|     else
 | |
|     {
 | |
|       uchar *prev_dir= dir_entry_pos(buff, block_size, (uint) dir[2]);
 | |
|       DBUG_ASSERT(uint2korr(prev_dir) == 0 && prev_dir[3] == (uchar) rownr);
 | |
|       prev_dir[3]= dir[3];
 | |
|     }
 | |
|     if (dir[3] != END_OF_DIR_FREE_LIST)
 | |
|     {
 | |
|       uchar *next_dir= dir_entry_pos(buff, block_size, (uint) dir[3]);
 | |
|       DBUG_ASSERT(uint2korr(next_dir) == 0 && next_dir[2] == (uchar) rownr);
 | |
|       next_dir[2]= dir[2];
 | |
|     }
 | |
|     rec_offset= start_of_next_entry(dir);
 | |
|     length= 0;
 | |
|   }
 | |
|   if (length < request_length)
 | |
|   {
 | |
|     uint old_rec_offset;
 | |
|     /*
 | |
|       New data did not fit in old position.
 | |
|       Find first possible position where to put new data.
 | |
|     */
 | |
|     old_rec_offset= rec_offset;
 | |
|     rec_offset= end_of_previous_entry(share,
 | |
|                                       dir, buff + block_size -
 | |
|                                       PAGE_SUFFIX_SIZE);
 | |
|     length+= (uint) (old_rec_offset - rec_offset);
 | |
|     DBUG_ASSERT(old_rec_offset);
 | |
|     /*
 | |
|       'length' is 0 if we are doing an insert into a not allocated block.
 | |
|       This can only happen during "REDO of INSERT" or "UNDO of DELETE."
 | |
|     */
 | |
|     if (length < request_length)
 | |
|     {
 | |
|       /*
 | |
|         Did not fit in current block + empty space. Extend with
 | |
|         empty space after block.
 | |
|       */
 | |
|       if (rownr == max_entry - 1)
 | |
|       {
 | |
|         /* Last entry; Everything is free between this and directory */
 | |
|         length= ((block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE * max_entry) -
 | |
|                  rec_offset);
 | |
|       }
 | |
|       else
 | |
|         length= start_of_next_entry(dir) - rec_offset;
 | |
|       DBUG_ASSERT((int) length >= 0);
 | |
|       if (length < request_length)
 | |
|       {
 | |
|         /* Not enough continuous space, compact page to get more */
 | |
|         int2store(dir, rec_offset);
 | |
|         /* Reset length, as this may be a deleted block */
 | |
|         int2store(dir+2, 0);
 | |
|         _ma_compact_block_page(share,
 | |
|                                buff, rownr, 1,
 | |
|                                head_page ? info->trn->min_read_from: 0,
 | |
|                                head_page ? share->base.min_block_length : 0);
 | |
|         rec_offset= uint2korr(dir);
 | |
|         length=     uint2korr(dir+2);
 | |
|         if (length < request_length)
 | |
|         {
 | |
|           DBUG_PRINT("error", ("Not enough space: "
 | |
|                                "length: %u  request_length: %u",
 | |
|                                length, request_length));
 | |
|           _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
|           DBUG_RETURN(1);                       /* Error in block */
 | |
|         }
 | |
|         *empty_space= length;                   /* All space is here */
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   int2store(dir, rec_offset);
 | |
|   int2store(dir + 2, length);
 | |
|   *ret_offset= rec_offset;
 | |
|   *ret_length= length;
 | |
| 
 | |
|   check_directory(share,
 | |
|                   buff, block_size,
 | |
|                   head_page ? share->base.min_block_length : 0,
 | |
|                   *empty_space - length);
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Copy not changed fields from 'from' to 'to'
 | |
| 
 | |
|    @notes
 | |
|    Assumption is that most fields are not changed!
 | |
|    (Which is why we don't test if all bits are set for some bytes in bitmap)
 | |
| */
 | |
| 
 | |
| void copy_not_changed_fields(MARIA_HA *info, MY_BITMAP *changed_fields,
 | |
|                              uchar *to, uchar *from)
 | |
| {
 | |
|   MARIA_COLUMNDEF *column, *end_column;
 | |
|   uchar *bitmap= (uchar*) changed_fields->bitmap;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uint bit= 1;
 | |
| 
 | |
|   for (column= share->columndef, end_column= column+ share->base.fields;
 | |
|        column < end_column; column++)
 | |
|   {
 | |
|     if (!(*bitmap & bit))
 | |
|     {
 | |
|       uint field_length= column->length;
 | |
|       if (column->type == FIELD_VARCHAR)
 | |
|       {
 | |
|         if (column->fill_length == 1)
 | |
|           field_length= (uint) from[column->offset] + 1;
 | |
|         else
 | |
|           field_length= uint2korr(from + column->offset) + 2;
 | |
|       }
 | |
|       memcpy(to + column->offset, from + column->offset, field_length);
 | |
|     }
 | |
|     if ((bit= (bit << 1)) == 256)
 | |
|     {
 | |
|       bitmap++;
 | |
|       bit= 1;
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| #ifdef NOT_YET_NEEDED
 | |
| /* Calculate empty space on a page */
 | |
| 
 | |
| static uint empty_space_on_page(uchar *buff, uint block_size)
 | |
| {
 | |
|   enum en_page_type;
 | |
|   page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] &
 | |
|                                   ~(uchar) PAGE_CAN_BE_COMPACTED);
 | |
|   if (page_type == UNALLOCATED_PAGE)
 | |
|     return block_size;
 | |
|   if ((uint) page_type <= TAIL_PAGE)
 | |
|     return uint2korr(buff+EMPTY_SPACE_OFFSET);
 | |
|   return 0;                                     /* Blob page */
 | |
| }
 | |
| #endif
 | |
| 
 | |
| 
 | |
| /*
 | |
|   @brief Ensure we have space for new directory entries
 | |
| 
 | |
|   @fn make_space_for_directory()
 | |
|   @param info		Handler
 | |
|   @param buff		Page buffer
 | |
|   @param max_entry	Number of current entries in directory
 | |
|   @param count		Number of new entries to be added to directory
 | |
|   @param first_dir	First directory entry on page
 | |
|   @param empty_space    Total empty space in buffer. It's updated
 | |
| 			to reflect the new empty space
 | |
|   @param first_pos      Store position to last data byte on page here
 | |
|   @param head_page	1 if head page, 0 for tail page.
 | |
| 
 | |
|   @note
 | |
|   This function is inline as the argument passing is the biggest
 | |
|   part of the function
 | |
| 
 | |
|   @return
 | |
|   @retval 0  ok
 | |
|   @retval 1  error (No data on page, fatal error)
 | |
| */
 | |
| 
 | |
| static inline my_bool
 | |
| make_space_for_directory(MARIA_HA *info,
 | |
|                          uchar *buff, uint max_entry,
 | |
|                          uint count, uchar *first_dir, uint *empty_space,
 | |
|                          uint *first_pos,
 | |
|                          my_bool head_page)
 | |
| {
 | |
|   uint length_needed= DIR_ENTRY_SIZE * count;
 | |
|   MARIA_SHARE *share= info->s;
 | |
| 
 | |
|   /*
 | |
|     The following is not true only in the case and UNDO is used to reinsert
 | |
|     a row on a previously not used page
 | |
|   */
 | |
|   if (likely(max_entry))
 | |
|   {
 | |
|     /* Check if there is place for the directory entry on the page */
 | |
|     *first_pos= uint2korr(first_dir) + uint2korr(first_dir + 2);
 | |
| 
 | |
|     if ((uint) (first_dir - buff) < *first_pos + length_needed)
 | |
|     {
 | |
|       /* Create place for directory */
 | |
|       _ma_compact_block_page(share,
 | |
|                              buff, max_entry - 1, 0,
 | |
|                              head_page ? info->trn->min_read_from : 0,
 | |
|                              head_page ? share->base.min_block_length : 0);
 | |
|       *first_pos= (uint2korr(first_dir) + uint2korr(first_dir + 2));
 | |
|       *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
 | |
|       if (*empty_space < length_needed)
 | |
|       {
 | |
|         /*
 | |
|           We should always have space, as we only come here for
 | |
|           UNDO of DELETE (in which case we know the row was on the
 | |
|           page before) or if the bitmap told us there was space on page
 | |
|         */
 | |
|         DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|         return(1);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   else
 | |
|     *first_pos= PAGE_HEADER_SIZE(share);
 | |
| 
 | |
|   /* Reduce directory entry size from free space size */
 | |
|   (*empty_space)-= length_needed;
 | |
|   buff[DIR_COUNT_OFFSET]= (uchar) (max_entry + count);
 | |
|   return(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Find free position in directory
 | |
| 
 | |
|   SYNOPSIS
 | |
|   find_free_position()
 | |
|     info                Handler
 | |
|     buff                Page
 | |
|     block_size          Size of page
 | |
|     res_rownr           Store index to free position here
 | |
|     res_length		Store length of found segment here
 | |
|     empty_space		Store length of empty space on disk here. This is
 | |
| 		        all empty space, including the found block.
 | |
|   @param head_page	1 if head page, 0 for tail page.
 | |
| 
 | |
|   NOTES
 | |
|     If there is a free directory entry (entry with position == 0),
 | |
|     then use it and change it to be the size of the empty block
 | |
|     after the previous entry. This guarantees that all row entries
 | |
|     are stored on disk in inverse directory order, which makes life easier for
 | |
|     '_ma_compact_block_page()' and to know if there is free space after any
 | |
|     block.
 | |
| 
 | |
|     If there is no free entry (entry with position == 0), then we create
 | |
|     a new one. If there is not space for the directory entry (because
 | |
|     the last block overlapps with the directory), we compact the page.
 | |
| 
 | |
|     We will update the offset and the length of the found dir entry to
 | |
|     match the position and empty space found.
 | |
| 
 | |
|     buff[EMPTY_SPACE_OFFSET] is NOT updated but left up to the caller
 | |
| 
 | |
|     See start of file for description of how free directory entires are linked
 | |
| 
 | |
|   RETURN
 | |
|     0      Error (directory full or last block goes over directory)
 | |
|     #      Pointer to directory entry on page
 | |
| */
 | |
| 
 | |
| static uchar *find_free_position(MARIA_HA *info,
 | |
|                                  uchar *buff, uint block_size, uint *res_rownr,
 | |
|                                  uint *res_length, uint *empty_space,
 | |
|                                  my_bool head_page)
 | |
| {
 | |
|   uint max_entry, free_entry;
 | |
|   uint length, first_pos;
 | |
|   uchar *dir, *first_dir;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   DBUG_ENTER("find_free_position");
 | |
| 
 | |
|   max_entry= (uint) buff[DIR_COUNT_OFFSET];
 | |
|   free_entry= (uint) buff[DIR_FREE_OFFSET];
 | |
|   *empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
 | |
| 
 | |
|   DBUG_PRINT("info", ("max_entry: %u  free_entry: %u", max_entry, free_entry));
 | |
| 
 | |
|   first_dir= dir_entry_pos(buff, block_size, max_entry - 1);
 | |
| 
 | |
|   /* Search after first free position */
 | |
|   if (free_entry != END_OF_DIR_FREE_LIST)
 | |
|   {
 | |
|     if (free_entry >= max_entry)
 | |
|       DBUG_RETURN(0);                           /* Consistency error */
 | |
|     dir= dir_entry_pos(buff, block_size, free_entry);
 | |
|     DBUG_ASSERT(uint2korr(dir) == 0 && dir[2] == END_OF_DIR_FREE_LIST);
 | |
|     /* Relink free list */
 | |
|     if ((buff[DIR_FREE_OFFSET]= dir[3]) != END_OF_DIR_FREE_LIST)
 | |
|     {
 | |
|       uchar *next_entry= dir_entry_pos(buff, block_size, (uint) dir[3]);
 | |
|       DBUG_ASSERT((uint) next_entry[2] == free_entry &&
 | |
|                   uint2korr(next_entry) == 0);
 | |
|       next_entry[2]= END_OF_DIR_FREE_LIST;      /* Backlink */
 | |
|     }
 | |
| 
 | |
|     first_pos= end_of_previous_entry(share,
 | |
|                                      dir, buff + block_size -
 | |
|                                      PAGE_SUFFIX_SIZE);
 | |
|     length= start_of_next_entry(dir) - first_pos;
 | |
|     int2store(dir, first_pos);                /* Update dir entry */
 | |
|     int2store(dir + 2, 0);
 | |
|     *res_rownr= free_entry;
 | |
|     *res_length= length;
 | |
| 
 | |
|     check_directory(share, buff, block_size,
 | |
|                     head_page ? share->base.min_block_length : 0, (uint) -1);
 | |
|     DBUG_RETURN(dir);
 | |
|   }
 | |
|   /* No free places in dir; create a new one */
 | |
| 
 | |
|   /* Check if there is place for the directory entry */
 | |
|   if (max_entry == MAX_ROWS_PER_PAGE)
 | |
|     DBUG_RETURN(0);
 | |
| 
 | |
|   if (make_space_for_directory(info, buff, max_entry, 1,
 | |
|                                first_dir, empty_space, &first_pos, head_page))
 | |
|     DBUG_RETURN(0);
 | |
| 
 | |
|   dir= first_dir - DIR_ENTRY_SIZE;
 | |
|   length= (uint) (dir - buff - first_pos);
 | |
|   DBUG_ASSERT(length <= *empty_space);
 | |
|   int2store(dir, first_pos);
 | |
|   int2store(dir + 2, 0);                      /* Max length of region */
 | |
|   *res_rownr= max_entry;
 | |
|   *res_length= length;
 | |
| 
 | |
|   check_directory(share,
 | |
|                   buff, block_size,
 | |
|                   head_page ? share->base.min_block_length : 0,
 | |
|                   *empty_space);
 | |
|   DBUG_RETURN(dir);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Enlarge page directory to hold more entries
 | |
| 
 | |
|    @fn extend_directory()
 | |
|    @param info          Handler
 | |
|    @param buff		Page buffer
 | |
|    @param block_size	Block size
 | |
|    @param max_entry	Number of directory entries on page
 | |
|    @param new_entry	Position for new entry
 | |
|    @param empty_space	Total empty space in buffer. It's updated
 | |
| 			to reflect the new empty space
 | |
|    @param head_page	1 if head page, 0 for tail page.
 | |
| 
 | |
|    @note
 | |
|    This is only called on UNDO when we want to expand the directory
 | |
|    to be able to re-insert row in a given position
 | |
| 
 | |
|    The new directory entry will be set to cover the maximum possible space
 | |
| 
 | |
|    @return
 | |
|    @retval 0  ok
 | |
|    @retval 1  error (No data on page, fatal error)
 | |
| */
 | |
| 
 | |
| static my_bool extend_directory(MARIA_HA *info, uchar *buff, uint block_size,
 | |
|                                 uint max_entry, uint new_entry,
 | |
|                                 uint *empty_space, my_bool head_page)
 | |
| {
 | |
|   uint length, first_pos;
 | |
|   uchar *dir, *first_dir;
 | |
|   DBUG_ENTER("extend_directory");
 | |
| 
 | |
|   /*
 | |
|     Note that in if max_entry is 0, then first_dir will point to
 | |
|     an illegal directory entry. This is ok, as in this case we will
 | |
|     not access anything through first_dir.
 | |
|   */
 | |
|   first_dir= dir_entry_pos(buff, block_size, max_entry) + DIR_ENTRY_SIZE;
 | |
| 
 | |
|   if (make_space_for_directory(info, buff, max_entry,
 | |
|                                new_entry - max_entry + 1,
 | |
|                                first_dir, empty_space, &first_pos, head_page))
 | |
|     DBUG_RETURN(1);
 | |
| 
 | |
|   /* Set the new directory entry to cover the max possible length */
 | |
|   dir= first_dir - DIR_ENTRY_SIZE * (new_entry - max_entry + 1);
 | |
|   length= (uint) (dir - buff - first_pos);
 | |
|   int2store(dir, first_pos);
 | |
|   int2store(dir+2, length);
 | |
|   *empty_space-= length;
 | |
| 
 | |
|   if (new_entry-- > max_entry)
 | |
|   {
 | |
|     /* Link all row entries between new_entry and max_entry into free list */
 | |
|     uint free_entry= (uint) buff[DIR_FREE_OFFSET];
 | |
|     uint prev_entry= END_OF_DIR_FREE_LIST;
 | |
|     buff[DIR_FREE_OFFSET]= new_entry;
 | |
|     do
 | |
|     {
 | |
|       dir+= DIR_ENTRY_SIZE;
 | |
|       dir[0]= dir[1]= 0;
 | |
|       dir[2]= (uchar) prev_entry;
 | |
|       dir[3]= (uchar) new_entry-1;
 | |
|       prev_entry= new_entry;
 | |
|     } while (new_entry-- > max_entry);
 | |
|     if ((dir[3]= free_entry) != END_OF_DIR_FREE_LIST)
 | |
|     {
 | |
|       /* Relink next entry to point to newly freed entry */
 | |
|       uchar *next_entry= dir_entry_pos(buff, block_size, (uint) dir[3]);
 | |
|       DBUG_ASSERT(uint2korr(next_entry) == 0 &&
 | |
|                   next_entry[2] == END_OF_DIR_FREE_LIST);
 | |
|       next_entry[2]= max_entry;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   check_directory(info->s,
 | |
|                   buff, block_size,
 | |
|                   head_page ? MY_MIN(info->s->base.min_block_length, length) :
 | |
|                   0, *empty_space);
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /****************************************************************************
 | |
|   Updating records
 | |
| ****************************************************************************/
 | |
| 
 | |
| /*
 | |
|   Calculate length of all the different field parts
 | |
| 
 | |
|   SYNOPSIS
 | |
|     calc_record_size()
 | |
|     info	Maria handler
 | |
|     record      Row to store
 | |
|     row		Store statistics about row here
 | |
| 
 | |
|   NOTES
 | |
|     The statistics is used to find out how much space a row will need
 | |
|     and also where we can split a row when we need to split it into several
 | |
|     extents.
 | |
| */
 | |
| 
 | |
| static void calc_record_size(MARIA_HA *info, const uchar *record,
 | |
|                              MARIA_ROW *row)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uchar *field_length_data;
 | |
|   MARIA_COLUMNDEF *column, *end_column;
 | |
|   uint *null_field_lengths= row->null_field_lengths;
 | |
|   ulong *blob_lengths= row->blob_lengths;
 | |
|   DBUG_ENTER("calc_record_size");
 | |
| 
 | |
|   row->normal_length= row->char_length= row->varchar_length=
 | |
|     row->blob_length= row->extents_count= 0;
 | |
| 
 | |
|   /* Create empty bitmap and calculate length of each varlength/char field */
 | |
|   bzero(row->empty_bits, share->base.pack_bytes);
 | |
|   field_length_data= row->field_lengths;
 | |
|   for (column= share->columndef + share->base.fixed_not_null_fields,
 | |
|        end_column= share->columndef + share->base.fields;
 | |
|        column < end_column; column++, null_field_lengths++)
 | |
|   {
 | |
|     if ((record[column->null_pos] & column->null_bit))
 | |
|     {
 | |
|       if (column->type != FIELD_BLOB)
 | |
|         *null_field_lengths= 0;
 | |
|       else
 | |
|         *blob_lengths++= 0;
 | |
|       continue;
 | |
|     }
 | |
|     switch (column->type) {
 | |
|     case FIELD_CHECK:
 | |
|     case FIELD_NORMAL:                          /* Fixed length field */
 | |
|     case FIELD_ZERO:
 | |
|       DBUG_ASSERT(column->empty_bit == 0);
 | |
|       /* fall through */
 | |
|     case FIELD_SKIP_PRESPACE:                   /* Not packed */
 | |
|       row->normal_length+= column->length;
 | |
|       *null_field_lengths= column->length;
 | |
|       break;
 | |
|     case FIELD_SKIP_ZERO:                       /* Fixed length field */
 | |
|       if (memcmp(record+ column->offset, maria_zero_string,
 | |
|                  column->length) == 0)
 | |
|       {
 | |
|         row->empty_bits[column->empty_pos] |= column->empty_bit;
 | |
|         *null_field_lengths= 0;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         row->normal_length+= column->length;
 | |
|         *null_field_lengths= column->length;
 | |
|       }
 | |
|       break;
 | |
|     case FIELD_SKIP_ENDSPACE:                   /* CHAR */
 | |
|     {
 | |
|       const uchar *pos, *end;
 | |
|       for (pos= record + column->offset, end= pos + column->length;
 | |
|            end > pos && end[-1] == ' '; end--)
 | |
|         ;
 | |
|       if (pos == end)                           /* If empty string */
 | |
|       {
 | |
|         row->empty_bits[column->empty_pos]|= column->empty_bit;
 | |
|         *null_field_lengths= 0;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         uint length= (uint) (end - pos);
 | |
|         if (column->length <= 255)
 | |
|           *field_length_data++= (uchar) length;
 | |
|         else
 | |
|         {
 | |
|           int2store(field_length_data, length);
 | |
|           field_length_data+= 2;
 | |
|         }
 | |
|         row->char_length+= length;
 | |
|         *null_field_lengths= length;
 | |
|       }
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_VARCHAR:
 | |
|     {
 | |
|       uint length, field_length_data_length;
 | |
|       const uchar *field_pos= record + column->offset;
 | |
| 
 | |
|       /* 256 is correct as this includes the length uchar */
 | |
|       field_length_data[0]= field_pos[0];
 | |
|       if (column->length <= 256)
 | |
|       {
 | |
|         length= (uint) (uchar) *field_pos;
 | |
|         field_length_data_length= 1;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         length= uint2korr(field_pos);
 | |
|         field_length_data[1]= field_pos[1];
 | |
|         field_length_data_length= 2;
 | |
|       }
 | |
|       *null_field_lengths= length;
 | |
|       if (!length)
 | |
|       {
 | |
|         row->empty_bits[column->empty_pos]|= column->empty_bit;
 | |
|         break;
 | |
|       }
 | |
|       row->varchar_length+= length;
 | |
|       *null_field_lengths= length;
 | |
|       field_length_data+= field_length_data_length;
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_BLOB:
 | |
|     {
 | |
|       const uchar *field_pos= record + column->offset;
 | |
|       uint size_length= column->length - portable_sizeof_char_ptr;
 | |
|       ulong blob_length= _ma_calc_blob_length(size_length, field_pos);
 | |
| 
 | |
|       *blob_lengths++= blob_length;
 | |
|       if (!blob_length)
 | |
|         row->empty_bits[column->empty_pos]|= column->empty_bit;
 | |
|       else
 | |
|       {
 | |
|         row->blob_length+= blob_length;
 | |
|         memcpy(field_length_data, field_pos, size_length);
 | |
|         field_length_data+= size_length;
 | |
|       }
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
|       DBUG_ASSERT(0);
 | |
|     }
 | |
|   }
 | |
|   row->field_lengths_length= (uint) (field_length_data - row->field_lengths);
 | |
|   /*
 | |
|     - info->row_base_length is base information we must have on a page in first
 | |
|       extent:
 | |
|       - flag byte (1) + is_nulls_extended (0 | 1) + null_bytes + pack_bytes +
 | |
|         table_checksum (0 | 1)
 | |
|     - row->min_length is minimum amount of data we must store on
 | |
|       a page. bitmap code will ensure we get at list this much +
 | |
|       total number of extents and one extent information
 | |
|     - fixed_not_null_fields_length is length of fixed length fields that can't
 | |
|       be compacted
 | |
|     - head_length is the amount of data for the head page
 | |
|      (ie, all fields except blobs)
 | |
|   */
 | |
|   row->min_length=   (info->row_base_length +
 | |
|                       (share->base.max_field_lengths ?
 | |
|                        size_to_store_key_length(row->field_lengths_length) :
 | |
|                        0));
 | |
|   row->head_length= (row->min_length +
 | |
|                      share->base.fixed_not_null_fields_length +
 | |
|                      row->field_lengths_length +
 | |
|                      row->normal_length +
 | |
|                      row->char_length + row->varchar_length);
 | |
|   row->total_length= (row->head_length + row->blob_length);
 | |
|   if (row->total_length < share->base.min_block_length)
 | |
|     row->total_length= share->base.min_block_length;
 | |
|   DBUG_PRINT("exit", ("head_length: %lu  total_length: %lu",
 | |
|                       (ulong) row->head_length, (ulong) row->total_length));
 | |
|   DBUG_VOID_RETURN;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   Compact page by removing all space between rows
 | |
| 
 | |
|   Moves up all rows to start of page. Moves blocks that are directly after
 | |
|   each other with one memmove.
 | |
| 
 | |
|   @note if rownr is the last row in the page, and extend_block is false,
 | |
|   caller has to make sure to update bitmap page afterwards to reflect freed
 | |
|   space.
 | |
| 
 | |
|   @param  buff          Page to compact
 | |
|   @param  block_size    Size of page
 | |
|   @param  rownr         Put empty data after this row
 | |
|   @param  extend_block	If 1, extend the block at 'rownr' to cover the
 | |
|                         whole block.
 | |
|   @param  min_read_from If <> 0, remove all trid's that are less than this
 | |
| */
 | |
| 
 | |
| void _ma_compact_block_page(MARIA_SHARE *share,
 | |
|                             uchar *buff, uint rownr,
 | |
|                             my_bool extend_block, TrID min_read_from,
 | |
|                             uint min_row_length)
 | |
| {
 | |
|   uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
 | |
|   uint page_pos, next_free_pos, start_of_found_block, diff, end_of_found_block;
 | |
|   uint freed_size= 0;
 | |
|   uint block_size= share->block_size;
 | |
|   uchar *dir, *end;
 | |
|   DBUG_ENTER("_ma_compact_block_page");
 | |
|   DBUG_PRINT("enter", ("rownr: %u  min_read_from: %lu", rownr,
 | |
|                        (ulong) min_read_from));
 | |
|   DBUG_ASSERT(max_entry > 0 &&
 | |
|               max_entry < (block_size - PAGE_HEADER_SIZE(share) -
 | |
|                            PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE);
 | |
| 
 | |
|   /* Move all entries before and including rownr up to start of page */
 | |
|   dir= dir_entry_pos(buff, block_size, rownr);
 | |
|   end= dir_entry_pos(buff, block_size, 0);
 | |
|   page_pos= next_free_pos= start_of_found_block= PAGE_HEADER_SIZE(share);
 | |
|   diff= 0;
 | |
|   for (; dir <= end ; end-= DIR_ENTRY_SIZE)
 | |
|   {
 | |
|     uint offset= uint2korr(end);
 | |
| 
 | |
|     if (offset)
 | |
|     {
 | |
|       uint row_length= uint2korr(end + 2);
 | |
|       DBUG_ASSERT(offset >= page_pos);
 | |
|       DBUG_ASSERT(buff + offset + row_length <= dir);
 | |
|       DBUG_ASSERT(row_length >= min_row_length || row_length == 0);
 | |
| 
 | |
|       /* Row length can be zero if row is to be deleted */
 | |
|       if (min_read_from && row_length && (buff[offset] & ROW_FLAG_TRANSID))
 | |
|       {
 | |
|         TrID transid= transid_korr(buff+offset+1);
 | |
|         if (transid < min_read_from)
 | |
|         {
 | |
|           /* Remove transid from row by moving the start point of the row up */
 | |
|           buff[offset + TRANSID_SIZE]= buff[offset] & ~ROW_FLAG_TRANSID;
 | |
|           offset+= TRANSID_SIZE;
 | |
|           freed_size+= TRANSID_SIZE;
 | |
|           row_length-= TRANSID_SIZE;
 | |
|           int2store(end+2, row_length);
 | |
|         }
 | |
|       }
 | |
| 
 | |
|       if (offset != next_free_pos)
 | |
|       {
 | |
|         uint length= (next_free_pos - start_of_found_block);
 | |
|         /*
 | |
|           There was empty space before this and prev block
 | |
|           Check if we have to move previous block up to page start
 | |
|         */
 | |
|         if (page_pos != start_of_found_block)
 | |
|         {
 | |
|           /* move up previous block */
 | |
|           memmove(buff + page_pos, buff + start_of_found_block, length);
 | |
|         }
 | |
|         page_pos+= length;
 | |
|         /* next continuous block starts here */
 | |
|         start_of_found_block= offset;
 | |
|         diff= offset - page_pos;
 | |
|       }
 | |
|       int2store(end, offset - diff);            /* correct current pos */
 | |
|       next_free_pos= offset + row_length;
 | |
| 
 | |
|       if (unlikely(row_length < min_row_length) && row_length)
 | |
|       {
 | |
|         /*
 | |
|           This can only happen in the case we compacted transid and
 | |
|           the row become 'too short'
 | |
| 
 | |
|           Move the current row down to it's right place and extend it
 | |
|           with 0.
 | |
|         */
 | |
|         uint row_diff= min_row_length - row_length;
 | |
|         uint length= (next_free_pos - start_of_found_block);
 | |
| 
 | |
|         DBUG_ASSERT(page_pos != start_of_found_block);
 | |
|         bmove(buff + page_pos, buff + start_of_found_block, length);
 | |
|         bzero(buff+ page_pos + length, row_diff);
 | |
|         page_pos+= min_row_length;
 | |
|         int2store(end+2, min_row_length);
 | |
|         freed_size-= row_diff;
 | |
|         next_free_pos= start_of_found_block= page_pos;
 | |
|         diff= 0;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   if (page_pos != start_of_found_block)
 | |
|   {
 | |
|     uint length= (next_free_pos - start_of_found_block);
 | |
|     memmove(buff + page_pos, buff + start_of_found_block, length);
 | |
|   }
 | |
|   start_of_found_block= uint2korr(dir);
 | |
| 
 | |
|   if (rownr != max_entry - 1)
 | |
|   {
 | |
|     /* Move all entries after rownr to end of page */
 | |
|     uint rownr_length;
 | |
| 
 | |
|     DBUG_ASSERT(extend_block);                  /* Should always be true */
 | |
|     next_free_pos= end_of_found_block= page_pos=
 | |
|       block_size - DIR_ENTRY_SIZE * max_entry - PAGE_SUFFIX_SIZE;
 | |
|     diff= 0;
 | |
|     /* End points to entry before 'rownr' */
 | |
|     for (dir= buff + end_of_found_block ; dir <= end ; dir+= DIR_ENTRY_SIZE)
 | |
|     {
 | |
|       uint offset= uint2korr(dir);
 | |
|       uint row_length;
 | |
|       uint row_end;
 | |
|       if (!offset)
 | |
|         continue;
 | |
|       row_length= uint2korr(dir + 2);
 | |
|       row_end= offset + row_length;
 | |
|       DBUG_ASSERT(offset >= start_of_found_block &&
 | |
|                   row_end <= next_free_pos && row_length >= min_row_length);
 | |
| 
 | |
|       if (min_read_from && (buff[offset] & ROW_FLAG_TRANSID))
 | |
|       {
 | |
|         TrID transid= transid_korr(buff + offset+1);
 | |
|         if (transid < min_read_from)
 | |
|         {
 | |
|           /* Remove transid from row */
 | |
|           buff[offset + TRANSID_SIZE]= buff[offset] & ~ROW_FLAG_TRANSID;
 | |
|           offset+= TRANSID_SIZE;
 | |
|           row_length-= TRANSID_SIZE;
 | |
|           int2store(dir+2, row_length);
 | |
|         }
 | |
|         if (unlikely(row_length < min_row_length))
 | |
|         {
 | |
|           /*
 | |
|             This can only happen in the case we compacted transid and
 | |
|             the row become 'too short'
 | |
|           */
 | |
|           uint row_diff= min_row_length - row_length;
 | |
|           if (next_free_pos < row_end + row_diff)
 | |
|           {
 | |
|             /*
 | |
|               Not enough space for extending next block with enough
 | |
|               end 0's. Move current data down to get place for them
 | |
|             */
 | |
|             uint move_down= row_diff - (next_free_pos - row_end);
 | |
|             bmove(buff + offset - move_down, buff + offset, row_length);
 | |
|             offset-= move_down;
 | |
|           }
 | |
|           /*
 | |
|             Extend the next block with 0, which will be part of current
 | |
|             row when the blocks are joined together later
 | |
|           */
 | |
|           bzero(buff + next_free_pos - row_diff, row_diff);
 | |
|           next_free_pos-= row_diff;
 | |
|           int2store(dir+2, min_row_length);
 | |
|         }
 | |
|         row_end= offset + row_length;
 | |
|       }
 | |
| 
 | |
|       if (row_end != next_free_pos)
 | |
|       {
 | |
|         uint length= (end_of_found_block - next_free_pos);
 | |
|         if (page_pos != end_of_found_block)
 | |
|         {
 | |
|           /* move next block down */
 | |
|           memmove(buff + page_pos - length, buff + next_free_pos, length);
 | |
|         }
 | |
|         page_pos-= length;
 | |
|         /* next continuous block starts here */
 | |
|         end_of_found_block= row_end;
 | |
|         diff= page_pos - row_end;
 | |
|       }
 | |
|       int2store(dir, offset + diff);            /* correct current pos */
 | |
|       next_free_pos= offset;
 | |
|     }
 | |
|     if (page_pos != end_of_found_block)
 | |
|     {
 | |
|       uint length= (end_of_found_block - next_free_pos);
 | |
|       memmove(buff + page_pos - length, buff + next_free_pos, length);
 | |
|       next_free_pos= page_pos- length;
 | |
|     }
 | |
| 
 | |
|     /* Extend rownr block to cover hole */
 | |
|     rownr_length= next_free_pos - start_of_found_block;
 | |
|     int2store(dir+2, rownr_length);
 | |
|     DBUG_ASSERT(rownr_length >= min_row_length);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     if (extend_block)
 | |
|     {
 | |
|       /* Extend last block to cover whole page */
 | |
|       uint length= ((uint) (dir - buff) - start_of_found_block);
 | |
|       int2store(dir+2, length);
 | |
|       DBUG_ASSERT(length >= min_row_length);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       /* Add length gained from freed transaction id's to this page */
 | |
|       uint length= uint2korr(buff+ EMPTY_SPACE_OFFSET) + freed_size;
 | |
|       int2store(buff + EMPTY_SPACE_OFFSET, length);
 | |
|     }
 | |
|     buff[PAGE_TYPE_OFFSET]&= ~(uchar) PAGE_CAN_BE_COMPACTED;
 | |
|   }
 | |
|   check_directory(share, buff, block_size, min_row_length,
 | |
|                   extend_block ? 0 : (uint) -1);
 | |
|   DBUG_EXECUTE("directory", _ma_print_directory(share,
 | |
|                                                 DBUG_FILE, buff, block_size););
 | |
|   DBUG_VOID_RETURN;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Create an empty tail or head page
 | |
| 
 | |
|   SYNOPSIS
 | |
|     make_empty_page()
 | |
|     buff		Page buffer
 | |
|     block_size		Block size
 | |
|     page_type		HEAD_PAGE or TAIL_PAGE
 | |
|     create_dir_entry	TRUE of we should create a directory entry
 | |
| 
 | |
|   NOTES
 | |
|     EMPTY_SPACE is not updated
 | |
| */
 | |
| 
 | |
| static void make_empty_page(MARIA_HA *info, uchar *buff, uint page_type,
 | |
|                             my_bool create_dir_entry)
 | |
| {
 | |
|   uint block_size= info->s->block_size;
 | |
|   DBUG_ENTER("make_empty_page");
 | |
| 
 | |
|   bzero(buff, PAGE_HEADER_SIZE(info->s));
 | |
| 
 | |
| #if !defined(DONT_ZERO_PAGE_BLOCKS) || defined(HAVE_valgrind)
 | |
|   /*
 | |
|     We zero the rest of the block to avoid getting old memory information
 | |
|     to disk and to allow the file to be compressed better if archived.
 | |
|     The code does not assume the block is zeroed.
 | |
|   */
 | |
|   if (page_type != BLOB_PAGE)
 | |
|     bzero(buff+ PAGE_HEADER_SIZE(info->s),
 | |
|           block_size - PAGE_HEADER_SIZE(info->s));
 | |
| #endif
 | |
|   buff[PAGE_TYPE_OFFSET]= (uchar) page_type;
 | |
|   buff[DIR_COUNT_OFFSET]= (int) create_dir_entry;
 | |
|   buff[DIR_FREE_OFFSET]=  END_OF_DIR_FREE_LIST;
 | |
|   if (create_dir_entry)
 | |
|   {
 | |
|     /* Create directory entry to point to start of page with size 0 */
 | |
|     buff+= block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE;
 | |
|     int2store(buff, PAGE_HEADER_SIZE(info->s));
 | |
|     int2store(buff+2, 0);
 | |
|   }
 | |
|   DBUG_VOID_RETURN;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Read or initialize new head or tail page
 | |
| 
 | |
|   SYNOPSIS
 | |
|     get_head_or_tail_page()
 | |
|     info                        Maria handler
 | |
|     block                       Block to read
 | |
|     buff                        Suggest this buffer to key cache
 | |
|     length                      Minimum space needed
 | |
|     page_type			HEAD_PAGE || TAIL_PAGE
 | |
|     res                         Store result position here
 | |
| 
 | |
|   NOTES
 | |
|     We don't decremented buff[EMPTY_SPACE_OFFSET] with the allocated data
 | |
|     as we don't know how much data the caller will actually use.
 | |
| 
 | |
|     res->empty_space is set to length of empty space
 | |
| 
 | |
|   RETURN
 | |
|     0  ok     All slots in 'res' are updated
 | |
|     1  error  my_errno is set
 | |
| */
 | |
| 
 | |
| struct st_row_pos_info
 | |
| {
 | |
|   uchar *buff;                                  /* page buffer */
 | |
|   uchar *data;                                  /* Place for data */
 | |
|   uchar *dir;                                   /* Directory */
 | |
|   uint length;                                  /* Length for data */
 | |
|   uint rownr;                                   /* Offset in directory */
 | |
|   uint empty_space;                             /* Space left on page */
 | |
| };
 | |
| 
 | |
| 
 | |
| static my_bool get_head_or_tail_page(MARIA_HA *info,
 | |
|                                      const MARIA_BITMAP_BLOCK *block,
 | |
|                                      uchar *buff, uint length, uint page_type,
 | |
|                                      enum pagecache_page_lock lock,
 | |
|                                      struct st_row_pos_info *res)
 | |
| {
 | |
|   uint block_size;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   DBUG_ENTER("get_head_or_tail_page");
 | |
|   DBUG_PRINT("enter", ("page_type: %u  length: %u", page_type, length));
 | |
| 
 | |
|   block_size= share->block_size;
 | |
|   if (block->org_bitmap_value == 0)             /* Empty block */
 | |
|   {
 | |
|     /* New page */
 | |
|     make_empty_page(info, buff, page_type, 1);
 | |
|     res->buff= buff;
 | |
|     res->empty_space= res->length= (block_size - PAGE_OVERHEAD_SIZE(share));
 | |
|     res->data= (buff + PAGE_HEADER_SIZE(share));
 | |
|     res->dir= res->data + res->length;
 | |
|     res->rownr= 0;
 | |
|     DBUG_ASSERT(length <= res->length);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     uchar *dir;
 | |
|     /* Read old page */
 | |
|     page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|     res->buff= pagecache_read(share->pagecache, &info->dfile,
 | |
|                               block->page, 0, 0, share->page_type,
 | |
|                               lock, &page_link.link);
 | |
|     page_link.changed= res->buff != 0;
 | |
|     push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
|     if (!page_link.changed)
 | |
|     {
 | |
|       _ma_set_fatal_error(info, my_errno);
 | |
|       DBUG_RETURN(1);
 | |
|     }
 | |
| 
 | |
|     DBUG_ASSERT((uint) (res->buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) ==
 | |
|                 page_type);
 | |
|     if (!(dir= find_free_position(info, res->buff, block_size, &res->rownr,
 | |
|                                   &res->length, &res->empty_space,
 | |
|                                   page_type == HEAD_PAGE)))
 | |
|       goto crashed;
 | |
| 
 | |
|     if (res->length < length)
 | |
|     {
 | |
|       if (res->empty_space + res->length >= length)
 | |
|       {
 | |
|         _ma_compact_block_page(share,
 | |
|                                res->buff, res->rownr, 1,
 | |
|                                (page_type == HEAD_PAGE ?
 | |
|                                 info->trn->min_read_from : 0),
 | |
|                                (page_type == HEAD_PAGE ?
 | |
|                                 share->base.min_block_length :
 | |
|                                 0));
 | |
|         /* All empty space are now after current position */
 | |
|         dir= dir_entry_pos(res->buff, block_size, res->rownr);
 | |
|         res->length= res->empty_space= uint2korr(dir+2);
 | |
|       }
 | |
|       if (res->length < length)
 | |
|       {
 | |
|         DBUG_PRINT("error", ("length: %u  res->length: %u  empty_space: %u",
 | |
|                              length, res->length, res->empty_space));
 | |
|         goto crashed;                         /* Wrong bitmap information */
 | |
|       }
 | |
|     }
 | |
|     res->dir= dir;
 | |
|     res->data= res->buff + uint2korr(dir);
 | |
|   }
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| crashed:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);  /* File crashed */
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   @brief Create room for a head or tail row on a given page at given position
 | |
| 
 | |
|   @fn get_rowpos_in_head_or_tail_page()
 | |
|   @param info                        Maria handler
 | |
|   @param block                       Block to read
 | |
|   @param buff                        Suggest this buffer to key cache
 | |
|   @param length                      Minimum space needed
 | |
|   @param page_type	             HEAD_PAGE || TAIL_PAGE
 | |
|   @param rownr			     Rownr to use
 | |
|   @param res                         Store result position here
 | |
| 
 | |
|   @note
 | |
|     This is essential same as get_head_or_tail_page, with the difference
 | |
|     that the caller species at what position the row should be put.
 | |
|     This is used when restoring a row to it's original position as
 | |
|     part of UNDO DELETE or UNDO UPDATE
 | |
| 
 | |
|   @return
 | |
|   @retval 0  ok     All slots in 'res' are updated
 | |
|   @retval 1  error  my_errno is set
 | |
| */
 | |
| 
 | |
| static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info,
 | |
|                                                const MARIA_BITMAP_BLOCK *block,
 | |
|                                                uchar *buff, uint length,
 | |
|                                                uint page_type,
 | |
|                                                enum pagecache_page_lock lock,
 | |
|                                                uint rownr,
 | |
|                                                struct st_row_pos_info *res)
 | |
| {
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uchar *dir;
 | |
|   uint block_size= share->block_size;
 | |
|   uint max_entry, max_length, rec_offset;
 | |
|   DBUG_ENTER("get_rowpos_in_head_or_tail_page");
 | |
| 
 | |
|   if (block->org_bitmap_value == 0)             /* Empty block */
 | |
|   {
 | |
|     /* New page */
 | |
|     make_empty_page(info, buff, page_type, 0);
 | |
|     res->empty_space= block_size - PAGE_HEADER_SIZE(share) - PAGE_SUFFIX_SIZE;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|     buff= pagecache_read(share->pagecache, &info->dfile,
 | |
|                          block->page, 0, 0, share->page_type,
 | |
|                          lock, &page_link.link);
 | |
|     page_link.changed= buff != 0;
 | |
|     push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
|     if (!page_link.changed)                     /* Read error */
 | |
|     {
 | |
|       _ma_set_fatal_error(info, my_errno);
 | |
|       DBUG_RETURN(1);
 | |
|     }
 | |
|     DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) ==
 | |
|                 (uchar) page_type);
 | |
|     if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != (uchar) page_type)
 | |
|       goto err;
 | |
|     res->empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
 | |
|   }
 | |
| 
 | |
|   max_entry= (uint) buff[DIR_COUNT_OFFSET];
 | |
|   if (max_entry <= rownr)
 | |
|   {
 | |
|     if (extend_directory(info, buff, block_size,
 | |
|                          max_entry, rownr, &res->empty_space,
 | |
|                          page_type == HEAD_PAGE))
 | |
|       goto err;
 | |
|   }
 | |
| 
 | |
|   /*
 | |
|     The following dir entry is unused in case of insert / update but
 | |
|     not in case of undo_update / undo_delete
 | |
|   */
 | |
|   dir= dir_entry_pos(buff, block_size, rownr);
 | |
| 
 | |
|   if (extend_area_on_page(info, buff, dir, rownr, length,
 | |
|                           &res->empty_space, &rec_offset, &max_length,
 | |
|                           page_type == HEAD_PAGE))
 | |
|     goto err;
 | |
| 
 | |
|   res->buff= buff;
 | |
|   res->rownr= rownr;
 | |
|   res->dir= dir;
 | |
|   res->data= buff + rec_offset;
 | |
|   res->length= length;
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);  /* File crashed */
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Write tail for head data or blob
 | |
| 
 | |
|   SYNOPSIS
 | |
|     write_tail()
 | |
|     info                Maria handler
 | |
|     block               Block to tail page
 | |
|     row_part            Data to write to page
 | |
|     length              Length of data
 | |
| 
 | |
|   NOTES
 | |
|     block->page_count is updated to the directory offset for the tail
 | |
|     so that we can store the position in the row extent information
 | |
| 
 | |
|   RETURN
 | |
|     0  ok
 | |
|        block->page_count is set to point (dir entry + TAIL_BIT)
 | |
| 
 | |
|     1  error; In this case my_errno is set to the error
 | |
| */
 | |
| 
 | |
| static my_bool write_tail(MARIA_HA *info,
 | |
|                           MARIA_BITMAP_BLOCK *block,
 | |
|                           uchar *row_part, uint org_length)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   uint block_size= share->block_size, empty_space, length= org_length;
 | |
|   struct st_row_pos_info row_pos;
 | |
|   my_off_t position;
 | |
|   my_bool res, block_is_read;
 | |
|   DBUG_ENTER("write_tail");
 | |
|   DBUG_PRINT("enter", ("page: %lu  length: %u",
 | |
|                        (ulong) block->page, length));
 | |
| 
 | |
|   info->keyread_buff_used= 1;
 | |
|   /*
 | |
|     Don't allocate smaller block than MIN_TAIL_SIZE (we want to give rows
 | |
|     some place to grow in the future)
 | |
|   */
 | |
|   if (length < MIN_TAIL_SIZE)
 | |
|     length= MIN_TAIL_SIZE;
 | |
| 
 | |
|   if (block->page_count == TAIL_PAGE_COUNT_MARKER)
 | |
|   {
 | |
|     /*
 | |
|       Create new tail
 | |
|       page will be pinned & locked by get_head_or_tail_page
 | |
|     */
 | |
|     if (get_head_or_tail_page(info, block, info->keyread_buff, length,
 | |
|                               TAIL_PAGE, PAGECACHE_LOCK_WRITE,
 | |
|                               &row_pos))
 | |
|       DBUG_RETURN(1);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     /* Write tail on predefined row position */
 | |
|     if (get_rowpos_in_head_or_tail_page(info, block, info->keyread_buff,
 | |
|                                         length, TAIL_PAGE,
 | |
|                                         PAGECACHE_LOCK_WRITE,
 | |
|                                         block->page_count & ~TAIL_BIT,
 | |
|                                         &row_pos))
 | |
|       DBUG_RETURN(1);
 | |
|   }
 | |
|   DBUG_PRINT("info", ("tailid: %lu (%lu:%u)",
 | |
|                       (ulong) ma_recordpos(block->page, row_pos.rownr),
 | |
|                       (ulong) block->page, row_pos.rownr));
 | |
| 
 | |
|   block_is_read= block->org_bitmap_value != 0;
 | |
| 
 | |
|   memcpy(row_pos.data, row_part, org_length);
 | |
| 
 | |
|   if (share->now_transactional)
 | |
|   {
 | |
|     /* Log changes in tail block */
 | |
|     uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
 | |
|     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
 | |
|     LSN lsn;
 | |
| 
 | |
|     /*
 | |
|       Log REDO changes of tail page
 | |
|       Note that we have to log length, not org_length, to be sure that
 | |
|       REDO, which doesn't use write_tail, also creates a block of at least
 | |
|       MIN_TAIL_SIZE
 | |
|      */
 | |
|     page_store(log_data + FILEID_STORE_SIZE, block->page);
 | |
|     dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 | |
|                  row_pos.rownr);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 1].str=    row_pos.data;
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
 | |
|     if (translog_write_record(&lsn,
 | |
|                               (block_is_read ? LOGREC_REDO_INSERT_ROW_TAIL :
 | |
|                                LOGREC_REDO_NEW_ROW_TAIL),
 | |
|                               info->trn, info,
 | |
|                               (translog_size_t) (sizeof(log_data) + length),
 | |
|                               TRANSLOG_INTERNAL_PARTS + 2, log_array,
 | |
|                               log_data, NULL))
 | |
|       DBUG_RETURN(1);
 | |
|   }
 | |
| 
 | |
|   int2store(row_pos.dir + 2, length);
 | |
|   empty_space= row_pos.empty_space - length;
 | |
|   int2store(row_pos.buff + EMPTY_SPACE_OFFSET, empty_space);
 | |
|   block->page_count= row_pos.rownr + TAIL_BIT;
 | |
|   /*
 | |
|     If there is less directory entries free than number of possible tails
 | |
|     we can write for a row, we mark the page full to ensure that we don't
 | |
|     during _ma_bitmap_find_place() allocate more entries on the tail page
 | |
|     than it can hold
 | |
|   */
 | |
|   block->empty_space= (enough_free_entries(row_pos.buff, share->block_size,
 | |
|                                            1 + share->base.blobs) ?
 | |
|                        empty_space : 0);
 | |
|   /* Keep BLOCKUSED_USE_ORG_BITMAP */
 | |
|   block->used|= BLOCKUSED_USED | BLOCKUSED_TAIL;
 | |
| 
 | |
|   if (block_is_read)
 | |
|   {
 | |
|     /* Current page link is last element in pinned_pages */
 | |
|     MARIA_PINNED_PAGE *page_link;
 | |
|     page_link= dynamic_element(&info->pinned_pages,
 | |
|                                info->pinned_pages.elements-1,
 | |
|                                MARIA_PINNED_PAGE*);
 | |
|     pagecache_unlock_by_link(share->pagecache, page_link->link,
 | |
|                              PAGECACHE_LOCK_WRITE_TO_READ,
 | |
|                              PAGECACHE_PIN_LEFT_PINNED, LSN_IMPOSSIBLE,
 | |
|                              LSN_IMPOSSIBLE, 1, FALSE);
 | |
|     DBUG_ASSERT(page_link->changed);
 | |
|     page_link->unlock= PAGECACHE_LOCK_READ_UNLOCK;
 | |
|     res= 0;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     if (!(res= pagecache_write(share->pagecache,
 | |
|                                &info->dfile, block->page, 0,
 | |
|                                row_pos.buff,share->page_type,
 | |
|                                PAGECACHE_LOCK_READ,
 | |
|                                PAGECACHE_PIN,
 | |
|                                PAGECACHE_WRITE_DELAY, &page_link.link,
 | |
|                                LSN_IMPOSSIBLE)))
 | |
|     {
 | |
|       DBUG_ASSERT(page_link.link);
 | |
|       page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
 | |
|       page_link.changed= 1;
 | |
|       push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
|     }
 | |
| 
 | |
|     /* Increase data file size, if extended */
 | |
|     position= (my_off_t) block->page * block_size;
 | |
|     if (share->state.state.data_file_length <= position)
 | |
|     {
 | |
|       /*
 | |
|         We are modifying a state member before writing the UNDO; this is a WAL
 | |
|         violation. But for data_file_length this is ok, as long as we change
 | |
|         data_file_length after writing any log record (FILE_ID/REDO/UNDO) (see
 | |
|         collect_tables()).
 | |
|       */
 | |
|       _ma_set_share_data_file_length(share, position + block_size);
 | |
|     }
 | |
|   }
 | |
|   DBUG_RETURN(res);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Write full pages
 | |
| 
 | |
|   SYNOPSIS
 | |
|     write_full_pages()
 | |
|     info                Maria handler
 | |
|     lsn			LSN for the undo record
 | |
|     block               Where to write data
 | |
|     data                Data to write
 | |
|     length              Length of data
 | |
| 
 | |
|   NOTES
 | |
|     Logging of the changes to the full pages are done in the caller
 | |
|     write_block_record().
 | |
| 
 | |
|   RETURN
 | |
|     0  ok
 | |
|     1  error on write
 | |
| */
 | |
| 
 | |
| static my_bool write_full_pages(MARIA_HA *info,
 | |
|                                 LSN lsn,
 | |
|                                 MARIA_BITMAP_BLOCK *block,
 | |
|                                 uchar *data, ulong length)
 | |
| {
 | |
|   pgcache_page_no_t page;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uint block_size= share->block_size;
 | |
|   uint data_size= FULL_PAGE_SIZE(share);
 | |
|   uchar *buff= info->keyread_buff;
 | |
|   uint page_count, sub_blocks;
 | |
|   my_off_t position, max_position;
 | |
|   DBUG_ENTER("write_full_pages");
 | |
|   DBUG_PRINT("enter", ("length: %lu  page: %lu  page_count: %lu",
 | |
|                        (ulong) length, (ulong) block->page,
 | |
|                        (ulong) block->page_count));
 | |
|   DBUG_ASSERT((block->page_count & TAIL_BIT) == 0);
 | |
| 
 | |
|   info->keyread_buff_used= 1;
 | |
|   page=       block->page;
 | |
|   page_count= block->page_count;
 | |
|   sub_blocks= block->sub_blocks;
 | |
| 
 | |
|   max_position= (my_off_t) (page + page_count) * block_size;
 | |
| 
 | |
|   /* Increase data file size, if extended */
 | |
| 
 | |
|   for (; length; data+= data_size)
 | |
|   {
 | |
|     uint copy_length;
 | |
|     if (!page_count--)
 | |
|     {
 | |
|       if (!--sub_blocks)
 | |
|       {
 | |
|         _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
|         DBUG_RETURN(1);
 | |
|       }
 | |
| 
 | |
|       block++;
 | |
|       page= block->page;
 | |
|       page_count= block->page_count - 1;
 | |
|       DBUG_PRINT("info", ("page: %lu  page_count: %lu",
 | |
|                           (ulong) block->page, (ulong) block->page_count));
 | |
| 
 | |
|       position= (page + page_count + 1) * block_size;
 | |
|       set_if_bigger(max_position, position);
 | |
|     }
 | |
|     lsn_store(buff, lsn);
 | |
|     buff[PAGE_TYPE_OFFSET]= (uchar) BLOB_PAGE;
 | |
|     bzero(buff + LSN_SIZE + PAGE_TYPE_SIZE,
 | |
|           FULL_PAGE_HEADER_SIZE(share) - (LSN_SIZE + PAGE_TYPE_SIZE));
 | |
|     copy_length= MY_MIN(data_size, length);
 | |
|     memcpy(buff + FULL_PAGE_HEADER_SIZE(share), data, copy_length);
 | |
|     length-= copy_length;
 | |
| 
 | |
|     /*
 | |
|       Zero out old information from the block. This removes possible
 | |
|       sensitive information from the block and also makes the file
 | |
|       easier to compress and easier to compare after recovery.
 | |
|     */
 | |
|     if (copy_length != data_size)
 | |
|       bzero(buff + block_size - PAGE_SUFFIX_SIZE - (data_size - copy_length),
 | |
|             (data_size - copy_length) + PAGE_SUFFIX_SIZE);
 | |
| 
 | |
|     if (pagecache_write(share->pagecache,
 | |
|                         &info->dfile, page, 0,
 | |
|                         buff, share->page_type,
 | |
|                         PAGECACHE_LOCK_LEFT_UNLOCKED,
 | |
|                         PAGECACHE_PIN_LEFT_UNPINNED,
 | |
|                         PAGECACHE_WRITE_DELAY,
 | |
|                         0, info->trn->rec_lsn))
 | |
|       DBUG_RETURN(1);
 | |
|     page++;
 | |
|     DBUG_ASSERT(block->used & BLOCKUSED_USED);
 | |
|   }
 | |
|   if (share->state.state.data_file_length < max_position)
 | |
|     _ma_set_share_data_file_length(share, max_position);
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Store ranges of full pages in compact format for logging
 | |
| 
 | |
|   SYNOPSIS
 | |
|     store_page_range()
 | |
|     to		Store data here
 | |
|     block       Where pages are to be written
 | |
|     length	Length of data to be written
 | |
| 		Normally this is full pages, except for the last
 | |
|                 tail block that may only partly fit the last page.
 | |
|     tot_ranges  Add here the number of ranges used
 | |
| 
 | |
|   NOTES
 | |
|     The format of one entry is:
 | |
| 
 | |
|      Ranges				 SUB_RANGE_SIZE
 | |
|      Empty bytes at end of last byte     BLOCK_FILLER_SIZE
 | |
|      For each range
 | |
|        Page number                       PAGE_STORE_SIZE
 | |
|        Number of pages			 PAGERANGE_STORE_SIZE
 | |
| 
 | |
|   RETURN
 | |
|     #  end position for 'to'
 | |
| */
 | |
| 
 | |
| static uchar *store_page_range(MARIA_SHARE *share,
 | |
|                                uchar *to, MARIA_BITMAP_BLOCK *block,
 | |
|                                ulong length,
 | |
|                                uint *tot_ranges)
 | |
| {
 | |
|   uint data_size= FULL_PAGE_SIZE(share);
 | |
|   ulong pages_left= (length + data_size -1) / data_size;
 | |
|   uint page_count, ranges, empty_space;
 | |
|   uchar *to_start;
 | |
|   DBUG_ENTER("store_page_range");
 | |
| 
 | |
|   to_start= to;
 | |
|   to+= SUB_RANGE_SIZE;
 | |
| 
 | |
|   /* Store number of unused bytes at last page */
 | |
|   empty_space= (uint) (pages_left * data_size - length);
 | |
|   int2store(to, empty_space);
 | |
|   to+= BLOCK_FILLER_SIZE;
 | |
| 
 | |
|   ranges= 0;
 | |
|   do
 | |
|   {
 | |
|     pgcache_page_no_t page;
 | |
|     page=       block->page;
 | |
|     page_count= block->page_count;
 | |
|     block++;
 | |
|     if (page_count > pages_left)
 | |
|       page_count= pages_left;
 | |
| 
 | |
|     page_store(to, page);
 | |
|     to+= PAGE_STORE_SIZE;
 | |
|     pagerange_store(to, page_count);
 | |
|     to+= PAGERANGE_STORE_SIZE;
 | |
|     ranges++;
 | |
|   } while ((pages_left-= page_count));
 | |
|   /* Store number of ranges for this block */
 | |
|   int2store(to_start, ranges);
 | |
|   (*tot_ranges)+= ranges;
 | |
| 
 | |
|   DBUG_RETURN(to);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Store packed extent data
 | |
| 
 | |
|   SYNOPSIS
 | |
|    store_extent_info()
 | |
|    to				Store first packed data here
 | |
|    row_extents_second_part	Store rest here
 | |
|    first_block		        First block to store
 | |
|    count			Number of blocks
 | |
| 
 | |
|   NOTES
 | |
|     We don't have to store the position for the head block
 | |
| 
 | |
|     We have to set the START_EXTENT_BIT for every extent where the
 | |
|     blob will be stored on a page of it's own. We need this in the
 | |
|     UNDO phase to generate MARIA_BITMAP_BLOCK's for undo-delete and
 | |
|     undo-update.
 | |
| */
 | |
| 
 | |
| static void store_extent_info(uchar *to,
 | |
|                               uchar *row_extents_second_part,
 | |
|                               MARIA_BITMAP_BLOCK *first_block,
 | |
|                               uint count)
 | |
| {
 | |
|   MARIA_BITMAP_BLOCK *block, *end_block;
 | |
|   uint copy_length;
 | |
|   my_bool first_found= 0;
 | |
|   DBUG_ENTER("store_extent_info");
 | |
|   DBUG_PRINT("enter", ("count: %u", count));
 | |
| 
 | |
|   for (block= first_block, end_block= first_block+count ;
 | |
|        block < end_block; block++)
 | |
|   {
 | |
|     /* The following is only false for marker (unused) blocks */
 | |
|     if (likely(block->used & BLOCKUSED_USED))
 | |
|     {
 | |
|       uint page_count= block->page_count;
 | |
|       DBUG_ASSERT(page_count != 0);
 | |
|       page_store(to, block->page);
 | |
|       if (block->sub_blocks)
 | |
|       {
 | |
|         /*
 | |
|           Set a bit so that we later know that this was the first block
 | |
|           for a blob
 | |
|         */
 | |
|         page_count|= START_EXTENT_BIT;
 | |
|       }
 | |
|       pagerange_store(to + PAGE_STORE_SIZE, page_count);
 | |
|       DBUG_DUMP("extent", to, ROW_EXTENT_SIZE);
 | |
|       to+= ROW_EXTENT_SIZE;
 | |
|       if (!first_found)
 | |
|       {
 | |
|         first_found= 1;
 | |
|         to= row_extents_second_part;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   copy_length= (count - 1) * ROW_EXTENT_SIZE;
 | |
|   /*
 | |
|     In some unlikely cases we have allocated to many blocks. Clear this
 | |
|     data.
 | |
|   */
 | |
|   bzero(to, (size_t) (row_extents_second_part + copy_length - to));
 | |
|   DBUG_VOID_RETURN;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief
 | |
|    Convert extent info read from file to MARIA_BITMAP_BLOCKS suitable
 | |
|    for write_block_record
 | |
| 
 | |
|    @note
 | |
|    In case of blobs, this function marks all the blob pages in the bitmap
 | |
|    as full pages. The bitmap bits for other pages will be marked
 | |
|    when write_block_record() calls _ma_bitmap_release_unused().
 | |
| 
 | |
|    This function will be removed in Maria 2.0 when we instead of delete rows
 | |
|    mark them as deleted and only remove them after commit.
 | |
| 
 | |
|    @return
 | |
|    @retval 0  ok
 | |
|    @retval 1  Error (out of memory or disk error changing bitmap) or
 | |
|               wrong information in extent information
 | |
| */
 | |
| 
 | |
| static my_bool extent_to_bitmap_blocks(MARIA_HA *info,
 | |
|                                        MARIA_BITMAP_BLOCKS *blocks,
 | |
|                                        pgcache_page_no_t head_page,
 | |
|                                        uint extent_count,
 | |
|                                        const uchar *extent_info)
 | |
| {
 | |
|   MARIA_BITMAP_BLOCK *block, *start_block;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uint i, tail_page;
 | |
|   DBUG_ENTER("extent_to_bitmap_blocks");
 | |
| 
 | |
|   if (allocate_dynamic(&info->bitmap_blocks, extent_count + 2))
 | |
|     DBUG_RETURN(1);
 | |
|   block= blocks->block=  dynamic_element(&info->bitmap_blocks, 0,
 | |
|                                         MARIA_BITMAP_BLOCK*);
 | |
|   blocks->count= extent_count + 1;
 | |
|   blocks->tail_page_skipped= blocks->page_skipped= 0;
 | |
|   block->page= head_page;
 | |
|   block->page_count= 1;
 | |
|   block->used= BLOCKUSED_USED | BLOCKUSED_USE_ORG_BITMAP;
 | |
|   /* Impossible value, will force storage of real value */
 | |
|   block->org_bitmap_value= 255;
 | |
| 
 | |
|   start_block= block++;
 | |
|   for (i=0 ;
 | |
|        i++ < extent_count ;
 | |
|        block++, extent_info+= ROW_EXTENT_SIZE)
 | |
|   {
 | |
|     uint page_count= uint2korr(extent_info + ROW_EXTENT_PAGE_SIZE);
 | |
|     if (page_count & START_EXTENT_BIT)
 | |
|     {
 | |
|       page_count&= ~START_EXTENT_BIT;
 | |
|       start_block->sub_blocks= (uint) (block - start_block);
 | |
|       start_block= block;
 | |
|     }
 | |
|     block->page= page_korr(extent_info);
 | |
|     block->page_count= page_count;
 | |
|     block->sub_blocks= 0;
 | |
|     if (block->page_count == 0)
 | |
|     {
 | |
|       /* Extend allocated but not used by write_block_record() */
 | |
|       DBUG_ASSERT(block->page == 0);
 | |
|       /* This is the last block */
 | |
|       blocks->count= i;
 | |
|       break;
 | |
|     }
 | |
|     if ((tail_page= page_count & TAIL_BIT))
 | |
|       page_count= 1;
 | |
| 
 | |
|     /* Check if wrong data */
 | |
|     if (block->page == 0 || page_count == 0 ||
 | |
|         (block->page + page_count) * share->block_size >
 | |
|          share->state.state.data_file_length)
 | |
|     {
 | |
|       DBUG_PRINT("error", ("page: %lu  page_count: %u  tail: %u  length: %ld  data_length: %ld",
 | |
|                            (ulong) block->page,
 | |
|                            (block->page_count & ~TAIL_BIT),
 | |
|                            (uint) MY_TEST(block->page_count & TAIL_BIT),
 | |
|                            (ulong) ((block->page + (page_count & ~TAIL_BIT)) *
 | |
|                                     share->block_size),
 | |
|                            (ulong) share->state.state.data_file_length));
 | |
|       DBUG_RETURN(1);
 | |
|     }
 | |
|     if (tail_page)
 | |
|     {
 | |
|       block->org_bitmap_value= _ma_bitmap_get_page_bits(info, &share->bitmap,
 | |
|                                                         block->page);
 | |
|       block->used= (BLOCKUSED_TAIL | BLOCKUSED_USED |
 | |
|                     BLOCKUSED_USE_ORG_BITMAP);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       my_bool res;
 | |
|       mysql_mutex_lock(&share->bitmap.bitmap_lock);
 | |
|       res= _ma_bitmap_set_full_page_bits(info, &share->bitmap,
 | |
|                                          block->page, page_count);
 | |
|       mysql_mutex_unlock(&share->bitmap.bitmap_lock);
 | |
|       if (res)
 | |
|         DBUG_RETURN(1);
 | |
|       block->used= BLOCKUSED_USED;
 | |
|     }
 | |
|   }
 | |
|   start_block->sub_blocks= (uint) (block - start_block);
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Free regions of pages with logging
 | |
| 
 | |
|   NOTES
 | |
|     We are removing filler events and tail page events from
 | |
|     row->extents to get smaller log.
 | |
| 
 | |
|   RETURN
 | |
|     0   ok
 | |
|     1   error
 | |
| */
 | |
| 
 | |
| static my_bool free_full_pages(MARIA_HA *info, MARIA_ROW *row)
 | |
| {
 | |
|   uchar log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE];
 | |
|   LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
 | |
|   LSN lsn;
 | |
|   size_t extents_length;
 | |
|   uchar *extents= row->extents;
 | |
|   DBUG_ENTER("free_full_pages");
 | |
| 
 | |
|   if (info->s->now_transactional)
 | |
|   {
 | |
|     /* Compact events by removing filler and tail events */
 | |
|     uchar *new_block= 0;
 | |
|     uchar *end, *to, *compact_extent_info;
 | |
|     my_bool res, buff_alloced;
 | |
|     uint extents_count;
 | |
| 
 | |
|     alloc_on_stack(*info->stack_end_ptr, compact_extent_info, buff_alloced,
 | |
|                    row->extents_count * ROW_EXTENT_SIZE);
 | |
|     if (!compact_extent_info)
 | |
|       DBUG_RETURN(1);
 | |
| 
 | |
|     to= compact_extent_info;
 | |
|     for (end= extents + row->extents_count * ROW_EXTENT_SIZE ;
 | |
|          extents < end ;
 | |
|          extents+= ROW_EXTENT_SIZE)
 | |
|     {
 | |
|       uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE);
 | |
|       page_count&= ~START_EXTENT_BIT;
 | |
|       if (! (page_count & TAIL_BIT) && page_count != 0)
 | |
|       {
 | |
|         /* Found correct extent */
 | |
|         if (!new_block)
 | |
|           new_block= extents;                   /* First extent in range */
 | |
|         continue;
 | |
|       }
 | |
|       /* Found extent to remove, copy everything found so far */
 | |
|       if (new_block)
 | |
|       {
 | |
|         size_t length= (size_t) (extents - new_block);
 | |
|         memcpy(to, new_block, length);
 | |
|         to+= length;
 | |
|         new_block= 0;
 | |
|       }
 | |
|     }
 | |
|     if (new_block)
 | |
|     {
 | |
|       size_t length= (size_t) (extents - new_block);
 | |
|       memcpy(to, new_block, length);
 | |
|       to+= length;
 | |
|     }
 | |
| 
 | |
|     if (!unlikely(extents_length= (uint) (to - compact_extent_info)))
 | |
|     {
 | |
|       /*
 | |
|         No ranges. This happens in the rear case when we have a allocated
 | |
|         place for a blob on a tail page but it did fit into the main page.
 | |
|       */
 | |
|       stack_alloc_free(compact_extent_info, buff_alloced);
 | |
|       DBUG_RETURN(0);
 | |
|     }
 | |
|     extents_count= (uint) (extents_length / ROW_EXTENT_SIZE);
 | |
|     pagerange_store(log_data + FILEID_STORE_SIZE, extents_count);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 1].str=    compact_extent_info;
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 1].length= extents_length;
 | |
|     res= translog_write_record(&lsn, LOGREC_REDO_FREE_BLOCKS, info->trn,
 | |
|                                info,
 | |
|                                (translog_size_t) (sizeof(log_data) +
 | |
|                                                   extents_length),
 | |
|                                TRANSLOG_INTERNAL_PARTS + 2, log_array,
 | |
|                                log_data, NULL);
 | |
|     stack_alloc_free(compact_extent_info, buff_alloced);
 | |
|     if (res)
 | |
|       DBUG_RETURN(1);
 | |
|   }
 | |
| 
 | |
|   DBUG_RETURN(_ma_bitmap_free_full_pages(info, row->extents,
 | |
|                                          row->extents_count));
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Free one page range
 | |
| 
 | |
|   NOTES
 | |
|     This is very similar to free_full_pages()
 | |
| 
 | |
|   RETURN
 | |
|     0   ok
 | |
|     1   error
 | |
| */
 | |
| 
 | |
| static my_bool free_full_page_range(MARIA_HA *info, pgcache_page_no_t page,
 | |
|                                     uint count)
 | |
| {
 | |
|   my_bool res= 0;
 | |
|   uint delete_count;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   DBUG_ENTER("free_full_page_range");
 | |
| 
 | |
|   delete_count= count;
 | |
|   if (share->state.state.data_file_length ==
 | |
|       (page + count) * share->block_size)
 | |
|   {
 | |
|     /*
 | |
|       Don't delete last page from pagecache as this will make the file
 | |
|       shorter than expected if the last operation extended the file
 | |
|     */
 | |
|     delete_count--;
 | |
|   }
 | |
|   if (delete_count &&
 | |
|       pagecache_delete_pages(share->pagecache, &info->dfile,
 | |
|                              page, delete_count, PAGECACHE_LOCK_WRITE, 1))
 | |
|     res= 1;
 | |
| 
 | |
|   if (share->now_transactional)
 | |
|   {
 | |
|     LSN lsn;
 | |
|     /** @todo unify log_data's shape with delete_head_or_tail() */
 | |
|     uchar log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
 | |
|                    ROW_EXTENT_SIZE];
 | |
|     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
 | |
|     DBUG_ASSERT(info->trn->rec_lsn);
 | |
|     pagerange_store(log_data + FILEID_STORE_SIZE, 1);
 | |
|     page_store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
 | |
|               page);
 | |
|     int2store(log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
 | |
|               PAGE_STORE_SIZE, count);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
 | |
| 
 | |
|     if (translog_write_record(&lsn, LOGREC_REDO_FREE_BLOCKS,
 | |
|                               info->trn, info,
 | |
|                               (translog_size_t) sizeof(log_data),
 | |
|                               TRANSLOG_INTERNAL_PARTS + 1, log_array,
 | |
|                               log_data, NULL))
 | |
|       res= 1;
 | |
|   }
 | |
|   mysql_mutex_lock(&share->bitmap.bitmap_lock);
 | |
|   if (_ma_bitmap_reset_full_page_bits(info, &share->bitmap, page, count))
 | |
|     res= 1;
 | |
|   mysql_mutex_unlock(&share->bitmap.bitmap_lock);
 | |
|   DBUG_RETURN(res);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Write a record to a (set of) pages
 | |
| 
 | |
|    @fn     write_block_record()
 | |
|    @param  info            Maria handler
 | |
|    @param  old_record      Original record in case of update; NULL in case of
 | |
|                            insert
 | |
|    @param  record          Record we should write
 | |
|    @param  row             Statistics about record (calculated by
 | |
|                            calc_record_size())
 | |
|    @param  bitmap_blocks   On which pages the record should be stored
 | |
|    @param  head_block_is_read  1 if head block existed. 0 if new block.
 | |
|    @param  row_pos         Position on head page where to put head part of
 | |
|                            record
 | |
|    @param  undo_lsn	   <> LSN_ERROR if we are executing an UNDO
 | |
|    @param  old_record_checksum Checksum of old_record: ignored if table does
 | |
|                                not have live checksum; otherwise if
 | |
|                                old_record==NULL it must be 0.
 | |
| 
 | |
|    @note
 | |
|      On return all pinned pages are released.
 | |
| 
 | |
|      [page_buff + EMPTY_SPACE_OFFSET] is set to
 | |
|      row_pos->empty_space - head_length
 | |
| 
 | |
|    @return Operation status
 | |
|    @retval 0      OK
 | |
|    @retval 1      Error
 | |
| */
 | |
| 
 | |
| static my_bool write_block_record(MARIA_HA *info,
 | |
|                                   const uchar *old_record,
 | |
|                                   const uchar *record,
 | |
|                                   MARIA_ROW *row,
 | |
|                                   MARIA_BITMAP_BLOCKS *bitmap_blocks,
 | |
|                                   my_bool head_block_is_read,
 | |
|                                   struct st_row_pos_info *row_pos,
 | |
|                                   LSN undo_lsn,
 | |
|                                   ha_checksum old_record_checksum)
 | |
| {
 | |
|   uchar *data, *end_of_data, *tmp_data_used, *tmp_data;
 | |
|   uchar *UNINIT_VAR(row_extents_first_part), *UNINIT_VAR(row_extents_second_part);
 | |
|   uchar *field_length_data;
 | |
|   uchar *page_buff;
 | |
|   MARIA_BITMAP_BLOCK *block, *head_block;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   MARIA_COLUMNDEF *column, *end_column;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   uint block_size, flag, head_length;
 | |
|   ulong *blob_lengths;
 | |
|   my_bool row_extents_in_use, blob_full_pages_exists;
 | |
|   LSN lsn;
 | |
|   my_off_t position;
 | |
|   uint save_my_errno;
 | |
|   DBUG_ENTER("write_block_record");
 | |
| 
 | |
|   head_block= bitmap_blocks->block;
 | |
|   block_size= share->block_size;
 | |
| 
 | |
|   page_buff= row_pos->buff;
 | |
|   /* Position on head page where we should store the head part */
 | |
|   data= row_pos->data;
 | |
|   end_of_data= data + row_pos->length;
 | |
| 
 | |
|   /* Write header */
 | |
|   flag= info->row_flag;
 | |
|   row_extents_in_use= 0;
 | |
|   if (unlikely(row->total_length > row_pos->length))
 | |
|   {
 | |
|     /* Need extent */
 | |
|     DBUG_ASSERT(bitmap_blocks->count > 1);
 | |
|     if (bitmap_blocks->count <= 1)
 | |
|       goto crashed;                             /* Wrong in bitmap */
 | |
|     flag|= ROW_FLAG_EXTENTS;
 | |
|     row_extents_in_use= 1;
 | |
|   }
 | |
|   /* For now we have only a minimum header */
 | |
|   *data++= (uchar) flag;
 | |
|   if (flag & ROW_FLAG_TRANSID)
 | |
|   {
 | |
|     transid_store(data, info->trn->trid);
 | |
|     data+= TRANSID_SIZE;
 | |
|   }
 | |
| 
 | |
|   if (unlikely(flag & ROW_FLAG_NULLS_EXTENDED))
 | |
|     *data++= (uchar) (share->base.null_bytes -
 | |
|                       share->base.original_null_bytes);
 | |
|   if (row_extents_in_use)
 | |
|   {
 | |
|     /* Store first extent in header */
 | |
|     store_key_length_inc(data, bitmap_blocks->count - 1);
 | |
|     row_extents_first_part= data;
 | |
|     data+= ROW_EXTENT_SIZE;
 | |
|   }
 | |
|   if (share->base.max_field_lengths)
 | |
|     store_key_length_inc(data, row->field_lengths_length);
 | |
|   if (share->calc_checksum)
 | |
|   {
 | |
|     *(data++)= (uchar) (row->checksum); /* store least significant byte */
 | |
|     DBUG_ASSERT(!((old_record_checksum != 0) && (old_record == NULL)));
 | |
|   }
 | |
|   memcpy(data, record, share->base.null_bytes);
 | |
|   data+= share->base.null_bytes;
 | |
|   memcpy(data, row->empty_bits, share->base.pack_bytes);
 | |
|   data+= share->base.pack_bytes;
 | |
| 
 | |
|   DBUG_ASSERT(row_extents_in_use || undo_lsn != LSN_ERROR ||
 | |
|               (uint) (data - row_pos->data) == row->min_length);
 | |
| 
 | |
|   /*
 | |
|     Allocate a buffer of rest of data (except blobs)
 | |
| 
 | |
|     To avoid double copying of data, we copy as many columns that fits into
 | |
|     the page. The rest goes into info->packed_row.
 | |
| 
 | |
|     Using an extra buffer, instead of doing continuous writes to different
 | |
|     pages, uses less code and we don't need to have to do a complex call
 | |
|     for every data segment we want to store.
 | |
|   */
 | |
|   if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
 | |
|                        row->head_length, MY_WME | share->malloc_flag))
 | |
|     DBUG_RETURN(1);
 | |
| 
 | |
|   tmp_data_used= 0;                 /* Either 0 or last used uchar in 'data' */
 | |
|   tmp_data= data;
 | |
| 
 | |
|   if (row_extents_in_use)
 | |
|   {
 | |
|     uint copy_length= (bitmap_blocks->count - 2) * ROW_EXTENT_SIZE;
 | |
|     if (!tmp_data_used && tmp_data + copy_length > end_of_data)
 | |
|     {
 | |
|       tmp_data_used= tmp_data;
 | |
|       tmp_data= info->rec_buff;
 | |
|     }
 | |
|     row_extents_second_part= tmp_data;
 | |
|     /*
 | |
|        We will copy the extents here when we have figured out the tail
 | |
|        positions.
 | |
|     */
 | |
|     tmp_data+= copy_length;
 | |
|   }
 | |
| 
 | |
|   /* Copy fields that has fixed lengths (primary key etc) */
 | |
|   for (column= share->columndef,
 | |
|          end_column= column + share->base.fixed_not_null_fields;
 | |
|        column < end_column; column++)
 | |
|   {
 | |
|     if (!tmp_data_used && tmp_data + column->length > end_of_data)
 | |
|     {
 | |
|       tmp_data_used= tmp_data;
 | |
|       tmp_data= info->rec_buff;
 | |
|     }
 | |
|     memcpy(tmp_data, record + column->offset, column->length);
 | |
|     tmp_data+= column->length;
 | |
|   }
 | |
| 
 | |
|   /* Copy length of data for variable length fields */
 | |
|   if (!tmp_data_used && tmp_data + row->field_lengths_length > end_of_data)
 | |
|   {
 | |
|     tmp_data_used= tmp_data;
 | |
|     tmp_data= info->rec_buff;
 | |
|   }
 | |
|   field_length_data= row->field_lengths;
 | |
|   memcpy(tmp_data, field_length_data, row->field_lengths_length);
 | |
|   tmp_data+= row->field_lengths_length;
 | |
| 
 | |
|   DBUG_ASSERT(row_extents_in_use || undo_lsn != LSN_ERROR ||
 | |
|               (uint) (tmp_data - row_pos->data) == row->min_length +
 | |
|               share->base.fixed_not_null_fields_length +
 | |
|               row->field_lengths_length);
 | |
| 
 | |
|   /* Copy variable length fields and fields with null/zero */
 | |
|   for (end_column= share->columndef + share->base.fields - share->base.blobs;
 | |
|        column < end_column ;
 | |
|        column++)
 | |
|   {
 | |
|     const uchar *field_pos;
 | |
|     ulong length;
 | |
|     if ((record[column->null_pos] & column->null_bit) ||
 | |
|         (column->empty_bit &&
 | |
|          (row->empty_bits[column->empty_pos] & column->empty_bit)))
 | |
|       continue;
 | |
| 
 | |
|     field_pos= record + column->offset;
 | |
|     switch (column->type) {
 | |
|     case FIELD_NORMAL:                          /* Fixed length field */
 | |
|     case FIELD_SKIP_PRESPACE:
 | |
|     case FIELD_SKIP_ZERO:                       /* Fixed length field */
 | |
|       length= column->length;
 | |
|       break;
 | |
|     case FIELD_SKIP_ENDSPACE:                   /* CHAR */
 | |
|       /* Char that is space filled */
 | |
|       if (column->length <= 255)
 | |
|         length= (uint) (uchar) *field_length_data++;
 | |
|       else
 | |
|       {
 | |
|         length= uint2korr(field_length_data);
 | |
|         field_length_data+= 2;
 | |
|       }
 | |
|       break;
 | |
|     case FIELD_VARCHAR:
 | |
|       if (column->length <= 256)
 | |
|       {
 | |
|         length= (uint) (uchar) *field_length_data++;
 | |
|         field_pos++;                            /* Skip length uchar */
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         length= uint2korr(field_length_data);
 | |
|         field_length_data+= 2;
 | |
|         field_pos+= 2;
 | |
|       }
 | |
|       DBUG_ASSERT(length <= column->length);
 | |
|       break;
 | |
|     default:                                    /* Wrong data */
 | |
|       DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|       length=0;
 | |
|       break;
 | |
|     }
 | |
|     if (!tmp_data_used && tmp_data + length > end_of_data)
 | |
|     {
 | |
|       /* Data didn't fit in page; Change to use tmp buffer */
 | |
|       tmp_data_used= tmp_data;
 | |
|       tmp_data= info->rec_buff;
 | |
|     }
 | |
|     memcpy((char*) tmp_data, field_pos, length);
 | |
|     tmp_data+= length;
 | |
|   }
 | |
| 
 | |
|   block= head_block + head_block->sub_blocks;   /* Point to first blob data */
 | |
| 
 | |
|   end_column= column + share->base.blobs;
 | |
|   blob_lengths= row->blob_lengths;
 | |
|   if (!tmp_data_used)
 | |
|   {
 | |
|     /* Still room on page; Copy as many blobs we can into this page */
 | |
|     data= tmp_data;
 | |
|     for (; column < end_column &&
 | |
|            *blob_lengths <= (ulong)(end_of_data - data);
 | |
|          column++, blob_lengths++)
 | |
|     {
 | |
|       uchar *tmp_pos;
 | |
|       uint length;
 | |
|       if (!*blob_lengths)                       /* Null or "" */
 | |
|         continue;
 | |
|       length= column->length - portable_sizeof_char_ptr;
 | |
|       memcpy(&tmp_pos, record + column->offset + length, sizeof(char*));
 | |
|       memcpy(data, tmp_pos, *blob_lengths);
 | |
|       data+= *blob_lengths;
 | |
|       /*
 | |
|         The following is not true when we want to insert data into original
 | |
|         place. In this case we don't have any extra blocks allocated
 | |
|       */
 | |
|       if (likely(undo_lsn == LSN_ERROR))
 | |
|       {
 | |
|         /* Skip over tail page that was prepared for storing blob */
 | |
|         block++;
 | |
|         bitmap_blocks->tail_page_skipped= 1;
 | |
|       }
 | |
|     }
 | |
|     if (head_block->sub_blocks > 1)
 | |
|     {
 | |
|       /* We have allocated pages that where not used */
 | |
|       bitmap_blocks->page_skipped= 1;
 | |
|     }
 | |
|   }
 | |
|   else
 | |
|     data= tmp_data_used;                        /* Get last used on page */
 | |
| 
 | |
|   /* Update page directory */
 | |
|   head_length= (uint) (data - row_pos->data);
 | |
|   DBUG_PRINT("info", ("Used head length on page: %u  header_length: %u",
 | |
|                       head_length,
 | |
|                       (uint) (flag & ROW_FLAG_TRANSID ? TRANSID_SIZE : 0)));
 | |
|   if (head_length < share->base.min_block_length)
 | |
|   {
 | |
|     /* Extend row to be of size min_block_length */
 | |
|     uint diff_length= share->base.min_block_length - head_length;
 | |
|     bzero(data, diff_length);
 | |
|     data+= diff_length;
 | |
|     head_length= share->base.min_block_length;
 | |
|   }
 | |
|   DBUG_ASSERT(data <= end_of_data);
 | |
|   /*
 | |
|     If this is a redo entry (ie, undo_lsn != LSN_ERROR) then we should have
 | |
|     written exactly head_length bytes (same as original record).
 | |
|   */
 | |
|   DBUG_ASSERT(undo_lsn == LSN_ERROR || head_length == row_pos->length);
 | |
|   int2store(row_pos->dir + 2, head_length);
 | |
|   /* update empty space at start of block */
 | |
|   row_pos->empty_space-= head_length;
 | |
|   int2store(page_buff + EMPTY_SPACE_OFFSET, row_pos->empty_space);
 | |
|   /* Mark in bitmaps how the current page was actually used */
 | |
|   head_block->empty_space= row_pos->empty_space;
 | |
|   if (page_buff[DIR_COUNT_OFFSET] == MAX_ROWS_PER_PAGE &&
 | |
|       page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST)
 | |
|     head_block->empty_space= 0;               /* Page is full */
 | |
|   head_block->used|= BLOCKUSED_USED;
 | |
| 
 | |
|   check_directory(share,
 | |
|                   page_buff, share->block_size, share->base.min_block_length,
 | |
|                   (uint) -1);
 | |
| 
 | |
|   /*
 | |
|      Now we have to write tail pages, as we need to store the position
 | |
|      to them in the row extent header.
 | |
| 
 | |
|      We first write out all blob tails, to be able to store them in
 | |
|      the current page or 'tmp_data'.
 | |
| 
 | |
|      Then we write the tail of the non-blob fields (The position to the
 | |
|      tail page is stored either in row header, the extents in the head
 | |
|      page or in the first full page of the non-blob data. It's never in
 | |
|      the tail page of the non-blob data)
 | |
|   */
 | |
| 
 | |
|   blob_full_pages_exists= 0;
 | |
|   if (row_extents_in_use)
 | |
|   {
 | |
|     if (column != end_column)                   /* If blob fields */
 | |
|     {
 | |
|       MARIA_COLUMNDEF    *save_column=       column;
 | |
|       MARIA_BITMAP_BLOCK *save_block=        block;
 | |
|       MARIA_BITMAP_BLOCK *end_block;
 | |
|       ulong              *save_blob_lengths= blob_lengths;
 | |
| 
 | |
|       for (; column < end_column; column++, blob_lengths++)
 | |
|       {
 | |
|         uchar *blob_pos;
 | |
|         if (!*blob_lengths)                     /* Null or "" */
 | |
|           continue;
 | |
|         if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL)
 | |
|         {
 | |
|           uint length;
 | |
|           length= column->length - portable_sizeof_char_ptr;
 | |
|           memcpy(&blob_pos, record + column->offset + length, sizeof(char*));
 | |
|           length= *blob_lengths % FULL_PAGE_SIZE(share);   /* tail size */
 | |
|           if (length != *blob_lengths)
 | |
|             blob_full_pages_exists= 1;
 | |
|           if (write_tail(info, block + block->sub_blocks-1,
 | |
|                          blob_pos + *blob_lengths - length,
 | |
|                          length))
 | |
|             goto disk_err;
 | |
|         }
 | |
|         else
 | |
|           blob_full_pages_exists= 1;
 | |
| 
 | |
|         for (end_block= block + block->sub_blocks; block < end_block; block++)
 | |
|         {
 | |
|           /*
 | |
|             Set only a bit, to not cause bitmap code to believe a block is full
 | |
|             when there is still a lot of entries in it.
 | |
|           */
 | |
|           block->used|= BLOCKUSED_USED;
 | |
|         }
 | |
|       }
 | |
|       DBUG_ASSERT((undo_lsn == LSN_ERROR ||
 | |
|                    block == bitmap_blocks->block + bitmap_blocks->count));
 | |
|       column= save_column;
 | |
|       block= save_block;
 | |
|       blob_lengths= save_blob_lengths;
 | |
|     }
 | |
| 
 | |
|     if (tmp_data_used)                          /* non blob data overflows */
 | |
|     {
 | |
|       MARIA_BITMAP_BLOCK *cur_block, *end_block, *last_head_block;
 | |
|       MARIA_BITMAP_BLOCK *head_tail_block= 0;
 | |
|       ulong length;
 | |
|       ulong data_length= (ulong) (tmp_data - info->rec_buff);
 | |
| 
 | |
| #ifdef SANITY_CHECKS
 | |
|       DBUG_ASSERT(head_block->sub_blocks != 1);
 | |
|       if (head_block->sub_blocks == 1)
 | |
|         goto crashed;                           /* no reserved full or tails */
 | |
| #endif
 | |
|       /*
 | |
|         Find out where to write tail for non-blob fields.
 | |
| 
 | |
|         Problem here is that the bitmap code may have allocated more
 | |
|         space than we need. We have to handle the following cases:
 | |
| 
 | |
|         - Bitmap code allocated a tail page we don't need.
 | |
|         - The last full page allocated needs to be changed to a tail page
 | |
|         (Because we where able to put more data on the head page than
 | |
|         the bitmap allocation assumed)
 | |
| 
 | |
|         The reserved pages in bitmap_blocks for the main page has one of
 | |
|         the following allocations:
 | |
|         - Full pages, with following blocks:
 | |
|           # * full pages
 | |
|           empty page  ; To be used if we change last full to tail page. This
 | |
|           has 'count' = 0.
 | |
|           tail page  (optional, if last full page was part full)
 | |
|         - One tail page
 | |
|       */
 | |
| 
 | |
|       cur_block= head_block + 1;
 | |
|       end_block= head_block + head_block->sub_blocks;
 | |
|       /*
 | |
|         Loop until we have find a block bigger than we need or
 | |
|         we find the empty page block.
 | |
|       */
 | |
|       while (data_length >= (length= (cur_block->page_count *
 | |
|                                       FULL_PAGE_SIZE(share))) &&
 | |
|              cur_block->page_count)
 | |
|       {
 | |
| #ifdef SANITY_CHECKS
 | |
|         DBUG_ASSERT(!((cur_block == end_block) ||
 | |
|                       (cur_block->used & BLOCKUSED_USED)));
 | |
|         if ((cur_block == end_block) || (cur_block->used & BLOCKUSED_USED))
 | |
|           goto crashed;
 | |
| #endif
 | |
|         data_length-= length;
 | |
|         (cur_block++)->used|= BLOCKUSED_USED;
 | |
|       }
 | |
|       last_head_block= cur_block;
 | |
|       if (data_length)
 | |
|       {
 | |
|         if (cur_block->page_count == 0)
 | |
|         {
 | |
|           /* Skip empty filler block */
 | |
|           cur_block++;
 | |
|         }
 | |
| #ifdef SANITY_CHECKS
 | |
|         DBUG_ASSERT(!(cur_block >= end_block));
 | |
|         if ((cur_block >= end_block))
 | |
|           goto crashed;
 | |
| #endif
 | |
|         if (cur_block->used & BLOCKUSED_TAIL)
 | |
|         {
 | |
|           DBUG_ASSERT(data_length < MAX_TAIL_SIZE(block_size));
 | |
|           /* tail written to tail page */
 | |
|           cur_block->used|= BLOCKUSED_USED;
 | |
|           head_tail_block= cur_block;
 | |
|         }
 | |
|         else if (data_length > length - MAX_TAIL_SIZE(block_size))
 | |
|         {
 | |
|           /* tail written to full page */
 | |
|           cur_block->used|= BLOCKUSED_USED;
 | |
|           if ((cur_block != end_block - 1) &&
 | |
|               (end_block[-1].used & BLOCKUSED_TAIL))
 | |
|             bitmap_blocks->tail_page_skipped= 1;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           /*
 | |
|             cur_block is a full block, followed by an empty and optional
 | |
|             tail block. Change cur_block to a tail block or split it
 | |
|             into full blocks and tail blocks.
 | |
| 
 | |
|             TODO:
 | |
|              If there is enough space on the following tail block, use
 | |
|              this instead of creating a new tail block.
 | |
|           */
 | |
|           DBUG_ASSERT(cur_block[1].page_count == 0);
 | |
|           if (cur_block->page_count == 1)
 | |
|           {
 | |
|             /* convert full block to tail block */
 | |
|             cur_block->used|= BLOCKUSED_USED | BLOCKUSED_TAIL;
 | |
|             head_tail_block= cur_block;
 | |
|           }
 | |
|           else
 | |
|           {
 | |
|             DBUG_ASSERT(data_length < length - FULL_PAGE_SIZE(share));
 | |
|             DBUG_PRINT("info", ("Splitting blocks into full and tail"));
 | |
|             cur_block[1].page= (cur_block->page + cur_block->page_count - 1);
 | |
|             cur_block[1].page_count= 1;         /* Avoid DBUG_ASSERT */
 | |
|             cur_block[1].used= BLOCKUSED_USED | BLOCKUSED_TAIL;
 | |
|             cur_block->page_count--;
 | |
|             cur_block->used|= BLOCKUSED_USED;
 | |
|             last_head_block= head_tail_block= cur_block+1;
 | |
|           }
 | |
|           if (end_block[-1].used & BLOCKUSED_TAIL)
 | |
|             bitmap_blocks->tail_page_skipped= 1;
 | |
|         }
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         /* Must be an empty or tail page */
 | |
|         DBUG_ASSERT(cur_block->page_count == 0 ||
 | |
|                     cur_block->used & BLOCKUSED_TAIL);
 | |
|         if (end_block[-1].used & BLOCKUSED_TAIL)
 | |
|           bitmap_blocks->tail_page_skipped= 1;
 | |
|       }
 | |
| 
 | |
|       /*
 | |
|         Write all extents into page or tmp_data
 | |
| 
 | |
|         Note that we still don't have a correct position for the tail
 | |
|         of the non-blob fields.
 | |
|       */
 | |
|       store_extent_info(row_extents_first_part,
 | |
|                         row_extents_second_part,
 | |
|                         head_block+1, bitmap_blocks->count - 1);
 | |
|       if (head_tail_block)
 | |
|       {
 | |
|         ulong block_length= (ulong) (tmp_data - info->rec_buff);
 | |
|         uchar *extent_data;
 | |
| 
 | |
|         length= (uint) (block_length % FULL_PAGE_SIZE(share));
 | |
|         if (write_tail(info, head_tail_block,
 | |
|                        info->rec_buff + block_length - length,
 | |
|                        length))
 | |
|           goto disk_err;
 | |
|         tmp_data-= length;                      /* Remove the tail */
 | |
|         if (tmp_data == info->rec_buff)
 | |
|         {
 | |
|           /* We have no full blocks to write for the head part */
 | |
|           tmp_data_used= 0;
 | |
|         }
 | |
| 
 | |
|         /* Store the tail position for the non-blob fields */
 | |
|         if (head_tail_block == head_block + 1)
 | |
|         {
 | |
|           /*
 | |
|             We had a head block + tail block, which means that the
 | |
|             tail block is the first extent
 | |
|           */
 | |
|           extent_data= row_extents_first_part;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           /*
 | |
|             We have a head block + some full blocks + tail block
 | |
|             last_head_block is pointing after the last used extent
 | |
|             for the head block.
 | |
|           */
 | |
|           extent_data= row_extents_second_part +
 | |
|             ((last_head_block - head_block) - 2) * ROW_EXTENT_SIZE;
 | |
|         }
 | |
|         /* Write information for tail block in the reserved space */
 | |
|         page_store(extent_data, head_tail_block->page);
 | |
|         pagerange_store(extent_data + PAGE_STORE_SIZE,
 | |
|                         head_tail_block->page_count);
 | |
|       }
 | |
|     }
 | |
|     else
 | |
|       store_extent_info(row_extents_first_part,
 | |
|                         row_extents_second_part,
 | |
|                         head_block+1, bitmap_blocks->count - 1);
 | |
|   }
 | |
| 
 | |
|   if (share->now_transactional)
 | |
|   {
 | |
|     uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
 | |
|     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
 | |
| 
 | |
|     /* Log REDO changes of head page */
 | |
|     page_store(log_data + FILEID_STORE_SIZE, head_block->page);
 | |
|     dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 | |
|                  row_pos->rownr);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 1].str=    row_pos->data;
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 1].length= head_length;
 | |
|     if (translog_write_record(&lsn,
 | |
|                               head_block_is_read ?
 | |
|                               LOGREC_REDO_INSERT_ROW_HEAD :
 | |
|                               LOGREC_REDO_NEW_ROW_HEAD,
 | |
|                               info->trn,
 | |
|                               info,
 | |
|                               (translog_size_t) (sizeof(log_data) +
 | |
|                                                  head_length),
 | |
|                               TRANSLOG_INTERNAL_PARTS + 2, log_array,
 | |
|                               log_data, NULL))
 | |
|       goto disk_err;
 | |
|   }
 | |
| 
 | |
| #ifdef RECOVERY_EXTRA_DEBUG
 | |
|   if (info->trn->undo_lsn != LSN_IMPOSSIBLE)
 | |
|   {
 | |
|     /* Stop right after the REDO; testing incomplete log record groups */
 | |
|     DBUG_EXECUTE_IF("maria_flush_whole_log",
 | |
|                     {
 | |
|                       DBUG_PRINT("maria_flush_whole_log", ("now"));
 | |
|                       translog_flush(translog_get_horizon());
 | |
|                     });
 | |
|     DBUG_EXECUTE_IF("maria_crash",
 | |
|                     { DBUG_PRINT("maria_crash", ("now")); DBUG_SUICIDE(); });
 | |
|   }
 | |
| #endif
 | |
| 
 | |
|   if (head_block_is_read)
 | |
|   {
 | |
|     MARIA_PINNED_PAGE *page_link;
 | |
|     /* Head page is always the first pinned page */
 | |
|     page_link= dynamic_element(&info->pinned_pages, 0,
 | |
|                                MARIA_PINNED_PAGE*);
 | |
|     pagecache_unlock_by_link(share->pagecache, page_link->link,
 | |
|                              PAGECACHE_LOCK_WRITE_TO_READ,
 | |
|                              PAGECACHE_PIN_LEFT_PINNED, LSN_IMPOSSIBLE,
 | |
|                              LSN_IMPOSSIBLE, 1, FALSE);
 | |
|     page_link->unlock= PAGECACHE_LOCK_READ_UNLOCK;
 | |
|     page_link->changed= 1;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     if (pagecache_write(share->pagecache,
 | |
|                         &info->dfile, head_block->page, 0,
 | |
|                         page_buff, share->page_type,
 | |
|                         head_block_is_read ? PAGECACHE_LOCK_WRITE_TO_READ :
 | |
|                         PAGECACHE_LOCK_READ,
 | |
|                         head_block_is_read ? PAGECACHE_PIN_LEFT_PINNED :
 | |
|                         PAGECACHE_PIN,
 | |
|                         PAGECACHE_WRITE_DELAY, &page_link.link,
 | |
|                         LSN_IMPOSSIBLE))
 | |
|       goto disk_err;
 | |
|     DBUG_ASSERT(page_link.link);
 | |
|     page_link.unlock= PAGECACHE_LOCK_READ_UNLOCK;
 | |
|     page_link.changed= 1;
 | |
|     push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
| 
 | |
|     /* Increase data file size, if extended */
 | |
|     position= (my_off_t) head_block->page * block_size;
 | |
|     if (share->state.state.data_file_length <= position)
 | |
|       _ma_set_share_data_file_length(share, position + block_size);
 | |
|   }
 | |
| 
 | |
|   if (share->now_transactional && (tmp_data_used || blob_full_pages_exists))
 | |
|   {
 | |
|     /*
 | |
|       Log REDO writes for all full pages (head part and all blobs)
 | |
|       We write all here to be able to generate the UNDO record early
 | |
|       so that we can write the LSN for the UNDO record to all full pages.
 | |
|     */
 | |
|     uchar tmp_log_data[FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
 | |
|                        (ROW_EXTENT_SIZE + BLOCK_FILLER_SIZE + SUB_RANGE_SIZE) *
 | |
|                        ROW_EXTENTS_ON_STACK];
 | |
|     uchar *log_data, *log_pos;
 | |
|     LEX_CUSTRING tmp_log_array[TRANSLOG_INTERNAL_PARTS + 2 +
 | |
|                                ROW_EXTENTS_ON_STACK];
 | |
|     LEX_CUSTRING *log_array_pos, *log_array;
 | |
|     int error;
 | |
|     translog_size_t log_entry_length= 0;
 | |
|     uint ext_length, extents= 0, sub_extents= 0;
 | |
| 
 | |
|     /* If few extents, then allocate things on stack to avoid a malloc call */
 | |
|     if (bitmap_blocks->count < ROW_EXTENTS_ON_STACK)
 | |
|     {
 | |
|       log_array= tmp_log_array;
 | |
|       log_data= tmp_log_data;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       if (!my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME), &log_array,
 | |
|                           (uint) ((bitmap_blocks->count +
 | |
|                                    TRANSLOG_INTERNAL_PARTS + 2) *
 | |
|                                   sizeof(*log_array)),
 | |
|                           &log_data, FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE +
 | |
|                           bitmap_blocks->count * (ROW_EXTENT_SIZE +
 | |
|                                                   BLOCK_FILLER_SIZE +
 | |
|                                                   SUB_RANGE_SIZE),
 | |
|                           NullS))
 | |
|         goto disk_err;
 | |
|     }
 | |
|     log_pos= log_data + FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE * 2;
 | |
|     log_array_pos= log_array+ TRANSLOG_INTERNAL_PARTS+1;
 | |
| 
 | |
|     if (tmp_data_used)
 | |
|     {
 | |
|       /* Full head page */
 | |
|       translog_size_t block_length= (translog_size_t) (tmp_data -
 | |
|                                                        info->rec_buff);
 | |
|       log_pos= store_page_range(share,
 | |
|                                 log_pos, head_block+1,
 | |
|                                 (ulong) block_length, &extents);
 | |
|       log_array_pos->str= info->rec_buff;
 | |
|       log_array_pos->length= block_length;
 | |
|       log_entry_length+= block_length;
 | |
|       log_array_pos++;
 | |
|       sub_extents++;
 | |
|     }
 | |
|     if (blob_full_pages_exists)
 | |
|     {
 | |
|       MARIA_COLUMNDEF *tmp_column= column;
 | |
|       ulong *tmp_blob_lengths= blob_lengths;
 | |
|       MARIA_BITMAP_BLOCK *tmp_block= block;
 | |
| 
 | |
|       /* Full blob pages */
 | |
|       for (; tmp_column < end_column; tmp_column++, tmp_blob_lengths++)
 | |
|       {
 | |
|         ulong blob_length;
 | |
|         uint length;
 | |
| 
 | |
|         if (!*tmp_blob_lengths)                 /* Null or "" */
 | |
|           continue;
 | |
|         blob_length= *tmp_blob_lengths;
 | |
|         length= tmp_column->length - portable_sizeof_char_ptr;
 | |
|         /*
 | |
|           If last part of blob was on tail page, change blob_length to
 | |
|           reflect this
 | |
|         */
 | |
|         if (tmp_block[tmp_block->sub_blocks - 1].used & BLOCKUSED_TAIL)
 | |
|           blob_length-= (blob_length % FULL_PAGE_SIZE(share));
 | |
|         if (blob_length)
 | |
|         {
 | |
|           memcpy((void*) &log_array_pos->str,
 | |
|                  record + tmp_column->offset + length,
 | |
|                  sizeof(uchar*));
 | |
|           log_array_pos->length= blob_length;
 | |
|           log_entry_length+= blob_length;
 | |
|           log_array_pos++;
 | |
|           sub_extents++;
 | |
| 
 | |
|           log_pos= store_page_range(share,
 | |
|                                     log_pos, tmp_block,
 | |
|                                     blob_length, &extents);
 | |
|         }
 | |
|         tmp_block+= tmp_block->sub_blocks;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|     ext_length=  (uint) (log_pos - log_data);
 | |
|     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= ext_length;
 | |
|     pagerange_store(log_data+ FILEID_STORE_SIZE, extents);
 | |
|     pagerange_store(log_data+ FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
 | |
|                     sub_extents);
 | |
| 
 | |
|     log_entry_length+= ext_length;
 | |
|     /* trn->rec_lsn is already set earlier in this function */
 | |
|     error= translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_BLOBS,
 | |
|                                  info->trn, info, log_entry_length,
 | |
|                                  (uint) (log_array_pos - log_array),
 | |
|                                  log_array, log_data, NULL);
 | |
|     if (log_array != tmp_log_array)
 | |
|       my_free(log_array);
 | |
|     if (error)
 | |
|       goto disk_err;
 | |
|   }
 | |
| 
 | |
|   /* Write UNDO or CLR record */
 | |
|   lsn= LSN_IMPOSSIBLE;
 | |
|   if (share->now_transactional)
 | |
|   {
 | |
|     LEX_CUSTRING *log_array= info->log_row_parts;
 | |
| 
 | |
|     if (undo_lsn != LSN_ERROR)
 | |
|     {
 | |
|       /*
 | |
|         Store if this CLR is about UNDO_DELETE or UNDO_UPDATE;
 | |
|         in the first case, Recovery, when it sees the CLR_END in the
 | |
|         REDO phase, may decrement the records' count.
 | |
|       */
 | |
|       if (_ma_write_clr(info, undo_lsn,
 | |
|                         old_record ? LOGREC_UNDO_ROW_UPDATE :
 | |
|                         LOGREC_UNDO_ROW_DELETE,
 | |
|                         share->calc_checksum != 0,
 | |
|                         row->checksum - old_record_checksum,
 | |
|                         &lsn, (void*) 0))
 | |
|         goto disk_err;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE +
 | |
|                      PAGE_STORE_SIZE + DIRPOS_STORE_SIZE + 2 +
 | |
|                      HA_CHECKSUM_STORE_SIZE + 2 + PAGERANGE_STORE_SIZE +
 | |
|                      ROW_EXTENT_SIZE];
 | |
|       uchar *log_pos;
 | |
|       ha_checksum checksum_delta;
 | |
| 
 | |
|       /* LOGREC_UNDO_ROW_INSERT & LOGREC_UNDO_ROW_UPDATE share same header */
 | |
|       lsn_store(log_data, info->trn->undo_lsn);
 | |
|       page_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE,
 | |
|                  head_block->page);
 | |
|       dirpos_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
 | |
|                    PAGE_STORE_SIZE,
 | |
|                    row_pos->rownr);
 | |
|       log_pos= (log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE +
 | |
|                 PAGE_STORE_SIZE + DIRPOS_STORE_SIZE);
 | |
|       store_checksum_in_rec(share, checksum_delta,
 | |
|                             row->checksum - old_record_checksum,
 | |
|                             log_pos, log_pos);
 | |
|       compile_time_assert(sizeof(ha_checksum) == HA_CHECKSUM_STORE_SIZE);
 | |
| 
 | |
|       log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|       log_array[TRANSLOG_INTERNAL_PARTS + 0].length= (uint) (log_pos -
 | |
|                                                              log_data);
 | |
| 
 | |
|       if (!old_record)
 | |
|       {
 | |
|         /* Store undo_lsn in case we are aborting the insert */
 | |
|         row->orig_undo_lsn= info->trn->undo_lsn;
 | |
|         /* Write UNDO log record for the INSERT */
 | |
|         if (translog_write_record(&lsn, LOGREC_UNDO_ROW_INSERT,
 | |
|                                   info->trn, info,
 | |
|                                   (translog_size_t)
 | |
|                                   log_array[TRANSLOG_INTERNAL_PARTS +
 | |
|                                             0].length,
 | |
|                                   TRANSLOG_INTERNAL_PARTS + 1,
 | |
|                                   log_array,
 | |
|                                   log_data + LSN_STORE_SIZE, &checksum_delta))
 | |
|           goto disk_err;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         /* Write UNDO log record for the UPDATE */
 | |
|         size_t row_length, extents_length;
 | |
|         uint row_parts_count, cur_head_length;
 | |
| 
 | |
|         /*
 | |
|           Write head length and extents of the original row so that we
 | |
|           during UNDO can put it back in the original position.
 | |
|           We don't store size for TRANSID, as we don't write this during
 | |
|           UNDO.
 | |
|         */
 | |
|         cur_head_length= (info->cur_row.head_length -
 | |
|                           info->cur_row.header_length);
 | |
|         int2store(log_pos, cur_head_length);
 | |
|         pagerange_store(log_pos + 2, info->cur_row.extents_count);
 | |
|         log_pos+= 2 + PAGERANGE_STORE_SIZE;
 | |
|         log_array[TRANSLOG_INTERNAL_PARTS + 0].length+= (2 +
 | |
|                                                          PAGERANGE_STORE_SIZE);
 | |
|         info->log_row_parts[TRANSLOG_INTERNAL_PARTS+1].str=
 | |
|           info->cur_row.extents;
 | |
|         info->log_row_parts[TRANSLOG_INTERNAL_PARTS+1].length=
 | |
|           extents_length= info->cur_row.extents_count * ROW_EXTENT_SIZE;
 | |
| 
 | |
|         row_length= fill_update_undo_parts(info, old_record, record,
 | |
|                                            log_array +
 | |
|                                            TRANSLOG_INTERNAL_PARTS + 2,
 | |
|                                            &row_parts_count);
 | |
|         if (translog_write_record(&lsn, LOGREC_UNDO_ROW_UPDATE, info->trn,
 | |
|                                   info,
 | |
|                                   (translog_size_t)
 | |
|                                   (log_array[TRANSLOG_INTERNAL_PARTS +
 | |
|                                              0].length + extents_length +
 | |
|                                    row_length),
 | |
|                                   TRANSLOG_INTERNAL_PARTS + 2 +
 | |
|                                   row_parts_count,
 | |
|                                   log_array,
 | |
|                                   log_data + LSN_STORE_SIZE,
 | |
|                                   &checksum_delta))
 | |
|           goto disk_err;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   /* Release not used space in used pages */
 | |
|   if (_ma_bitmap_release_unused(info, bitmap_blocks))
 | |
|     goto disk_err;
 | |
|   _ma_unpin_all_pages(info, lsn);
 | |
| 
 | |
|   if (tmp_data_used)
 | |
|   {
 | |
|     /*
 | |
|       Write data stored in info->rec_buff to pages
 | |
|       This is the char/varchar data that didn't fit into the head page.
 | |
|     */
 | |
|     DBUG_ASSERT(bitmap_blocks->count != 0);
 | |
|     if (write_full_pages(info, lsn, head_block + 1,
 | |
|                          info->rec_buff, (ulong) (tmp_data - info->rec_buff)))
 | |
|       goto disk_err;
 | |
|   }
 | |
| 
 | |
|   /* Write rest of blobs (data, but no tails as they are already written) */
 | |
|   for (; column < end_column; column++, blob_lengths++)
 | |
|   {
 | |
|     uchar *blob_pos;
 | |
|     uint length;
 | |
|     ulong blob_length;
 | |
|     if (!*blob_lengths)                         /* Null or "" */
 | |
|       continue;
 | |
|     length= column->length - portable_sizeof_char_ptr;
 | |
|     memcpy(&blob_pos, record + column->offset + length, sizeof(char*));
 | |
|     /* remove tail part */
 | |
|     blob_length= *blob_lengths;
 | |
|     if (block[block->sub_blocks - 1].used & BLOCKUSED_TAIL)
 | |
|       blob_length-= (blob_length % FULL_PAGE_SIZE(share));
 | |
| 
 | |
|     if (blob_length && write_full_pages(info, lsn, block,
 | |
|                                          blob_pos, blob_length))
 | |
|       goto disk_err;
 | |
|     block+= block->sub_blocks;
 | |
|   }
 | |
| 
 | |
|   _ma_finalize_row(info);
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| crashed:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   /* Something was wrong with data on page */
 | |
|   _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
| 
 | |
| disk_err:
 | |
|   /**
 | |
|      @todo RECOVERY we are going to let dirty pages go to disk while we have
 | |
|      logged UNDO, this violates WAL. We must mark the table corrupted!
 | |
| 
 | |
|      @todo RECOVERY we have written some REDOs without a closing UNDO,
 | |
|      it's possible that a next operation by this transaction succeeds and then
 | |
|      Recovery would glue the "orphan REDOs" to the succeeded operation and
 | |
|      execute the failed REDOs. We need some mark "abort this group" in the
 | |
|      log, or mark the table corrupted (then user will repair it and thus REDOs
 | |
|      will be skipped).
 | |
| 
 | |
|      @todo RECOVERY to not let write errors go unnoticed, pagecache_write()
 | |
|      should take a MARIA_HA* in argument, and if it
 | |
|      fails when flushing a page to disk it should call
 | |
|      (*the_maria_ha->write_error_func)(the_maria_ha)
 | |
|      and this hook will mark the table corrupted.
 | |
|      Maybe hook should be stored in the pagecache's block structure, or in a
 | |
|      hash "file->maria_ha*".
 | |
| 
 | |
|      @todo RECOVERY we should distinguish below between log write error and
 | |
|      table write error. The former should stop Maria immediately, the latter
 | |
|      should mark the table corrupted.
 | |
|   */
 | |
|   /*
 | |
|     Unpin all pinned pages to not cause problems for disk cache. This is
 | |
|     safe to call even if we already called _ma_unpin_all_pages() above.
 | |
|   */
 | |
|   save_my_errno= my_errno;
 | |
|   _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
 | |
|   my_errno= save_my_errno;
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   @brief Write a record
 | |
| 
 | |
|   @fn    allocate_and_write_block_record()
 | |
|   @param info                Maria handler
 | |
|   @param record              Record to write
 | |
|   @param row		     Information about fields in 'record'
 | |
|   @param undo_lsn	     <> LSN_ERROR if we are executing an UNDO
 | |
| 
 | |
|   @return
 | |
|   @retval 0	ok
 | |
|   @retval 1	Error
 | |
| */
 | |
| 
 | |
| static my_bool allocate_and_write_block_record(MARIA_HA *info,
 | |
|                                                const uchar *record,
 | |
|                                                MARIA_ROW *row,
 | |
|                                                LSN undo_lsn)
 | |
| {
 | |
|   struct st_row_pos_info row_pos;
 | |
|   MARIA_BITMAP_BLOCKS *blocks= &row->insert_blocks;
 | |
|   int save_my_errno;
 | |
|   DBUG_ENTER("allocate_and_write_block_record");
 | |
| 
 | |
|   _ma_bitmap_flushable(info, 1);
 | |
|   if (_ma_bitmap_find_place(info, row, blocks))
 | |
|     goto err;                         /* Error reading bitmap */
 | |
| 
 | |
|   /*
 | |
|     Sleep; a checkpoint will happen and should not send this over-allocated
 | |
|     bitmap to disk but rather wait.
 | |
|   */
 | |
|   DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(10););
 | |
| 
 | |
|   /* page will be pinned & locked by get_head_or_tail_page */
 | |
|   if (get_head_or_tail_page(info, blocks->block, info->buff,
 | |
|                             MY_MAX(row->space_on_head_page,
 | |
|                                 info->s->base.min_block_length),
 | |
|                             HEAD_PAGE,
 | |
|                             PAGECACHE_LOCK_WRITE, &row_pos))
 | |
|     goto err;
 | |
|   row->lastpos= ma_recordpos(blocks->block->page, row_pos.rownr);
 | |
|   if (info->s->calc_checksum)
 | |
|   {
 | |
|     if (undo_lsn == LSN_ERROR)
 | |
|       row->checksum= (info->s->calc_checksum)(info, record);
 | |
|     else
 | |
|     {
 | |
|       /* _ma_apply_undo_row_delete() already set row's checksum. Verify it. */
 | |
|       DBUG_ASSERT(row->checksum == (info->s->calc_checksum)(info, record));
 | |
|     }
 | |
|   }
 | |
|   DBUG_PRINT("info", ("rowid: %lu (%lu:%u) length: %u", (ulong) row->lastpos,
 | |
|                       (ulong) ma_recordpos_to_page(row->lastpos),
 | |
|                       ma_recordpos_to_dir_entry(row->lastpos),
 | |
|                       row_pos.length));
 | |
|   if (write_block_record(info, (uchar*) 0, record, row,
 | |
|                          blocks, blocks->block->org_bitmap_value != 0,
 | |
|                          &row_pos, undo_lsn, 0))
 | |
|     goto err;
 | |
|   /* Now let checkpoint happen but don't commit */
 | |
|   DBUG_EXECUTE_IF("maria_over_alloc_bitmap", sleep(1000););
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   save_my_errno= my_errno;
 | |
|   if (info->non_flushable_state)
 | |
|     _ma_bitmap_flushable(info, -1);
 | |
|   _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
 | |
|   my_errno= save_my_errno;
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Write a record and return rowid for it
 | |
| 
 | |
|   SYNOPSIS
 | |
|     _ma_write_init_block_record()
 | |
|     info                Maria handler
 | |
|     record              Record to write
 | |
| 
 | |
|   NOTES
 | |
|     This is done BEFORE we write the keys to the row!
 | |
| 
 | |
|   RETURN
 | |
|     HA_OFFSET_ERROR     Something went wrong
 | |
|     #                   Rowid for row
 | |
| */
 | |
| 
 | |
| MARIA_RECORD_POS _ma_write_init_block_record(MARIA_HA *info,
 | |
|                                              const uchar *record)
 | |
| {
 | |
|   DBUG_ENTER("_ma_write_init_block_record");
 | |
| 
 | |
|   calc_record_size(info, record, &info->cur_row);
 | |
|   if (allocate_and_write_block_record(info, record,
 | |
|                                       &info->cur_row, LSN_ERROR))
 | |
|     DBUG_RETURN(HA_OFFSET_ERROR);
 | |
|   DBUG_RETURN(info->cur_row.lastpos);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Dummy function for (*info->s->write_record)()
 | |
| 
 | |
|   Nothing to do here, as we already wrote the record in
 | |
|   _ma_write_init_block_record()
 | |
| */
 | |
| 
 | |
| my_bool _ma_write_block_record(MARIA_HA *info __attribute__ ((unused)),
 | |
|                                const uchar *record __attribute__ ((unused)))
 | |
| {
 | |
|   return 0;                                     /* Row already written */
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Remove row written by _ma_write_block_record() and log undo
 | |
| 
 | |
|    @param  info            Maria handler
 | |
| 
 | |
|    @note
 | |
|      This is called in case we got a duplicate unique key while
 | |
|      writing keys.
 | |
| 
 | |
|    @return Operation status
 | |
|      @retval 0      OK
 | |
|      @retval 1      Error
 | |
| */
 | |
| 
 | |
| my_bool _ma_write_abort_block_record(MARIA_HA *info)
 | |
| {
 | |
|   my_bool res= 0;
 | |
|   MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks;
 | |
|   MARIA_BITMAP_BLOCK *block, *end;
 | |
|   LSN lsn= LSN_IMPOSSIBLE;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   DBUG_ENTER("_ma_write_abort_block_record");
 | |
| 
 | |
|   _ma_bitmap_lock(share);  /* Lock bitmap from other insert threads */
 | |
|   if (delete_head_or_tail(info,
 | |
|                           ma_recordpos_to_page(info->cur_row.lastpos),
 | |
|                           ma_recordpos_to_dir_entry(info->cur_row.lastpos), 1,
 | |
|                           0))
 | |
|     res= 1;
 | |
|   for (block= blocks->block + 1, end= block + blocks->count - 1; block < end;
 | |
|        block++)
 | |
|   {
 | |
|     if (block->used & BLOCKUSED_USED)
 | |
|     {
 | |
|       if (block->used & BLOCKUSED_TAIL)
 | |
|       {
 | |
|         /*
 | |
|           block->page_count is set to the tail directory entry number in
 | |
|           write_block_record()
 | |
|         */
 | |
|         if (delete_head_or_tail(info, block->page,
 | |
|                                 block->page_count & ~TAIL_BIT,
 | |
|                                 0, 0))
 | |
|           res= 1;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         if (free_full_page_range(info, block->page, block->page_count))
 | |
|           res= 1;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   _ma_bitmap_unlock(share);
 | |
|   if (share->now_transactional)
 | |
|   {
 | |
|     /*
 | |
|       Write clr to mark end of aborted row insert.
 | |
|       The above delete_head_or_tail() calls will only log redo, not undo.
 | |
|       The undo just before the row insert is stored in row->orig_undo_lsn.
 | |
| 
 | |
|       When applying undo's, we can skip all undo records between current
 | |
|       lsn and row->orig_undo_lsn as logically things are as before the
 | |
|       attempted insert.
 | |
|     */
 | |
|     if (_ma_write_clr(info, info->cur_row.orig_undo_lsn,
 | |
|                       LOGREC_UNDO_ROW_INSERT,
 | |
|                       share->calc_checksum != 0,
 | |
|                       (ha_checksum) 0 - info->cur_row.checksum,
 | |
|                       &lsn, (void*) 0))
 | |
|       res= 1;
 | |
|   }
 | |
|   _ma_unpin_all_pages_and_finalize_row(info, lsn);
 | |
|   DBUG_RETURN(res);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Update a record
 | |
| 
 | |
|   NOTES
 | |
|     For the moment, we assume that info->cur_row.extents is always updated
 | |
|     when a row is read. In the future we may decide to read this on demand
 | |
|     for rows split into many extents.
 | |
| */
 | |
| 
 | |
| static my_bool _ma_update_block_record2(MARIA_HA *info,
 | |
|                                         MARIA_RECORD_POS record_pos,
 | |
|                                         const uchar *oldrec,
 | |
|                                         const uchar *record,
 | |
|                                         LSN undo_lsn)
 | |
| {
 | |
|   MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks;
 | |
|   uchar *buff;
 | |
|   MARIA_ROW *cur_row= &info->cur_row, *new_row= &info->new_row;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   uint rownr, org_empty_size, head_length;
 | |
|   uint block_size= info->s->block_size;
 | |
|   uint errpos __attribute__((unused)) = 0;
 | |
|   uchar *dir;
 | |
|   pgcache_page_no_t page;
 | |
|   struct st_row_pos_info row_pos;
 | |
|   my_bool res;
 | |
|   ha_checksum old_checksum;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   DBUG_ENTER("_ma_update_block_record2");
 | |
|   DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos));
 | |
| 
 | |
| #ifdef ENABLE_IF_PROBLEM_WITH_UPDATE
 | |
|   DBUG_DUMP("oldrec", oldrec, share->base.reclength);
 | |
|   DBUG_DUMP("newrec", record, share->base.reclength);
 | |
| #endif
 | |
| 
 | |
|   /*
 | |
|     Checksums of new and old rows were computed by callers already; new
 | |
|     row's was put into cur_row, old row's was put into new_row.
 | |
|   */
 | |
|   old_checksum= new_row->checksum;
 | |
|   new_row->checksum= cur_row->checksum;
 | |
|   calc_record_size(info, record, new_row);
 | |
|   page= ma_recordpos_to_page(record_pos);
 | |
| 
 | |
|   _ma_bitmap_flushable(info, 1);
 | |
|   buff= pagecache_read(share->pagecache,
 | |
|                        &info->dfile, (pgcache_page_no_t) page, 0, 0,
 | |
|                        share->page_type,
 | |
|                        PAGECACHE_LOCK_WRITE, &page_link.link);
 | |
|   page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|   page_link.changed= buff != 0;
 | |
|   push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
|   if (!buff)
 | |
|   {
 | |
|     _ma_set_fatal_error(info, my_errno);
 | |
|     goto err;
 | |
|   }
 | |
| 
 | |
|   org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET);
 | |
|   rownr= ma_recordpos_to_dir_entry(record_pos);
 | |
|   dir= dir_entry_pos(buff, block_size, rownr);
 | |
| 
 | |
|   /*
 | |
|     We can't use cur_row->head_length as the block may have been compacted
 | |
|     since we read it.
 | |
|   */
 | |
|   head_length= uint2korr(dir + 2);
 | |
| 
 | |
|   if ((org_empty_size + head_length) >= new_row->total_length)
 | |
|   {
 | |
|     uint rec_offset, length;
 | |
|     MARIA_BITMAP_BLOCK block;
 | |
| 
 | |
|     DBUG_PRINT("info", ("org_empty_size: %u  org_length: %u  new_length: %lu",
 | |
|                         org_empty_size, head_length,
 | |
|                         new_row->total_length));
 | |
| 
 | |
|     /*
 | |
|       We can fit the new row in the same page as the original head part
 | |
|       of the row
 | |
|     */
 | |
|     block.org_bitmap_value= _ma_free_size_to_head_pattern(&share->bitmap,
 | |
|                                                           org_empty_size);
 | |
|     if (extend_area_on_page(info, buff, dir, rownr,
 | |
|                             new_row->total_length, &org_empty_size,
 | |
|                             &rec_offset, &length, 1))
 | |
|     {
 | |
|       errpos= 1;
 | |
|       goto err;
 | |
|     }
 | |
| 
 | |
|     row_pos.buff= buff;
 | |
|     row_pos.rownr= rownr;
 | |
|     row_pos.empty_space= org_empty_size;
 | |
|     row_pos.dir= dir;
 | |
|     row_pos.data= buff + rec_offset;
 | |
|     row_pos.length= length;
 | |
|     blocks->block= █
 | |
|     blocks->count= 1;
 | |
|     block.page= page;
 | |
|     block.sub_blocks= 1;
 | |
|     block.used= BLOCKUSED_USED | BLOCKUSED_USE_ORG_BITMAP;
 | |
|     block.empty_space= row_pos.empty_space;
 | |
| 
 | |
|     if (*cur_row->tail_positions &&
 | |
|         delete_tails(info, cur_row->tail_positions))
 | |
|     {
 | |
|       errpos= 2;
 | |
|       goto err;
 | |
|     }
 | |
|     if (cur_row->extents_count && free_full_pages(info, cur_row))
 | |
|     {
 | |
|       errpos= 3;
 | |
|       goto err;
 | |
|     }
 | |
|     res= write_block_record(info, oldrec, record, new_row, blocks,
 | |
|                             1, &row_pos, undo_lsn, old_checksum);
 | |
|     /* We can't update or delete this without re-reading it again */
 | |
|     info->update&= ~HA_STATE_AKTIV;
 | |
|     DBUG_RETURN(res);
 | |
|   }
 | |
|   /* Delete old row */
 | |
|   if (*cur_row->tail_positions &&
 | |
|       delete_tails(info, cur_row->tail_positions))
 | |
|   {
 | |
|     errpos= 4;
 | |
|     goto err;
 | |
|   }
 | |
|   if (cur_row->extents_count && free_full_pages(info, cur_row))
 | |
|   {
 | |
|     errpos= 5;
 | |
|     goto err;
 | |
|   }
 | |
| 
 | |
|   head_length= uint2korr(dir + 2);
 | |
|   if (_ma_bitmap_find_new_place(info, new_row, page, head_length +
 | |
|                                 org_empty_size, blocks))
 | |
|   {
 | |
|     errpos= 6;
 | |
|     goto err;
 | |
|   }
 | |
| 
 | |
|   /*
 | |
|     Allocate all size in block for record
 | |
|     TODO:
 | |
|     Need to improve this to do compact if we can fit one more blob into
 | |
|     the head page
 | |
|   */
 | |
|   if ((head_length < new_row->space_on_head_page ||
 | |
|        (new_row->total_length <= head_length &&
 | |
|         org_empty_size + head_length >= new_row->total_length)))
 | |
|   {
 | |
|     _ma_compact_block_page(share,
 | |
|                            buff, rownr, 1,
 | |
|                            info->trn->min_read_from,
 | |
|                            share->base.min_block_length);
 | |
|     org_empty_size= 0;
 | |
|     head_length= uint2korr(dir + 2);
 | |
|   }
 | |
| 
 | |
|   row_pos.buff= buff;
 | |
|   row_pos.rownr= rownr;
 | |
|   row_pos.empty_space= org_empty_size + head_length;
 | |
|   row_pos.dir= dir;
 | |
|   row_pos.data= buff + uint2korr(dir);
 | |
|   row_pos.length= head_length;
 | |
|   if ((res= write_block_record(info, oldrec, record, new_row, blocks, 1,
 | |
|                                &row_pos, undo_lsn, old_checksum)))
 | |
|   {
 | |
|     errpos= 7;
 | |
|     goto err;
 | |
|   }
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   DBUG_PRINT("error", ("errpos: %d", errpos));
 | |
|   if (info->non_flushable_state)
 | |
|     _ma_bitmap_flushable(info, -1);
 | |
|   _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
/*
  @brief Store new row on its original position

  @param info                 Maria handler
  @param page                 Page holding the head part of the row
  @param rownr                Directory entry of the row on that page
  @param length_on_head_page  Length to reserve for the row on the head page
  @param extent_count         Extent count, given together with extent_info
                              to extent_to_bitmap_blocks()
  @param extent_info          Extent information for the row
  @param oldrec               Old row, passed on to write_block_record()
  @param record               New row to store
  @param undo_lsn             Passed on to write_block_record()

  @note
  This is basically a copy of _ma_update_block_record2
  When we have a purge thread for deleted row, we can remove this function
  and use _ma_update_block_record2 instead.

  This is the main reason we don't make a lot of subfunctions that are
  common between _ma_update_block_record2() and this function.

  Note: If something goes wrong we mark the file crashed

  @return
  @retval 0  ok
  @retval 1  error. The file has been marked crashed and
             _ma_unpin_all_pages_and_finalize_row() has been called.
*/

static my_bool _ma_update_at_original_place(MARIA_HA *info,
                                            pgcache_page_no_t page,
                                            uint rownr,
                                            uint length_on_head_page,
                                            uint extent_count,
                                            const uchar *extent_info,
                                            const uchar *oldrec,
                                            const uchar *record,
                                            LSN undo_lsn)
{
  MARIA_BITMAP_BLOCKS *blocks;
  MARIA_BITMAP_BLOCK *block;
  MARIA_ROW *cur_row= &info->cur_row, *new_row= &info->new_row;
  MARIA_PINNED_PAGE page_link;
  MARIA_SHARE *share= info->s;
  ha_checksum old_checksum;
  uint org_empty_size, empty_size;
  uint block_size= info->s->block_size;
  uchar *dir, *buff;
  struct st_row_pos_info row_pos;
  my_bool res;
  uint rec_offset, length;
  DBUG_ENTER("_ma_update_at_original_place");

#ifdef ENABLE_IF_PROBLEM_WITH_UPDATE
  DBUG_DUMP("oldrec", oldrec, share->base.reclength);
  DBUG_DUMP("newrec", record, share->base.reclength);
#endif

  /*
    Checksums of new and old rows were computed by callers already; new
    row's was put into cur_row, old row's was put into new_row.
  */
  old_checksum= new_row->checksum;
  new_row->checksum= cur_row->checksum;
  calc_record_size(info, record, new_row);

  /*
    The matching _ma_bitmap_flushable(info, -1) is only issued here on the
    error path.
    NOTE(review): on success the release presumably happens inside
    write_block_record() -- confirm.
  */
  _ma_bitmap_flushable(info, 1);
  buff= pagecache_read(share->pagecache,
                       &info->dfile, (pgcache_page_no_t) page, 0, 0,
                       share->page_type,
                       PAGECACHE_LOCK_WRITE, &page_link.link);
  /* The page link is registered before the read result is checked */
  page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
  page_link.changed= buff != 0;
  push_dynamic(&info->pinned_pages, (void*) &page_link);
  if (!buff)
  {
    _ma_set_fatal_error(info, my_errno);
    goto err;
  }

  org_empty_size= uint2korr(buff + EMPTY_SPACE_OFFSET);
  dir= dir_entry_pos(buff, block_size, rownr);

  /*
    The free space on the page plus the current head part must be able to
    hold length_on_head_page bytes; otherwise the data is treated as wrong.
  */
  if ((org_empty_size + cur_row->head_length) < length_on_head_page)
  {
    DBUG_PRINT("error",
               ("org_empty_size: %u  head_length: %u  length_on_page: %u",
                org_empty_size, (uint) cur_row->head_length,
                length_on_head_page));
    _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
    goto err;
  }

  /*
    We can fit the new row in the same page as the original head part
    of the row
  */
  /*
    Work on a copy of the empty size: org_empty_size is still needed below
    to compute the original bitmap pattern of the page.
  */
  empty_size= org_empty_size;
  if (extend_area_on_page(info, buff, dir, rownr,
                          length_on_head_page, &empty_size,
                          &rec_offset, &length, 1))
    goto err;

  row_pos.buff= buff;
  row_pos.rownr= rownr;
  row_pos.empty_space= empty_size;
  row_pos.dir= dir;
  row_pos.data= buff + rec_offset;

  /* Delete old row */
  if (*cur_row->tail_positions &&
      delete_tails(info, cur_row->tail_positions))
    goto err;
  if (cur_row->extents_count && free_full_pages(info, cur_row))
    goto err;

  /* Change extent information to be usable by write_block_record() */
  blocks= &cur_row->insert_blocks;
  if (extent_to_bitmap_blocks(info, blocks, page, extent_count, extent_info))
    goto err;
  block= blocks->block;
  block->empty_space= row_pos.empty_space;
  /*
    Original bitmap pattern of the head page: computed from the empty size
    read from the page, or from 0 if enough_free_entries_on_page() fails.
  */
  block->org_bitmap_value=
    _ma_free_size_to_head_pattern(&share->bitmap,
                                  (enough_free_entries_on_page(share, buff) ?
                                   org_empty_size : 0));

  DBUG_ASSERT(block->org_bitmap_value ==
              _ma_bitmap_get_page_bits(info, &info->s->bitmap, page));
  block->used|= BLOCKUSED_USE_ORG_BITMAP;

  /*
    We have to use <= below as the new_row may be smaller than the original
    row as the new row doesn't have transaction id
  */

  DBUG_ASSERT(blocks->count > 1 ||
              MY_MAX(new_row->total_length, share->base.min_block_length) <=
              length_on_head_page);

  /* Store same amount of data on head page as on original page */
  row_pos.length= (length_on_head_page -
                   (extent_count + 1 - blocks->count) * ROW_EXTENT_SIZE);
  set_if_bigger(row_pos.length, share->base.min_block_length);
  if ((res= write_block_record(info, oldrec, record, new_row, blocks,
                               1, &row_pos, undo_lsn, old_checksum)))
    goto err;
  DBUG_RETURN(0);

err:
  /* Every failure in this function is handled as a crashed file */
  DBUG_ASSERT(!maria_assert_if_crashed_table);
  _ma_mark_file_crashed(share);
  if (info->non_flushable_state)
    _ma_bitmap_flushable(info, -1);
  _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
  DBUG_RETURN(1);
}
 | |
| 
 | |
| 
 | |
| /* Wrapper for _ma_update_block_record2() used by ma_update() */
 | |
| 
 | |
| my_bool _ma_update_block_record(MARIA_HA *info, MARIA_RECORD_POS record_pos,
 | |
|                                 const uchar *orig_rec, const uchar *new_rec)
 | |
| {
 | |
|   return _ma_update_block_record2(info, record_pos, orig_rec, new_rec,
 | |
|                                   LSN_ERROR);
 | |
| }
 | |
| 
 | |
| 
 | |
/*
  Delete a directory entry

  SYNOPSIS
    delete_dir_entry()
    share               Maria share; supplies the block size
    buff                Page buffer
    record_number       Record number to delete
    empty_space_res     out: Empty space on page after delete.
                        Set to the block size if the page became empty.
                        Not set on error.

  NOTES
    If the deleted entry is the last one in the directory, it and all free
    entries directly following it are removed from the directory.
    Otherwise the entry is marked free and put first in the list of free
    directory entries (buff[DIR_FREE_OFFSET]).

    When the page becomes empty, only the page type is changed (to
    UNALLOCATED_PAGE); the directory count and empty space stored in the
    page header are not updated.

  RETURN
    -1    Error on page
    0     ok
    1     Page is now empty
*/

static int delete_dir_entry(MARIA_SHARE *share,
                            uchar *buff, uint record_number,
                            uint *empty_space_res)
{
  uint block_size= share->block_size;
  uint number_of_records= (uint) buff[DIR_COUNT_OFFSET];
  uint length, empty_space;
  uchar *dir;
  DBUG_ENTER("delete_dir_entry");
  DBUG_PRINT("enter", ("record_number: %u  number_of_records: %u",
                       record_number, number_of_records));

#ifdef SANITY_CHECKS
  /* The entry must exist in the directory and be a possible entry number */
  if (record_number >= number_of_records ||
      record_number > ((block_size - LSN_SIZE - PAGE_TYPE_SIZE - 1 -
                        PAGE_SUFFIX_SIZE) / DIR_ENTRY_SIZE))
  {
    DBUG_PRINT("error", ("record_number: %u  number_of_records: %u",
                         record_number, number_of_records));

    DBUG_RETURN(-1);
  }
#endif

  check_directory(share, buff, block_size, 0, (uint) -1);
  empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
  dir= dir_entry_pos(buff, block_size, record_number);
  length= uint2korr(dir + 2);  /* Length of entry we just deleted */
  DBUG_ASSERT(uint2korr(dir) != 0 && length < block_size);

  if (record_number == number_of_records - 1)
  {
    /* Delete this entry and all following free directory entries */
    uchar *end= buff + block_size - PAGE_SUFFIX_SIZE;
    number_of_records--;
    /*
      NOTE(review): stepping forward in memory presumably reaches the entry
      with the next lower number (directory stored backwards from the page
      end); this matches the asserts below -- confirm against
      dir_entry_pos().
    */
    dir+= DIR_ENTRY_SIZE;
    empty_space+= DIR_ENTRY_SIZE;

    /* Unlink and free the next empty ones */
    /*
      A free entry has its first two bytes (the offset) set to 0; dir[2]
      and dir[3] hold the numbers of the previous and next entry in the
      free list, or END_OF_DIR_FREE_LIST.
    */
    while (dir < end && dir[0] == 0 && dir[1] == 0)
    {
      number_of_records--;
      if (dir[2] == END_OF_DIR_FREE_LIST)
        buff[DIR_FREE_OFFSET]= dir[3];          /* Was first in free list */
      else
      {
        uchar *prev_entry= dir_entry_pos(buff, block_size, (uint) dir[2]);
        DBUG_ASSERT(uint2korr(prev_entry) == 0 && prev_entry[3] ==
                    number_of_records);
        prev_entry[3]= dir[3];
      }
      if (dir[3] != END_OF_DIR_FREE_LIST)
      {
        uchar *next_entry= dir_entry_pos(buff, block_size, (uint) dir[3]);
        DBUG_ASSERT(uint2korr(next_entry) == 0 && next_entry[2] ==
                    number_of_records);
        next_entry[2]= dir[2];
      }
      dir+= DIR_ENTRY_SIZE;
      empty_space+= DIR_ENTRY_SIZE;
    }

    if (number_of_records == 0)
    {
      /* All entries on page deleted */
      DBUG_PRINT("info", ("Page marked as unallocated"));
      buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE;
#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
      {
        /* Zero the directory entries from record_number down to entry 0 */
        dir= dir_entry_pos(buff, block_size, record_number);
        bzero(dir, (record_number+1) * DIR_ENTRY_SIZE);
      }
#endif
      /* Returns before the page header count and empty space are updated */
      *empty_space_res= block_size;
      DBUG_RETURN(1);
    }
    buff[DIR_COUNT_OFFSET]= (uchar) number_of_records;
  }
  else
  {
    /* Update directory */
    /* Mark the entry free and put it first in the free list */
    dir[0]= dir[1]= 0;
    dir[2]= END_OF_DIR_FREE_LIST;
    if ((dir[3]= buff[DIR_FREE_OFFSET]) != END_OF_DIR_FREE_LIST)
    {
      /* Relink next entry to point to newly freed entry */
      uchar *next_entry= dir_entry_pos(buff, block_size, (uint) dir[3]);
      DBUG_ASSERT(uint2korr(next_entry) == 0 &&
                  next_entry[2] == END_OF_DIR_FREE_LIST);
      next_entry[2]= record_number;
    }
    buff[DIR_FREE_OFFSET]= record_number;
  }
  /* The data area of the deleted entry is now free space too */
  empty_space+= length;

  int2store(buff + EMPTY_SPACE_OFFSET, empty_space);
  buff[PAGE_TYPE_OFFSET]|= (uchar) PAGE_CAN_BE_COMPACTED;

  *empty_space_res= empty_space;

  check_directory(share, buff, block_size, 0, empty_space);
  DBUG_RETURN(0);
}
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Delete a head a tail part
 | |
| 
 | |
|   SYNOPSIS
 | |
|     delete_head_or_tail()
 | |
|     info                Maria handler
 | |
|     page                Page (not file offset!) on which the row is
 | |
|     head                1 if this is a head page
 | |
|     from_update		1 if we are called from update. In this case we
 | |
| 			leave the page as write locked as we may put
 | |
|                         the new row into the old position.
 | |
| 
 | |
|   RETURN
 | |
|     0  ok
 | |
|     1  error
 | |
| */
 | |
| 
 | |
| static my_bool delete_head_or_tail(MARIA_HA *info,
 | |
|                                    pgcache_page_no_t page, uint record_number,
 | |
|                                    my_bool head, my_bool from_update)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uint empty_space;
 | |
|   int res;
 | |
|   my_bool page_is_empty;
 | |
|   uchar *buff;
 | |
|   LSN lsn;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   enum pagecache_page_lock lock_at_write, lock_at_unpin;
 | |
|   DBUG_ENTER("delete_head_or_tail");
 | |
|   DBUG_PRINT("enter", ("id: %lu (%lu:%u)",
 | |
|                        (ulong) ma_recordpos(page, record_number),
 | |
|                        (ulong) page, record_number));
 | |
| 
 | |
|   buff= pagecache_read(share->pagecache,
 | |
|                        &info->dfile, page, 0, 0,
 | |
|                        share->page_type,
 | |
|                        PAGECACHE_LOCK_WRITE, &page_link.link);
 | |
|   page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|   page_link.changed= buff != 0;
 | |
|   push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
|   if (!buff)
 | |
|   {
 | |
|     _ma_set_fatal_error(info, my_errno);
 | |
|     DBUG_RETURN(1);
 | |
|   }
 | |
|   DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) ==
 | |
|               (head ? HEAD_PAGE : TAIL_PAGE));
 | |
| 
 | |
|   if (from_update)
 | |
|   {
 | |
|     lock_at_write= PAGECACHE_LOCK_LEFT_WRITELOCKED;
 | |
|     lock_at_unpin= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     lock_at_write= PAGECACHE_LOCK_WRITE_TO_READ;
 | |
|     lock_at_unpin= PAGECACHE_LOCK_READ_UNLOCK;
 | |
|   }
 | |
| 
 | |
|   res= delete_dir_entry(share, buff, record_number, &empty_space);
 | |
|   if (res < 0)
 | |
|     DBUG_RETURN(1);
 | |
|   if (res == 0) /* after our deletion, page is still not empty */
 | |
|   {
 | |
|     uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE];
 | |
|     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
 | |
|     page_is_empty= 0;
 | |
|     if (share->now_transactional)
 | |
|     {
 | |
|       /* Log REDO data */
 | |
|       page_store(log_data + FILEID_STORE_SIZE, page);
 | |
|       dirpos_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 | |
|                    record_number);
 | |
| 
 | |
|       log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|       log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
 | |
|       if (translog_write_record(&lsn, (head ? LOGREC_REDO_PURGE_ROW_HEAD :
 | |
|                                        LOGREC_REDO_PURGE_ROW_TAIL),
 | |
|                                 info->trn, info,
 | |
|                                 (translog_size_t) sizeof(log_data),
 | |
|                                 TRANSLOG_INTERNAL_PARTS + 1, log_array,
 | |
|                                 log_data, NULL))
 | |
|         DBUG_RETURN(1);
 | |
|     }
 | |
|   }
 | |
|   else /* page is now empty */
 | |
|   {
 | |
|     page_is_empty= 1;
 | |
|     if (share->now_transactional)
 | |
|     {
 | |
|       uchar log_data[FILEID_STORE_SIZE + PAGE_STORE_SIZE];
 | |
|       LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
 | |
|       page_store(log_data + FILEID_STORE_SIZE, page);
 | |
|       log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
 | |
|       log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
 | |
|       if (translog_write_record(&lsn, LOGREC_REDO_FREE_HEAD_OR_TAIL,
 | |
|                                 info->trn, info,
 | |
|                                 (translog_size_t) sizeof(log_data),
 | |
|                                 TRANSLOG_INTERNAL_PARTS + 1, log_array,
 | |
|                                 log_data, NULL))
 | |
|         DBUG_RETURN(1);
 | |
|     }
 | |
|     /*
 | |
|       Mark that this page must be written to disk by page cache, even
 | |
|       if we could call pagecache_delete() on it.
 | |
|       This is needed to ensure that repair finds the empty page on disk
 | |
|       and not old data.
 | |
|     */
 | |
|     pagecache_set_write_on_delete_by_link(page_link.link);
 | |
|     DBUG_ASSERT(empty_space >= share->bitmap.sizes[0]);
 | |
|   }
 | |
| 
 | |
|   pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                            lock_at_write,
 | |
|                            PAGECACHE_PIN_LEFT_PINNED, LSN_IMPOSSIBLE,
 | |
|                            LSN_IMPOSSIBLE, 1, FALSE);
 | |
|   page_link.unlock= lock_at_unpin;
 | |
|   set_dynamic(&info->pinned_pages, (void*) &page_link,
 | |
|               info->pinned_pages.elements-1);
 | |
| 
 | |
|   DBUG_PRINT("info", ("empty_space: %u", empty_space));
 | |
| 
 | |
|   /*
 | |
|     If there is not enough space for all possible tails, mark the
 | |
|     page full
 | |
|   */
 | |
|   if (!head && !page_is_empty && !enough_free_entries(buff, share->block_size,
 | |
|                                                       1 + share->base.blobs))
 | |
|     empty_space= 0;
 | |
| 
 | |
|   DBUG_RETURN(_ma_bitmap_set(info, page, head, empty_space));
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   delete all tails
 | |
| 
 | |
|   SYNOPSIS
 | |
|     delete_tails()
 | |
|     info                Handler
 | |
|     tails               Pointer to vector of tail positions, ending with 0
 | |
| 
 | |
|   RETURN
 | |
|     0  ok
 | |
|     1  error
 | |
| */
 | |
| 
 | |
| static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails)
 | |
| {
 | |
|   my_bool res= 0;
 | |
|   DBUG_ENTER("delete_tails");
 | |
|   for (; *tails; tails++)
 | |
|   {
 | |
|     if (delete_head_or_tail(info,
 | |
|                             ma_recordpos_to_page(*tails),
 | |
|                             ma_recordpos_to_dir_entry(*tails), 0, 1))
 | |
|       res= 1;
 | |
|   }
 | |
|   DBUG_RETURN(res);
 | |
| }
 | |
| 
 | |
| 
 | |
/*
  Delete a record

  SYNOPSIS
    _ma_delete_block_record()
    info                Maria handler; the row to delete is the one at
                        info->cur_row.lastpos
    record              The row being deleted; used to build the UNDO record

  NOTES
   For the moment, we assume that info->cur_row.extents is always updated
   when a row is read. In the future we may decide to read this on demand
   for rows with many splits.

   The head part, all tail parts and all full pages of the row are freed.
   If the table is transactional, a LOGREC_UNDO_ROW_DELETE record is then
   written.

  RETURN
    0  ok
    1  error
*/

my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
{
  pgcache_page_no_t page;
  uint record_number;
  MARIA_SHARE *share= info->s;
  LSN lsn= LSN_IMPOSSIBLE;
  DBUG_ENTER("_ma_delete_block_record");

  page=          ma_recordpos_to_page(info->cur_row.lastpos);
  record_number= ma_recordpos_to_dir_entry(info->cur_row.lastpos);
  DBUG_PRINT("enter", ("rowid: %lu (%lu:%u)", (ulong) info->cur_row.lastpos,
                       (ulong) page, record_number));

  /* Released with _ma_bitmap_flushable(info, -1) on both exit paths below */
  _ma_bitmap_flushable(info, 1);
  /* Head first (head= 1, from_update= 0); tails are skipped if it fails */
  if (delete_head_or_tail(info, page, record_number, 1, 0) ||
      delete_tails(info, info->cur_row.tail_positions))
    goto err;

  if (info->cur_row.extents_count && free_full_pages(info, &info->cur_row))
    goto err;

  if (share->now_transactional)
  {
    /*
      Fixed part of the UNDO record, in this order:
        LSN_STORE_SIZE          info->trn->undo_lsn
        FILEID_STORE_SIZE       not filled in here
        PAGE_STORE_SIZE         page of the row head
        DIRPOS_STORE_SIZE       directory entry of the row head
        2                       head_length - header_length
        PAGERANGE_STORE_SIZE    number of extents
        HA_CHECKSUM_STORE_SIZE  room for a checksum
    */
    uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE +
                   DIRPOS_STORE_SIZE + 2 + PAGERANGE_STORE_SIZE +
                   HA_CHECKSUM_STORE_SIZE];
    uchar *log_pos;
    size_t row_length;
    uint row_parts_count, extents_length;
    ha_checksum checksum_delta;

    /* Write UNDO record */
    lsn_store(log_data, info->trn->undo_lsn);
    page_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, page);
    log_pos= log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE;
    dirpos_store(log_pos, record_number);
    log_pos+= DIRPOS_STORE_SIZE;
    int2store(log_pos, info->cur_row.head_length -
              info->cur_row.header_length);
    log_pos+= 2;
    pagerange_store(log_pos, info->cur_row.extents_count);
    log_pos+= PAGERANGE_STORE_SIZE;

    /* First part: the fixed data, initially without the checksum bytes */
    info->log_row_parts[TRANSLOG_INTERNAL_PARTS].str= log_data;
    info->log_row_parts[TRANSLOG_INTERNAL_PARTS].length=
      sizeof(log_data) - HA_CHECKSUM_STORE_SIZE;
    /*
      NOTE(review): store_checksum_in_rec() presumably stores the negated
      row checksum at log_pos and extends the part length when the table
      uses checksums -- confirm against the macro definition.
    */
    store_checksum_in_rec(share, checksum_delta,
                          (ha_checksum) 0 - info->cur_row.checksum, log_pos,
                          info->log_row_parts[TRANSLOG_INTERNAL_PARTS +
                                              0].length);
    /* Second part: the extent information of the row */
    info->log_row_parts[TRANSLOG_INTERNAL_PARTS+1].str=
      info->cur_row.extents;
    info->log_row_parts[TRANSLOG_INTERNAL_PARTS+1].length=
      extents_length= info->cur_row.extents_count * ROW_EXTENT_SIZE;

    /* Remaining parts: built from the row by fill_insert_undo_parts() */
    row_length= fill_insert_undo_parts(info, record,
                                       (info->log_row_parts +
                                        TRANSLOG_INTERNAL_PARTS + 2),
                                       &row_parts_count);

    if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, info->trn,
                              info,
                              (translog_size_t)
                              (info->log_row_parts[TRANSLOG_INTERNAL_PARTS +
                                                   0].length + row_length +
                               extents_length),
                              TRANSLOG_INTERNAL_PARTS + 2 + row_parts_count,
                              info->log_row_parts,
                              log_data + LSN_STORE_SIZE,
                              &checksum_delta))
      goto err;
  }

  /* lsn is still LSN_IMPOSSIBLE here if no UNDO record was written */
  _ma_bitmap_flushable(info, -1);
  _ma_unpin_all_pages_and_finalize_row(info, lsn);
  DBUG_RETURN(0);

err:
  DBUG_ASSERT(!maria_assert_if_crashed_table);
  _ma_bitmap_flushable(info, -1);
  _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
  DBUG_RETURN(1);
}
 | |
| 
 | |
| 
 | |
| /****************************************************************************
 | |
|   Reading of records
 | |
| ****************************************************************************/
 | |
| 
 | |
| /*
 | |
|   Read position to record from record directory at end of page
 | |
| 
 | |
|   SYNOPSIS
 | |
|    get_record_position()
 | |
|    buff                 page buffer
 | |
|    block_size           block size for page
 | |
|    record_number        Record number in index
 | |
|    end_of_data          pointer to end of data for record
 | |
| 
 | |
|   RETURN
 | |
|     0  Error in data
 | |
|     #  Pointer to start of record.
 | |
|        In this case *end_of_data is set.
 | |
| */
 | |
| 
 | |
| static uchar *get_record_position(MARIA_SHARE *share, uchar *buff,
 | |
|                                  uint record_number, uchar **end_of_data)
 | |
| {
 | |
|   uint block_size= share->block_size;
 | |
|   uint number_of_records= (uint) buff[DIR_COUNT_OFFSET];
 | |
|   uchar *dir;
 | |
|   uchar *data;
 | |
|   uint offset, length;
 | |
| 
 | |
| #ifdef SANITY_CHECKS
 | |
|   if (record_number >= number_of_records ||
 | |
|       record_number > ((block_size - PAGE_HEADER_SIZE(share) - PAGE_SUFFIX_SIZE)
 | |
|                        / DIR_ENTRY_SIZE))
 | |
|   {
 | |
|     DBUG_PRINT("error",
 | |
|                ("Wrong row number: record_number: %u  number_of_records: %u",
 | |
|                 record_number, number_of_records));
 | |
|     return 0;
 | |
|   }
 | |
| #endif
 | |
| 
 | |
|   dir= dir_entry_pos(buff, block_size, record_number);
 | |
|   offset= uint2korr(dir);
 | |
|   length= uint2korr(dir + 2);
 | |
| #ifdef SANITY_CHECKS
 | |
|   if (offset < PAGE_HEADER_SIZE(share) ||
 | |
|       offset + length > (block_size -
 | |
|                          number_of_records * DIR_ENTRY_SIZE -
 | |
|                          PAGE_SUFFIX_SIZE))
 | |
|   {
 | |
|     DBUG_PRINT("error",
 | |
|                ("Wrong row position:  record_number: %u  offset: %u  "
 | |
|                 "length: %u  number_of_records: %u",
 | |
|                 record_number, offset, length, number_of_records));
 | |
|     return 0;
 | |
|   }
 | |
| #endif
 | |
|   data= buff + offset;
 | |
|   *end_of_data= data + length;
 | |
|   return data;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Init extent
 | |
| 
 | |
|   NOTES
 | |
|     extent is a cursor over which pages to read
 | |
| */
 | |
| 
 | |
| static void init_extent(MARIA_EXTENT_CURSOR *extent, uchar *extent_info,
 | |
|                         uint extents, MARIA_RECORD_POS *tail_positions)
 | |
| {
 | |
|   uint page_count;
 | |
|   extent->extent=       extent_info;
 | |
|   extent->extent_count= extents;
 | |
|   extent->page=         page_korr(extent_info);         /* First extent */
 | |
|   page_count=           (uint2korr(extent_info + ROW_EXTENT_PAGE_SIZE) &
 | |
|                          ~START_EXTENT_BIT);
 | |
|   extent->tail=         page_count & TAIL_BIT;
 | |
|   if (extent->tail)
 | |
|   {
 | |
|     extent->page_count=   1;
 | |
|     extent->tail_row_nr=  page_count & ~TAIL_BIT;
 | |
|   }
 | |
|   else
 | |
|     extent->page_count=   page_count;
 | |
|   extent->tail_positions= tail_positions;
 | |
|   extent->lock_for_tail_pages= PAGECACHE_LOCK_LEFT_UNLOCKED;
 | |
| }
 | |
| 
 | |
| 
 | |
/*
  Read next extent

  SYNOPSIS
    read_next_extent()
    info                Maria handler
    extent              Pointer to current extent (this is updated to point
                        to next)
    end_of_data         Pointer to end of data in read block (out)

  NOTES
    New block is read into info->buff

    For a tail page the tail position is stored in *extent->tail_positions
    (which is advanced) and info->cur_row.tail_count is incremented.
    For a full page info->cur_row.full_page_count is incremented.

    If extent->lock_for_tail_pages is not PAGECACHE_LOCK_LEFT_UNLOCKED,
    tail pages are read with that lock and pushed on info->pinned_pages.

  RETURN
    0   Error;  my_errno is set
    #   Pointer to start of data in read block
        In this case end_of_data is updated to point to end of data.
*/

static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent,
                              uchar **end_of_data)
{
  MARIA_SHARE *share= info->s;
  uchar *buff, *data;
  MARIA_PINNED_PAGE page_link;
  enum pagecache_page_lock lock;
  DBUG_ENTER("read_next_extent");

  if (!extent->page_count)
  {
    /* Current extent is used up; decode the next one */
    uint page_count;
    if (!--extent->extent_count)
      goto crashed;                             /* No extents left */
    extent->extent+=    ROW_EXTENT_SIZE;
    extent->page=       page_korr(extent->extent);
    page_count=         (uint2korr(extent->extent+ROW_EXTENT_PAGE_SIZE) &
                         ~START_EXTENT_BIT);
    if (!page_count)
      goto crashed;
    extent->tail=       page_count & TAIL_BIT;
    /*
      For a tail, extent->page_count is left at 0 here; it is set to 0
      again after the tail has been read below.
    */
    if (extent->tail)
      extent->tail_row_nr= page_count & ~TAIL_BIT;
    else
      extent->page_count= page_count;
    DBUG_PRINT("info",("New extent.  Page: %lu  page_count: %u  tail_flag: %d",
                       (ulong) extent->page, extent->page_count,
                       extent->tail != 0));
  }
  extent->first_extent= 0;

  /* Only tail pages may be read with a lock */
  lock= PAGECACHE_LOCK_LEFT_UNLOCKED;
  if (extent->tail)
    lock= extent->lock_for_tail_pages;

  buff= pagecache_read(share->pagecache,
                       &info->dfile, extent->page, 0,
                       info->buff, share->page_type,
                       lock, &page_link.link);
  if (lock != PAGECACHE_LOCK_LEFT_UNLOCKED)
  {
    /* Read during UNDO */
    /* The page link is registered before the read result is checked */
    page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
    page_link.changed= buff != 0;
    push_dynamic(&info->pinned_pages, (void*) &page_link);
  }
  if (!buff)
  {
    /* check if we tried to read over end of file (ie: bad data in record) */
    if ((extent->page + 1) * share->block_size >
        share->state.state.data_file_length)
      goto crashed;
    /* Other read error: returned without calling _ma_set_fatal_error() */
    DBUG_RETURN(0);
  }

  if (!extent->tail)
  {
    /* Full data page */
    if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != BLOB_PAGE)
      goto crashed;
    extent->page++;                             /* point to next page */
    extent->page_count--;
    *end_of_data= buff + share->block_size - PAGE_SUFFIX_SIZE;
    info->cur_row.full_page_count++;            /* For maria_chk */
    DBUG_RETURN(extent->data_start= buff + FULL_PAGE_HEADER_SIZE(share));
  }

  /* Found tail */
  if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != TAIL_PAGE)
    goto crashed;
  /* The position is stored before the directory entry is validated */
  *(extent->tail_positions++)= ma_recordpos(extent->page,
                                            extent->tail_row_nr);
  info->cur_row.tail_count++;                   /* For maria_chk */

  if (!(data= get_record_position(share, buff,
                                  extent->tail_row_nr,
                                  end_of_data)))
    goto crashed;
  extent->data_start= data;
  extent->page_count= 0;                        /* No more data in extent */
  DBUG_RETURN(data);


crashed:
  /* Wrong extent or page data */
  DBUG_ASSERT(!maria_assert_if_crashed_table);
  _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
  DBUG_PRINT("error", ("wrong extent information"));
  DBUG_RETURN(0);
}
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Copy data that may be split over many blocks (slow path of read_long_data())
 | |
| 
 | |
|   SYNOPSIS
 | |
|     read_long_data2()
 | |
|     info                Maria handler
 | |
|     to                  Store result here; the caller provides room for at
 | |
|                         least 'length' bytes
 | |
|     length              Number of bytes to copy
 | |
|     extent              Pointer to current extent position
 | |
|     data                Current position in buffer; advanced past the bytes
 | |
|                         that were copied
 | |
|     end_of_data         End of data in buffer; updated when a new extent
 | |
|                         is read
 | |
| 
 | |
|   NOTES
 | |
|     When we have to read a new buffer, it's read into info->buff
 | |
| 
 | |
|     Whatever is left in the current buffer is copied first, after which
 | |
|     read_next_extent() is called until the rest of the request is satisfied.
 | |
| 
 | |
|   RETURN
 | |
|     0   ok
 | |
|     1   error
 | |
| */
 | |
| 
 | |
| static my_bool read_long_data2(MARIA_HA *info, uchar *to, ulong length,
 | |
|                               MARIA_EXTENT_CURSOR *extent,
 | |
|                               uchar **data, uchar **end_of_data)
 | |
| {
 | |
|   uint available= (uint) (*end_of_data - *data);
 | |
|   DBUG_ENTER("read_long_data2");
 | |
|   DBUG_PRINT("enter", ("length: %lu  left_length: %u",
 | |
|                        length, available));
 | |
|   DBUG_ASSERT(*data <= *end_of_data);
 | |
| 
 | |
|   /*
 | |
|     Fields are never split in the middle. If the request is bigger than what
 | |
|     is left on the first (head) extent, the field starts in the next extent
 | |
|     and the rest of the head block is ignored.  Such unused space exists when
 | |
|     the fixed part of the row was shorter than min_block_length and the head
 | |
|     block was extended to min_block_length.
 | |
| 
 | |
|     This may change in the future, which is why the copy loop below still
 | |
|     handles a partially used buffer.
 | |
|   */
 | |
|   if (extent->first_extent && length > available)
 | |
|   {
 | |
|     *end_of_data= *data;
 | |
|     available= 0;
 | |
|   }
 | |
| 
 | |
|   /* Empty buffer after buffer until the remaining request fits in one */
 | |
|   while (available < length)
 | |
|   {
 | |
|     memcpy(to, *data, available);
 | |
|     to+= available;
 | |
|     length-= available;
 | |
|     *data= read_next_extent(info, extent, end_of_data);
 | |
|     if (!*data)
 | |
|       DBUG_RETURN(1);
 | |
|     available= (uint) (*end_of_data - *data);
 | |
|   }
 | |
| 
 | |
|   memcpy(to, *data, length);
 | |
|   (*data)+= length;
 | |
|   DBUG_PRINT("info", ("left_length: %u", available - (uint) length));
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| /*
 | |
|   Copy 'length' bytes of row data to 'to' and advance *data.
 | |
| 
 | |
|   The common case, where everything is available in the current buffer, is
 | |
|   handled inline. Otherwise the work is handed over to read_long_data2().
 | |
| 
 | |
|   RETURN
 | |
|     0   ok
 | |
|     1   error
 | |
| */
 | |
| 
 | |
| static inline my_bool read_long_data(MARIA_HA *info, uchar *to, ulong length,
 | |
|                               MARIA_EXTENT_CURSOR *extent,
 | |
|                               uchar **data, uchar **end_of_data)
 | |
| {
 | |
|   uint available= (uint) (*end_of_data - *data);
 | |
| 
 | |
|   if (unlikely(available < length))
 | |
|     return read_long_data2(info, to, length, extent, data, end_of_data);
 | |
| 
 | |
|   memcpy(to, *data, length);
 | |
|   (*data)+= length;
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Read a record from page (helper function for _ma_read_block_record())
 | |
| 
 | |
|   SYNOPSIS
 | |
|     _ma_read_block_record2()
 | |
|     info                Maria handler
 | |
|     record              Store record here
 | |
|     data                Start of head data for row
 | |
|     end_of_data         End of data for row
 | |
| 
 | |
|   NOTES
 | |
|     The head page is already read by caller
 | |
|     The following data is updated in info->cur_row:
 | |
| 
 | |
|     cur_row.head_length is set to size of entry in head block
 | |
|     cur_row.header_length is set to the precalculated header size - 1
 | |
|     cur_row.trid is set if the row has ROW_FLAG_TRANSID
 | |
|     cur_row.tail_positions is set to point to all tail blocks
 | |
|     cur_row.extents points to extents data
 | |
|     cur_row.extents_count contains number of extents
 | |
|     cur_row.empty_bits is set to empty bits
 | |
|     cur_row.field_lengths contains packed length of all fields
 | |
|     cur_row.field_lengths_length contains the size of cur_row.field_lengths
 | |
|     cur_row.blob_length contains total length of all blobs
 | |
|     cur_row.checksum contains the checksum byte stored in the row (only if
 | |
|     share->calc_checksum is set).
 | |
| 
 | |
|     Blob data is read into info->blob_buff; the blob columns in 'record'
 | |
|     get pointers into that buffer.
 | |
| 
 | |
|    RETURN
 | |
|      0                        ok
 | |
|      HA_ERR_ROW_NOT_VISIBLE   trnman_can_read_from() rejected the row's
 | |
|                               transaction id (my_errno is also set)
 | |
|      HA_ERR_WRONG_IN_RECORD   Inconsistent row data; reported through
 | |
|                               _ma_set_fatal_error()
 | |
|      #                        my_errno, if a buffer allocation or the read of
 | |
|                               split data failed
 | |
| */
 | |
| 
 | |
| int _ma_read_block_record2(MARIA_HA *info, uchar *record,
 | |
|                            uchar *data, uchar *end_of_data)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uchar *field_length_data= 0, *UNINIT_VAR(blob_buffer), *start_of_data;
 | |
|   uint flag, null_bytes, cur_null_bytes, row_extents, field_lengths;
 | |
|   my_bool found_blob= 0;
 | |
|   MARIA_EXTENT_CURSOR extent;
 | |
|   MARIA_COLUMNDEF *column, *end_column;
 | |
|   MARIA_ROW *cur_row= &info->cur_row;
 | |
|   myf myflag= MY_WME | share->malloc_flag;
 | |
|   DBUG_ENTER("_ma_read_block_record2");
 | |
| 
 | |
|   start_of_data= data;
 | |
|   flag= (uint) (uchar) data[0];
 | |
|   cur_null_bytes= share->base.original_null_bytes;
 | |
|   null_bytes=     share->base.null_bytes;
 | |
|   cur_row->head_length= (uint) (end_of_data - data);
 | |
|   cur_row->full_page_count= cur_row->tail_count= 0;
 | |
|   cur_row->blob_length= 0;
 | |
|   /* Number of bytes in header that we don't need to write during undo */
 | |
|   cur_row->header_length= total_header_size[(flag & PRECALC_HEADER_BITMASK)]-1;
 | |
| 
 | |
|   if (flag & ROW_FLAG_TRANSID)
 | |
|   {
 | |
|     cur_row->trid= transid_korr(data+1);
 | |
|     if (!info->trn)
 | |
|     {
 | |
|       /* File crashed */
 | |
|       DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|       _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
|       DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
 | |
|     }
 | |
|     if (!trnman_can_read_from(info->trn, cur_row->trid))
 | |
|       DBUG_RETURN(my_errno= HA_ERR_ROW_NOT_VISIBLE);
 | |
|   }
 | |
| 
 | |
|   /* Skip trans header (for now, until we have MVCC support) */
 | |
|   data+= cur_row->header_length + 1 ;
 | |
|   if (flag & ROW_FLAG_NULLS_EXTENDED)
 | |
|     cur_null_bytes+= data[-1];         /* Count is in the last header byte */
 | |
| 
 | |
|   row_extents= 0;
 | |
|   if (flag & ROW_FLAG_EXTENTS)
 | |
|   {
 | |
|     uint row_extent_size;
 | |
|     /*
 | |
|       Record is split over many data pages.
 | |
|       Get number of extents and first extent
 | |
|     */
 | |
|     get_key_length(row_extents, data);
 | |
|     cur_row->extents_count= row_extents;
 | |
|     row_extent_size= row_extents * ROW_EXTENT_SIZE;
 | |
|     if (cur_row->extents_buffer_length < row_extent_size &&
 | |
|         _ma_alloc_buffer(&cur_row->extents,
 | |
|                          &cur_row->extents_buffer_length,
 | |
|                          row_extent_size, myflag))
 | |
|       DBUG_RETURN(my_errno);
 | |
|     memcpy(cur_row->extents, data, ROW_EXTENT_SIZE);
 | |
|     data+= ROW_EXTENT_SIZE;
 | |
|     init_extent(&extent, cur_row->extents, row_extents,
 | |
|                 cur_row->tail_positions);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     cur_row->extents_count= 0;
 | |
|     (*cur_row->tail_positions)= 0;
 | |
|     extent.page_count= 0;
 | |
|     extent.extent_count= 1;
 | |
|   }
 | |
|   extent.first_extent= 1;
 | |
| 
 | |
|   field_lengths= 0;
 | |
|   if (share->base.max_field_lengths)
 | |
|   {
 | |
|     get_key_length(field_lengths, data);
 | |
|     cur_row->field_lengths_length= field_lengths;
 | |
| #ifdef SANITY_CHECKS
 | |
|     if (field_lengths > share->base.max_field_lengths)
 | |
|       goto err;
 | |
| #endif
 | |
|   }
 | |
| 
 | |
|   if (share->calc_checksum)
 | |
|     cur_row->checksum= (uint) (uchar) *data++;
 | |
|   /* data now points on null bits */
 | |
|   memcpy(record, data, cur_null_bytes);
 | |
|   if (unlikely(cur_null_bytes != null_bytes))
 | |
|   {
 | |
|     /*
 | |
|       This only happens if we have added more NULL columns with
 | |
|       ALTER TABLE and are fetching an old row that has not been modified
 | |
|       since then
 | |
|     */
 | |
|     bzero(record + cur_null_bytes, (uint) (null_bytes - cur_null_bytes));
 | |
|   }
 | |
|   data+= null_bytes;
 | |
|   /* We copy the empty bits to be able to use them for delete/update */
 | |
|   memcpy(cur_row->empty_bits, data, share->base.pack_bytes);
 | |
|   data+= share->base.pack_bytes;
 | |
| 
 | |
|   /* TODO: Use field offsets, instead of just skipping them */
 | |
|   data+= share->base.field_offsets * FIELD_OFFSET_SIZE;
 | |
| 
 | |
|   /*
 | |
|     Read row extents (note that first extent was already read into
 | |
|     cur_row->extents above)
 | |
|   */
 | |
|   if (row_extents > 1)
 | |
|   {
 | |
|     if (read_long_data(info, cur_row->extents + ROW_EXTENT_SIZE,
 | |
|                        (row_extents - 1) * ROW_EXTENT_SIZE,
 | |
|                        &extent, &data, &end_of_data))
 | |
|       DBUG_RETURN(my_errno);
 | |
|   }
 | |
| 
 | |
|   /*
 | |
|     Data now points to start of fixed length field data that can't be null
 | |
|     or 'empty'. Note that these fields can't be split over blocks.
 | |
|   */
 | |
|   for (column= share->columndef,
 | |
|          end_column= column + share->base.fixed_not_null_fields;
 | |
|        column < end_column; column++)
 | |
|   {
 | |
|     uint column_length= column->length;
 | |
|     if (data + column_length > end_of_data &&
 | |
|         !(data= read_next_extent(info, &extent, &end_of_data)))
 | |
|       goto err;
 | |
|     memcpy(record + column->offset, data, column_length);
 | |
|     data+= column_length;
 | |
|   }
 | |
| 
 | |
|   /* Read array of field lengths. This may be stored in several extents */
 | |
|   if (field_lengths)
 | |
|   {
 | |
|     field_length_data= cur_row->field_lengths;
 | |
|     if (read_long_data(info, field_length_data, field_lengths, &extent,
 | |
|                        &data, &end_of_data))
 | |
|       DBUG_RETURN(my_errno);
 | |
|   }
 | |
| 
 | |
|   /* Read variable length data. Each of these may be split over many extents */
 | |
|   for (end_column= share->columndef + share->base.fields;
 | |
|        column < end_column; column++)
 | |
|   {
 | |
|     enum en_fieldtype type= column->type;
 | |
|     uchar *field_pos= record + column->offset;
 | |
|     /* First check if field is present in record */
 | |
|     if ((record[column->null_pos] & column->null_bit) ||
 | |
|         (column->empty_bit &&
 | |
|          (cur_row->empty_bits[column->empty_pos] & column->empty_bit)))
 | |
|     {
 | |
|       bfill(record + column->offset, column->fill_length,
 | |
|             type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
 | |
|       continue;
 | |
|     }
 | |
|     switch (type) {
 | |
|     case FIELD_NORMAL:                          /* Fixed length field */
 | |
|     case FIELD_SKIP_PRESPACE:
 | |
|     case FIELD_SKIP_ZERO:                       /* Fixed length field */
 | |
|       if (data + column->length > end_of_data &&
 | |
|           !(data= read_next_extent(info, &extent, &end_of_data)))
 | |
|         goto err;
 | |
|       memcpy(field_pos, data, column->length);
 | |
|       data+= column->length;
 | |
|       break;
 | |
|     case FIELD_SKIP_ENDSPACE:                   /* CHAR */
 | |
|     {
 | |
|       /* Char that is space filled */
 | |
|       uint length;
 | |
|       if (column->length <= 255)
 | |
|         length= (uint) (uchar) *field_length_data++;
 | |
|       else
 | |
|       {
 | |
|         length= uint2korr(field_length_data);
 | |
|         field_length_data+= 2;
 | |
|       }
 | |
| #ifdef SANITY_CHECKS
 | |
|       if (length > column->length)
 | |
|         goto err;
 | |
| #endif
 | |
|       if (read_long_data(info, field_pos, length, &extent, &data,
 | |
|                          &end_of_data))
 | |
|         DBUG_RETURN(my_errno);
 | |
|       bfill(field_pos + length, column->length - length, ' ');
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_VARCHAR:
 | |
|     {
 | |
|       ulong length;
 | |
|       uint pack_length __attribute__((unused));
 | |
|       if (column->length <= 256)
 | |
|       {
 | |
|         length= (uint) (uchar) (*field_pos++= *field_length_data++);
 | |
|         pack_length= 1;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         length= uint2korr(field_length_data);
 | |
|         field_pos[0]= field_length_data[0];
 | |
|         field_pos[1]= field_length_data[1];
 | |
|         field_pos+= 2;
 | |
|         field_length_data+= 2;
 | |
|         pack_length= 2;
 | |
|       }
 | |
| #ifdef SANITY_CHECKS
 | |
|       if (length > column->length - pack_length)
 | |
|         goto err;
 | |
| #endif
 | |
|       if (read_long_data(info, field_pos, length, &extent, &data,
 | |
|                          &end_of_data))
 | |
|         DBUG_RETURN(my_errno);
 | |
|       MEM_UNDEFINED(field_pos + length, column->length - length - pack_length);
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_BLOB:
 | |
|     {
 | |
|       uint column_size_length= column->length - portable_sizeof_char_ptr;
 | |
|       ulong blob_length= _ma_calc_blob_length(column_size_length,
 | |
|                                               field_length_data);
 | |
| 
 | |
|       if (!found_blob)
 | |
|       {
 | |
|         /*
 | |
|           Calculate total length for all blobs.  This is done once, when the
 | |
|           first blob is found, so that one buffer can hold all blob data.
 | |
|         */
 | |
|         ulong blob_lengths= 0;
 | |
|         uchar *length_data= field_length_data;
 | |
|         MARIA_COLUMNDEF *blob_field= column;
 | |
| 
 | |
|         found_blob= 1;
 | |
|         for (; blob_field < end_column; blob_field++)
 | |
|         {
 | |
|           uint size_length;
 | |
|           if ((record[blob_field->null_pos] & blob_field->null_bit) ||
 | |
|               (blob_field->empty_bit &
 | |
|                (cur_row->empty_bits[blob_field->empty_pos] &
 | |
|                 blob_field->empty_bit)))
 | |
|             continue;
 | |
|           size_length= blob_field->length - portable_sizeof_char_ptr;
 | |
|           blob_lengths+= _ma_calc_blob_length(size_length, length_data);
 | |
|           length_data+= size_length;
 | |
|         }
 | |
|         cur_row->blob_length= blob_lengths;
 | |
|         DBUG_PRINT("info", ("Total blob length: %lu", blob_lengths));
 | |
|         if (_ma_alloc_buffer(&info->blob_buff, &info->blob_buff_size,
 | |
|                              blob_lengths, myflag))
 | |
|           DBUG_RETURN(my_errno);
 | |
|         blob_buffer= info->blob_buff;
 | |
|       }
 | |
| 
 | |
|       memcpy(field_pos, field_length_data, column_size_length);
 | |
|       memcpy(field_pos + column_size_length, (uchar *) &blob_buffer,
 | |
|              sizeof(char*));
 | |
|       field_length_data+= column_size_length;
 | |
| 
 | |
|       /*
 | |
|         After we have read one extent, then each blob is in its own extent
 | |
|       */
 | |
|       if (!extent.first_extent || (ulong) (end_of_data - data) < blob_length)
 | |
|         end_of_data= data;                      /* Force read of next extent */
 | |
| 
 | |
|       if (read_long_data(info, blob_buffer, blob_length, &extent, &data,
 | |
|                          &end_of_data))
 | |
|         DBUG_RETURN(my_errno);
 | |
|       blob_buffer+= blob_length;
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
| #ifdef EXTRA_DEBUG
 | |
|       DBUG_ASSERT(0);                           /* purecov: deadcode */
 | |
| #endif
 | |
|       goto err;
 | |
|     }
 | |
|     continue;
 | |
|   }
 | |
| 
 | |
|   if (row_extents)
 | |
|   {
 | |
|     DBUG_PRINT("info", ("Row read:  page_count: %u  extent_count: %u",
 | |
|                         extent.page_count, extent.extent_count));
 | |
|     *extent.tail_positions= 0;                  /* End marker */
 | |
|     if (extent.page_count)
 | |
|       goto err;
 | |
|     if (extent.extent_count > 1)
 | |
|     {
 | |
|       if (_ma_check_if_zero(extent.extent + ROW_EXTENT_SIZE,
 | |
|                             (extent.extent_count-1) * ROW_EXTENT_SIZE))
 | |
|       {
 | |
|         DBUG_PRINT("error", ("Data in extent is not zero"));
 | |
|         DBUG_DUMP("extent", extent.extent + ROW_EXTENT_SIZE,
 | |
|                   (extent.extent_count-1) * ROW_EXTENT_SIZE);
 | |
|         goto err;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     DBUG_PRINT("info", ("Row read"));
 | |
|     /*
 | |
|       data should normally point to end_of_data. The only exception is if
 | |
|       the row is very short in which case we allocated 'min_block_length' data
 | |
|       for allowing the row to expand.
 | |
|     */
 | |
|     if (data != end_of_data && (uint) (end_of_data - start_of_data) >
 | |
|         share->base.min_block_length)
 | |
|       goto err;
 | |
|   }
 | |
| #ifdef EXTRA_DEBUG
 | |
|   if (share->calc_checksum && !info->in_check_table)
 | |
|   {
 | |
|     /* Ensure that row checksum is correct */
 | |
|     DBUG_ASSERT(((share->calc_checksum)(info, record) & 255) ==
 | |
|                 cur_row->checksum);
 | |
|   }
 | |
| #endif
 | |
|   info->update|= HA_STATE_AKTIV;	/* We have an active record */
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   /* Something was wrong with data on record */
 | |
|   DBUG_PRINT("error", ("Found record with wrong data"));
 | |
|   _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
|   DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
 | |
| }
 | |
| 
 | |
| 
 | |
| /** @brief Read positions to tail blocks and full blocks
 | |
| 
 | |
|   @fn    read_row_extent_info()
 | |
|   @param info	         Handler
 | |
|   @param buff           Page buffer in which the row starts
 | |
|   @param record_number  Number of the row's entry on that page (passed on
 | |
|                         to get_record_position())
 | |
| 
 | |
|   @notes
 | |
|     This function is a simpler version of _ma_read_block_record2()
 | |
|     The data about the used pages is stored in info->cur_row:
 | |
|     cur_row.extents gets a copy of all extents, cur_row.extents_count
 | |
|     their number and cur_row.tail_positions a zero terminated list with
 | |
|     the positions of the tails.
 | |
| 
 | |
|   @return Status
 | |
|   @retval 0   ok
 | |
|   @retval 1   Error. my_errno contains error number
 | |
| */
 | |
| 
 | |
| static my_bool read_row_extent_info(MARIA_HA *info, uchar *buff,
 | |
|                                     uint record_number)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   MARIA_EXTENT_CURSOR extent;
 | |
|   MARIA_RECORD_POS *tail_pos;
 | |
|   uchar *data, *end_of_data;
 | |
|   uint flag, row_extents, row_extents_size;
 | |
|   uint field_lengths __attribute__ ((unused));
 | |
|   uchar *extents, *end;
 | |
|   DBUG_ENTER("read_row_extent_info");
 | |
| 
 | |
|   if (!(data= get_record_position(share, buff,
 | |
|                                   record_number, &end_of_data)))
 | |
|     DBUG_RETURN(1);                             /* Wrong in record */
 | |
| 
 | |
|   flag= (uint) (uchar) data[0];
 | |
|   /* Skip trans header */
 | |
|   data+= total_header_size[(flag & PRECALC_HEADER_BITMASK)];
 | |
| 
 | |
|   row_extents= 0;
 | |
|   row_extents_size= 0;
 | |
|   if (flag & ROW_FLAG_EXTENTS)
 | |
|   {
 | |
|     /*
 | |
|       Record is split over many data pages.
 | |
|       Get number of extents and first extent
 | |
|     */
 | |
|     get_key_length(row_extents, data);
 | |
|     row_extents_size= row_extents * ROW_EXTENT_SIZE;
 | |
|     if (info->cur_row.extents_buffer_length < row_extents_size &&
 | |
|         _ma_alloc_buffer(&info->cur_row.extents,
 | |
|                          &info->cur_row.extents_buffer_length,
 | |
|                          row_extents_size, MY_WME | share->malloc_flag))
 | |
|       DBUG_RETURN(1);
 | |
|     memcpy(info->cur_row.extents, data, ROW_EXTENT_SIZE);
 | |
|     data+= ROW_EXTENT_SIZE;
 | |
|     init_extent(&extent, info->cur_row.extents, row_extents,
 | |
|                 info->cur_row.tail_positions);
 | |
|     extent.first_extent= 1;
 | |
|   }
 | |
|   info->cur_row.extents_count= row_extents;
 | |
| 
 | |
|   /*
 | |
|     field_lengths looks unused, but get_key_length() also advances 'data'
 | |
|     past the stored length, which is required as 'data' is used below.
 | |
|   */
 | |
|   if (share->base.max_field_lengths)
 | |
|     get_key_length(field_lengths, data);
 | |
| 
 | |
|   if (share->calc_checksum)
 | |
|     info->cur_row.checksum= (uint) (uchar) *data++;
 | |
|   if (row_extents > 1)
 | |
|   {
 | |
|     /* Skip null bits, empty bits and field offsets */
 | |
|     data+= share->base.null_bytes;
 | |
|     data+= share->base.pack_bytes;
 | |
|     data+= share->base.field_offsets * FIELD_OFFSET_SIZE;
 | |
| 
 | |
|     /*
 | |
|       Read row extents (note that first extent was already read into
 | |
|       info->cur_row.extents above)
 | |
|       Lock tails with write lock as we will delete them later.
 | |
|     */
 | |
|     extent.lock_for_tail_pages= PAGECACHE_LOCK_LEFT_WRITELOCKED;
 | |
|     if (read_long_data(info, info->cur_row.extents + ROW_EXTENT_SIZE,
 | |
|                        row_extents_size - ROW_EXTENT_SIZE,
 | |
|                        &extent, &data, &end_of_data))
 | |
|       DBUG_RETURN(1);
 | |
|   }
 | |
| 
 | |
|   /* Update tail_positions with pointer to tails */
 | |
|   tail_pos= info->cur_row.tail_positions;
 | |
|   for (extents= info->cur_row.extents, end= extents + row_extents_size;
 | |
|        extents < end;
 | |
|        extents+= ROW_EXTENT_SIZE)
 | |
|   {
 | |
|     pgcache_page_no_t page=  uint5korr(extents);
 | |
|     uint page_count= uint2korr(extents + ROW_EXTENT_PAGE_SIZE);
 | |
|     if (page_count & TAIL_BIT)
 | |
|       *(tail_pos++)= ma_recordpos(page, (page_count & ~ (TAIL_BIT |
 | |
|                                                          START_EXTENT_BIT)));
 | |
|   }
 | |
|   *tail_pos= 0;                               /* End marker */
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Fetch the row stored at a given record position
 | |
| 
 | |
|   @fn     _ma_read_block_record()
 | |
|   @param info                Maria handler
 | |
|   @param record              Store record here
 | |
|   @param record_pos          Record position (page number and directory
 | |
|                              entry)
 | |
| 
 | |
|   @note
 | |
|   The page is read through the page cache into info->buff, after which the
 | |
|   row is unpacked by _ma_read_block_record2().
 | |
| 
 | |
|   @return Status
 | |
|   @retval 0                      ok
 | |
|   @retval HA_ERR_RECORD_DELETED  Page is unallocated or the directory entry
 | |
|                                  could not be resolved
 | |
|   @retval #                      Other error number (my_errno from the page
 | |
|                                  read or the result of
 | |
|                                  _ma_read_block_record2())
 | |
| */
 | |
| 
 | |
| int _ma_read_block_record(MARIA_HA *info, uchar *record,
 | |
|                           MARIA_RECORD_POS record_pos)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uchar *buff, *data, *end_of_data;
 | |
|   uint offset, page_type;
 | |
|   int ret;
 | |
|   DBUG_ENTER("_ma_read_block_record");
 | |
|   DBUG_PRINT("enter", ("rowid: %lu  page: %lu  rownr: %u",
 | |
|                        (ulong) record_pos,
 | |
|                        (ulong) ma_recordpos_to_page(record_pos),
 | |
|                        ma_recordpos_to_dir_entry(record_pos)));
 | |
| 
 | |
|   buff= pagecache_read(share->pagecache,
 | |
|                        &info->dfile, ma_recordpos_to_page(record_pos), 0,
 | |
|                        info->buff, share->page_type,
 | |
|                        PAGECACHE_LOCK_LEFT_UNLOCKED, 0);
 | |
|   if (!buff)
 | |
|     DBUG_RETURN(my_errno);
 | |
| 
 | |
|   page_type= buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
 | |
|   offset= ma_recordpos_to_dir_entry(record_pos);
 | |
| 
 | |
|   /*
 | |
|     Unallocated page access can happen if this is an access to a page where
 | |
|     all rows were deleted as part of this statement.
 | |
|   */
 | |
|   DBUG_ASSERT(page_type == HEAD_PAGE || page_type == UNALLOCATED_PAGE);
 | |
| 
 | |
|   if (page_type == UNALLOCATED_PAGE ||
 | |
|       !(data= get_record_position(share, buff, offset, &end_of_data)))
 | |
|   {
 | |
|     DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|     DBUG_PRINT("warning", ("Wrong directory entry in data block"));
 | |
|     my_errno= HA_ERR_RECORD_DELETED;           /* File crashed */
 | |
|     DBUG_RETURN(HA_ERR_RECORD_DELETED);
 | |
|   }
 | |
| 
 | |
|   ret= _ma_read_block_record2(info, record, data, end_of_data);
 | |
|   DBUG_RETURN(ret);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Compare a unique constraint between 'record' and the row stored at 'pos'
 | |
| 
 | |
|   The stored row is read into a temporary record buffer and handed to
 | |
|   _ma_unique_comp() together with 'record'.
 | |
| 
 | |
|   RETURN
 | |
|     0  The stored row could be read and _ma_unique_comp() returned 0
 | |
|     1  Otherwise (this includes allocation and read errors)
 | |
| */
 | |
| 
 | |
| my_bool _ma_cmp_block_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
 | |
|                              const uchar *record, MARIA_RECORD_POS pos)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uchar *saved_rec_buff, *old_record;
 | |
|   size_t saved_rec_buff_size;
 | |
|   int error;
 | |
|   my_bool buff_alloced;
 | |
|   my_bool has_blobs= share->base.blobs != 0;
 | |
|   DBUG_ENTER("_ma_cmp_block_unique");
 | |
| 
 | |
|   alloc_on_stack(*info->stack_end_ptr, old_record, buff_alloced,
 | |
|                  share->base.reclength);
 | |
|   if (!old_record)
 | |
|     DBUG_RETURN(1);
 | |
| 
 | |
|   /* Don't let the compare destroy blobs that may be in use */
 | |
|   saved_rec_buff=      info->rec_buff;
 | |
|   saved_rec_buff_size= info->rec_buff_size;
 | |
|   if (has_blobs)
 | |
|   {
 | |
|     /* Detach the record buffer so that the read has to allocate a new one */
 | |
|     info->rec_buff= 0;
 | |
|     info->rec_buff_size= 0;
 | |
|   }
 | |
| 
 | |
|   if (!(error= _ma_read_block_record(info, old_record, pos)))
 | |
|     error= _ma_unique_comp(def, record, old_record, def->null_are_equal);
 | |
| 
 | |
|   if (has_blobs)
 | |
|   {
 | |
|     /* Drop the temporary buffer (if any) and put back the original one */
 | |
|     my_free(info->rec_buff);
 | |
|     info->rec_buff=      saved_rec_buff;
 | |
|     info->rec_buff_size= saved_rec_buff_size;
 | |
|   }
 | |
|   DBUG_PRINT("exit", ("result: %d", error));
 | |
|   stack_alloc_free(old_record, buff_alloced);
 | |
|   DBUG_RETURN(error != 0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /****************************************************************************
 | |
|   Table scan
 | |
| ****************************************************************************/
 | |
| 
 | |
| /*
 | |
|   Prepare a handler for a table scan
 | |
| 
 | |
|   SYNOPSIS
 | |
|    _ma_scan_init_block_record(MARIA_HA *info)
 | |
| 
 | |
|   IMPLEMENTATION
 | |
|     One allocation of two blocks is made: the first block is used for the
 | |
|     current bitmap and the second one for the current page.
 | |
|     The scan variables are then set up so that the first read fetches a
 | |
|     bitmap page, and the in-memory bitmap is flushed.
 | |
| 
 | |
|   RETURN
 | |
|     0  ok
 | |
|     1  error (couldn't allocate memory or disk error)
 | |
| */
 | |
| 
 | |
| my_bool _ma_scan_init_block_record(MARIA_HA *info)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   myf alloc_flags= MY_WME | share->malloc_flag;
 | |
|   DBUG_ENTER("_ma_scan_init_block_record");
 | |
|   DBUG_ASSERT(info->dfile.file == share->bitmap.file.file);
 | |
| 
 | |
|   /*
 | |
|     bitmap_buff may already be allocated if this is the second call to
 | |
|     rnd_init() without a rnd_end() in between, see sql/handler.h
 | |
|   */
 | |
|   if (!info->scan.bitmap_buff)
 | |
|   {
 | |
|     info->scan.bitmap_buff=
 | |
|       (uchar *) my_malloc(PSI_INSTRUMENT_ME, share->block_size * 2,
 | |
|                           alloc_flags);
 | |
|     if (!info->scan.bitmap_buff)
 | |
|       DBUG_RETURN(1);
 | |
|   }
 | |
|   info->scan.page_buff= info->scan.bitmap_buff + share->block_size;
 | |
|   info->scan.bitmap_end= info->scan.bitmap_buff + share->bitmap.max_total_size;
 | |
| 
 | |
|   /* Set scan variables to get _ma_scan_block() to start with reading bitmap */
 | |
|   info->scan.number_of_rows= 0;
 | |
|   info->scan.bitmap_pos= info->scan.bitmap_end;
 | |
|   info->scan.bitmap_page= (pgcache_page_no_t) 0 - share->bitmap.pages_covered;
 | |
|   info->scan.max_page= share->state.state.data_file_length / share->block_size;
 | |
| 
 | |
|   /*
 | |
|     The table scan reads bitmaps from the page cache (see
 | |
|     _ma_scan_block_record()).  What we have in memory (bitmap.map) must
 | |
|     therefore be flushed to the page cache first; otherwise the bitmap page
 | |
|     in the page cache could be too old and recently inserted rows missed.
 | |
|   */
 | |
|   DBUG_RETURN(_ma_bitmap_flush(share));
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Free the scan buffer allocated by _ma_scan_init_block_record() and the
 | |
|   saved scan position allocated by _ma_scan_remember_block_record()
 | |
| */
 | |
| 
 | |
| void _ma_scan_end_block_record(MARIA_HA *info)
 | |
| {
 | |
|   DBUG_ENTER("_ma_scan_end_block_record");
 | |
| 
 | |
|   /* A saved position exists only if one was remembered during the scan */
 | |
|   if (info->scan_save)
 | |
|   {
 | |
|     my_free(info->scan_save);
 | |
|     info->scan_save= NULL;
 | |
|   }
 | |
| 
 | |
|   my_free(info->scan.bitmap_buff);
 | |
|   info->scan.bitmap_buff= NULL;
 | |
|   DBUG_VOID_RETURN;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   @brief Save current scan position
 | |
| 
 | |
|   @param info     Maria handler
 | |
|   @param lastpos  Out: position of the last read row
 | |
|                   (info->cur_row.nextpos - 1)
 | |
| 
 | |
|   @note
 | |
|   For the moment we can only remember one position, but this is
 | |
|   good enough for MySQL usage
 | |
| 
 | |
|   The scan state and a copy of the bitmap and page buffers are stored in
 | |
|   info->scan_save, which is allocated on first use.
 | |
| 
 | |
|   @return
 | |
|   @retval 0			  ok
 | |
|   @retval HA_ERR_OUT_OF_MEM       Could not allocate memory to hold position
 | |
| */
 | |
| 
 | |
| int _ma_scan_remember_block_record(MARIA_HA *info,
 | |
|                                    MARIA_RECORD_POS *lastpos)
 | |
| {
 | |
|   uint buffers_size= info->s->block_size * 2;
 | |
|   uchar *save_buff;
 | |
|   DBUG_ENTER("_ma_scan_remember_block_record");
 | |
| 
 | |
|   if (!info->scan_save)
 | |
|   {
 | |
|     /* The copy of the buffers is placed directly after the saved struct */
 | |
|     info->scan_save= my_malloc(PSI_INSTRUMENT_ME,
 | |
|                                ALIGN_SIZE(sizeof(*info->scan_save)) +
 | |
|                                buffers_size,
 | |
|                                MYF(MY_WME));
 | |
|     if (!info->scan_save)
 | |
|       DBUG_RETURN(HA_ERR_OUT_OF_MEM);
 | |
|     info->scan_save->bitmap_buff= ((uchar*) info->scan_save +
 | |
|                                    ALIGN_SIZE(sizeof(*info->scan_save)));
 | |
|   }
 | |
|   /* For checking if pages have changed since we last read it */
 | |
|   info->scan.row_changes= info->row_changes;
 | |
| 
 | |
|   /*
 | |
|     Remember used bitmap and used head page.  The struct copy overwrites
 | |
|     the buffer pointer of the save area, so it is restored afterwards.
 | |
|   */
 | |
|   save_buff= info->scan_save->bitmap_buff;
 | |
|   memcpy(info->scan_save, &info->scan, sizeof(*info->scan_save));
 | |
|   info->scan_save->bitmap_buff= save_buff;
 | |
|   memcpy(save_buff, info->scan.bitmap_buff, buffers_size);
 | |
| 
 | |
|   /* Point to the last read row */
 | |
|   *lastpos= info->cur_row.nextpos - 1;
 | |
|   info->scan_save->dir+= DIR_ENTRY_SIZE;
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Restore the scan state to its saved values
 | |
| 
 | |
|    @param info     Maria handler
 | |
|    @param lastpos  Stored in info->cur_row.nextpos
 | |
| 
 | |
|    @return
 | |
|    0 ok
 | |
|    # error (my_errno from the page read)
 | |
| 
 | |
|    @note
 | |
|    In theory we could swap bitmap buffers instead of copying them.
 | |
|    For the moment we don't do that because there are variables pointing
 | |
|    inside the buffers and it's a bit of a hassle to either make them relative
 | |
|    or repoint them.
 | |
| 
 | |
|    If the data file has changed, we will re-read the current page
 | |
|    to ensure that when we continue scanning we can ignore any deleted rows.
 | |
| */
 | |
| 
 | |
| int _ma_scan_restore_block_record(MARIA_HA *info,
 | |
|                                   MARIA_RECORD_POS lastpos)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uchar *scan_buff= info->scan.bitmap_buff;
 | |
|   DBUG_ENTER("_ma_scan_restore_block_record");
 | |
| 
 | |
|   info->cur_row.nextpos= lastpos;
 | |
| 
 | |
|   /* Copy back the saved state, but keep using the scan's own buffer */
 | |
|   memcpy(&info->scan, info->scan_save, sizeof(*info->scan_save));
 | |
|   info->scan.bitmap_buff= scan_buff;
 | |
|   memcpy(scan_buff, info->scan_save->bitmap_buff, share->block_size * 2);
 | |
| 
 | |
|   if (info->scan.row_changes == info->row_changes)
 | |
|     DBUG_RETURN(0);                     /* No row changes since the save */
 | |
| 
 | |
|   /*
 | |
|     Table has been changed. We have to re-read the current page block as
 | |
|     data may have changed on it that we have to see.
 | |
|   */
 | |
|   if (!(pagecache_read(share->pagecache,
 | |
|                        &info->dfile,
 | |
|                        ma_recordpos_to_page(info->scan.row_base_page),
 | |
|                        0, info->scan.page_buff,
 | |
|                        share->page_type,
 | |
|                        PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
 | |
|     DBUG_RETURN(my_errno);
 | |
| 
 | |
|   /* Recalculate the directory limits from the fresh page */
 | |
|   info->scan.number_of_rows=
 | |
|     (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET];
 | |
|   info->scan.dir_end= (info->scan.page_buff + share->block_size -
 | |
|                        PAGE_SUFFIX_SIZE -
 | |
|                        info->scan.number_of_rows * DIR_ENTRY_SIZE);
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Read next record while scanning table
 | |
| 
 | |
|   SYNOPSIS
 | |
|     _ma_scan_block_record()
 | |
|     info                Maria handler
 | |
|     record              Store found record here
 | |
|     record_pos          Value stored in info->cur_row.nextpos after last call
 | |
|                         This is offset inside the current pagebuff
 | |
|     skip_deleted        Not used
 | |
| 
 | |
|   NOTES
 | |
|     - One must have called mi_scan() before this
 | |
|     - In this version, we don't actually need record_pos; we could as easily
 | |
|       use a variable in info->scan
 | |
| 
 | |
|   IMPLEMENTATION
 | |
|     Current code uses a lot of goto's to separate the different kind of
 | |
|     states we may be in. This gives us a minimum of executed if's for
 | |
|     the normal cases.  I tried several different ways to code this, but
 | |
|     the current one was in the end the most readable and fastest.
 | |
| 
 | |
|   RETURN
 | |
|     0   ok
 | |
|     #   Error code  (Normally HA_ERR_END_OF_FILE)
 | |
| */
 | |
| 
 | |
| int _ma_scan_block_record(MARIA_HA *info, uchar *record,
 | |
|                           MARIA_RECORD_POS record_pos,
 | |
|                           my_bool skip_deleted __attribute__ ((unused)))
 | |
| {
 | |
|   uint block_size;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   DBUG_ENTER("_ma_scan_block_record");
 | |
| 
 | |
| restart_record_read:
 | |
|   /* Find next row in current page */
 | |
|   while (likely(record_pos < info->scan.number_of_rows))
 | |
|   {
 | |
|     uint length, offset;
 | |
|     uchar *data, *end_of_data;
 | |
|     int error;
 | |
| 
 | |
|     /* Ensure that scan.dir and record_pos are in sync */
 | |
|     DBUG_ASSERT(info->scan.dir == dir_entry_pos(info->scan.page_buff,
 | |
|                                                 share->block_size,
 | |
|                                                 (uint) record_pos));
 | |
| 
 | |
|     /* Search for a valid directory entry (not 0) */
 | |
|     while (!(offset= uint2korr(info->scan.dir)))
 | |
|     {
 | |
|       info->scan.dir-= DIR_ENTRY_SIZE;
 | |
|       record_pos++;
 | |
| #ifdef SANITY_CHECKS
 | |
|       if (info->scan.dir < info->scan.dir_end)
 | |
|       {
 | |
|         DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|         goto err;
 | |
|       }
 | |
| #endif
 | |
|     }
 | |
|     /*
 | |
|       This should always be true as the directory should always start with
 | |
|       a valid entry.
 | |
|     */
 | |
|     DBUG_ASSERT(info->scan.dir >= info->scan.dir_end);
 | |
| 
 | |
|     /* found row */
 | |
|     info->cur_row.lastpos= info->scan.row_base_page + record_pos;
 | |
|     info->cur_row.nextpos= record_pos + 1;
 | |
|     data= info->scan.page_buff + offset;
 | |
|     length= uint2korr(info->scan.dir + 2);
 | |
|     end_of_data= data + length;
 | |
|     info->scan.dir-= DIR_ENTRY_SIZE;      /* Point to next row to process */
 | |
| #ifdef SANITY_CHECKS
 | |
|     if (end_of_data > info->scan.dir_end ||
 | |
|         offset < PAGE_HEADER_SIZE(share) ||
 | |
|         length < share->base.min_block_length)
 | |
|     {
 | |
|       DBUG_ASSERT(!(end_of_data > info->scan.dir_end));
 | |
|       DBUG_ASSERT(!(offset < PAGE_HEADER_SIZE(share)));
 | |
|       DBUG_ASSERT(!(length < share->base.min_block_length));
 | |
|       goto err;
 | |
|     }
 | |
| #endif
 | |
|     DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
 | |
|     error= _ma_read_block_record2(info, record, data, end_of_data);
 | |
|     if (error != HA_ERR_ROW_NOT_VISIBLE)
 | |
|       DBUG_RETURN(error);
 | |
|     record_pos++;
 | |
|   }
 | |
| 
 | |
|   /* Find next head page in current bitmap */
 | |
| restart_bitmap_scan:
 | |
|   block_size= share->block_size;
 | |
|   if (likely(info->scan.bitmap_pos < info->scan.bitmap_end))
 | |
|   {
 | |
|     uchar *data=    info->scan.bitmap_pos;
 | |
|     longlong bits= info->scan.bits;
 | |
|     uint bit_pos=  info->scan.bit_pos;
 | |
| 
 | |
|     do
 | |
|     {
 | |
|       while (likely(bits))
 | |
|       {
 | |
|         uint pattern= (uint) (bits & 7);
 | |
|         bits >>= 3;
 | |
|         bit_pos++;
 | |
|         if (pattern > 0 && pattern <= 4)
 | |
|         {
 | |
|           /* Found head page; Read it */
 | |
|           pgcache_page_no_t page;
 | |
|           info->scan.bitmap_pos= data;
 | |
|           info->scan.bits= bits;
 | |
|           info->scan.bit_pos= bit_pos;
 | |
|           page= (info->scan.bitmap_page + 1 +
 | |
|                  (data - info->scan.bitmap_buff) / 6 * 16 + bit_pos - 1);
 | |
|           info->scan.row_base_page= ma_recordpos(page, 0);
 | |
|           if (page >= info->scan.max_page)
 | |
|           {
 | |
|             DBUG_PRINT("info", ("Found end of file"));
 | |
|             DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE));
 | |
|           }
 | |
|           if (!(pagecache_read(share->pagecache,
 | |
|                                &info->dfile,
 | |
|                                page, 0, info->scan.page_buff,
 | |
|                                share->page_type,
 | |
|                                PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
 | |
|             DBUG_RETURN(my_errno);
 | |
|           if (((info->scan.page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) !=
 | |
|                HEAD_PAGE))
 | |
|           {
 | |
|             /*
 | |
|               This may happen if someone has been deleting all rows
 | |
|               from a page since we read the bitmap, so it may be ok.
 | |
|               Print warning in debug log and continue.
 | |
|             */
 | |
|             DBUG_PRINT("warning",
 | |
|                        ("Found page of type %d when expecting head page",
 | |
|                         (info->scan.page_buff[PAGE_TYPE_OFFSET] &
 | |
|                          PAGE_TYPE_MASK)));
 | |
|             continue;
 | |
|           }
 | |
|           if ((info->scan.number_of_rows=
 | |
|                (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) == 0)
 | |
|           {
 | |
|             DBUG_PRINT("error", ("Wrong page header"));
 | |
|             _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
|             DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
 | |
|           }
 | |
|           DBUG_PRINT("info", ("Page %lu has %u rows",
 | |
|                               (ulong) page, info->scan.number_of_rows));
 | |
|           info->scan.dir= (info->scan.page_buff + block_size -
 | |
|                            PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
 | |
|           info->scan.dir_end= (info->scan.dir -
 | |
|                                (info->scan.number_of_rows - 1) *
 | |
|                                DIR_ENTRY_SIZE);
 | |
|           record_pos= 0;
 | |
|           goto restart_record_read;
 | |
|         }
 | |
|       }
 | |
|       for (data+= 6; data < info->scan.bitmap_end; data+= 6)
 | |
|       {
 | |
|         bits= uint6korr(data);
 | |
|         /* Skip not allocated pages and blob / full tail pages */
 | |
|         if (bits && bits != 07777777777777777LL)
 | |
|           break;
 | |
|       }
 | |
|       bit_pos= 0;
 | |
|     } while (data < info->scan.bitmap_end);
 | |
|   }
 | |
| 
 | |
|   /* Read next bitmap */
 | |
|   info->scan.bitmap_page+= share->bitmap.pages_covered;
 | |
|   if (unlikely(info->scan.bitmap_page >= info->scan.max_page))
 | |
|   {
 | |
|     DBUG_PRINT("info", ("Found end of file"));
 | |
|     DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE));
 | |
|   }
 | |
|   DBUG_PRINT("info", ("Reading bitmap at %lu",
 | |
|                       (ulong) info->scan.bitmap_page));
 | |
|   if (!(pagecache_read(share->pagecache, &info->s->bitmap.file,
 | |
|                        info->scan.bitmap_page,
 | |
|                        0, info->scan.bitmap_buff, PAGECACHE_PLAIN_PAGE,
 | |
|                        PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
 | |
|     DBUG_RETURN(my_errno);
 | |
|   /* Skip scanning 'bits' in bitmap scan code */
 | |
|   info->scan.bitmap_pos= info->scan.bitmap_buff - 6;
 | |
|   info->scan.bits= 0;
 | |
|   goto restart_bitmap_scan;
 | |
| 
 | |
| err:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   DBUG_PRINT("error", ("Wrong data on page"));
 | |
|   _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
|   DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Compare a row against a stored one
 | |
| 
 | |
|   NOTES
 | |
|     Not implemented, as block record is not supposed to be used in a shared
 | |
|     global environment
 | |
| */
 | |
| 
 | |
| my_bool _ma_compare_block_record(MARIA_HA *info __attribute__ ((unused)),
 | |
|                                  const uchar *record __attribute__ ((unused)))
 | |
| {
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Store an integer with simple packing
 | |
| 
 | |
|   SYNOPSIS
 | |
|     ma_store_integer()
 | |
|     to                  Store the packed integer here
 | |
|     nr                  Integer to store
 | |
| 
 | |
|   NOTES
 | |
|     This is mostly used to store field numbers and lengths of strings.
 | |
|     We have to cast the result for the LL() becasue of a bug in Forte CC
 | |
|     compiler.
 | |
| 
 | |
|     Packing used is:
 | |
|     nr < 251 is stored as is (in 1 byte)
 | |
|     Numbers that require 1-4 bytes are stored as char(250+byte_length), data
 | |
|     Bigger numbers are stored as 255, data as ulonglong (not yet done).
 | |
| 
 | |
|   RETURN
 | |
|     Position in 'to' after the packed length
 | |
| */
 | |
| 
 | |
| uchar *ma_store_length(uchar *to, ulong nr)
 | |
| {
 | |
|   if (nr < 251)
 | |
|   {
 | |
|     *to=(uchar) nr;
 | |
|     return to+1;
 | |
|   }
 | |
|   if (nr < 65536)
 | |
|   {
 | |
|     if (nr <= 255)
 | |
|     {
 | |
|       to[0]= (uchar) 251;
 | |
|       to[1]= (uchar) nr;
 | |
|       return to+2;
 | |
|     }
 | |
|     to[0]= (uchar) 252;
 | |
|     int2store(to+1, nr);
 | |
|     return to+3;
 | |
|   }
 | |
|   if (nr < 16777216)
 | |
|   {
 | |
|     *to++= (uchar) 253;
 | |
|     int3store(to, nr);
 | |
|     return to+3;
 | |
|   }
 | |
|   *to++= (uchar) 254;
 | |
|   int4store(to, nr);
 | |
|   return to+4;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Calculate how many bytes needed to store a number */
 | |
| 
 | |
| uint ma_calc_length_for_store_length(ulong nr)
 | |
| {
 | |
|   if (nr < 251)
 | |
|     return 1;
 | |
|   if (nr < 65536)
 | |
|   {
 | |
|     if (nr <= 255)
 | |
|       return 2;
 | |
|     return 3;
 | |
|   }
 | |
|   if (nr < 16777216)
 | |
|     return 4;
 | |
|   return 5;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Retrive a stored number */
 | |
| 
 | |
| static ulong ma_get_length(const uchar **packet)
 | |
| {
 | |
|   reg1 const uchar *pos= *packet;
 | |
|   if (*pos < 251)
 | |
|   {
 | |
|     (*packet)++;
 | |
|     return (ulong) *pos;
 | |
|   }
 | |
|   if (*pos == 251)
 | |
|   {
 | |
|     (*packet)+= 2;
 | |
|     return (ulong) pos[1];
 | |
|   }
 | |
|   if (*pos == 252)
 | |
|   {
 | |
|     (*packet)+= 3;
 | |
|     return (ulong) uint2korr(pos+1);
 | |
|   }
 | |
|   if (*pos == 253)
 | |
|   {
 | |
|     (*packet)+= 4;
 | |
|     return (ulong) uint3korr(pos+1);
 | |
|   }
 | |
|   DBUG_ASSERT(*pos == 254);
 | |
|   (*packet)+= 5;
 | |
|   return (ulong) uint4korr(pos+1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Fill array with pointers to field parts to be stored in log for insert
 | |
| 
 | |
|   SYNOPSIS
 | |
|     fill_insert_undo_parts()
 | |
|     info                Maria handler
 | |
|     record              Inserted row
 | |
|     log_parts           Store pointers to changed memory areas here
 | |
|     log_parts_count     See RETURN
 | |
| 
 | |
|   NOTES
 | |
|     We have information in info->cur_row about the read row.
 | |
| 
 | |
|   RETURN
 | |
|     length of data in log_parts.
 | |
|     log_parts_count contains number of used log_parts
 | |
| */
 | |
| 
 | |
| static size_t fill_insert_undo_parts(MARIA_HA *info, const uchar *record,
 | |
|                                      LEX_CUSTRING *log_parts,
 | |
|                                      uint *log_parts_count)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   MARIA_COLUMNDEF *column, *end_column;
 | |
|   uchar *field_lengths= info->cur_row.field_lengths;
 | |
|   size_t row_length;
 | |
|   MARIA_ROW *cur_row= &info->cur_row;
 | |
|   LEX_CUSTRING *start_log_parts;
 | |
|   DBUG_ENTER("fill_insert_undo_parts");
 | |
| 
 | |
|   start_log_parts= log_parts;
 | |
| 
 | |
|   /* Store null bits */
 | |
|   log_parts->str=      record;
 | |
|   log_parts->length=   share->base.null_bytes;
 | |
|   row_length=          log_parts->length;
 | |
|   log_parts++;
 | |
| 
 | |
|   /* Stored bitmap over packed (zero length or all-zero fields) */
 | |
|   log_parts->str= info->cur_row.empty_bits;
 | |
|   log_parts->length= share->base.pack_bytes;
 | |
|   row_length+=       log_parts->length;
 | |
|   log_parts++;
 | |
| 
 | |
|   if (share->base.max_field_lengths)
 | |
|   {
 | |
|     /* Store length of all not empty char, varchar and blob fields */
 | |
|     log_parts->str= field_lengths - 2;
 | |
|     log_parts->length=   info->cur_row.field_lengths_length+2;
 | |
|     int2store(log_parts->str, info->cur_row.field_lengths_length);
 | |
|     row_length+= log_parts->length;
 | |
|     log_parts++;
 | |
|   }
 | |
| 
 | |
|   if (share->base.blobs)
 | |
|   {
 | |
|     /*
 | |
|       Store total blob length to make buffer allocation easier during UNDO
 | |
|      */
 | |
|     log_parts->str=  info->length_buff;
 | |
|     log_parts->length= (uint) (ma_store_length(info->length_buff,
 | |
|                                                  info->cur_row.blob_length) -
 | |
|                                  (uchar*) log_parts->str);
 | |
|     row_length+=          log_parts->length;
 | |
|     log_parts++;
 | |
|   }
 | |
| 
 | |
|   /* Handle constant length fields that are always present */
 | |
|   for (column= share->columndef,
 | |
|        end_column= column+ share->base.fixed_not_null_fields;
 | |
|        column < end_column;
 | |
|        column++)
 | |
|   {
 | |
|     log_parts->str= record + column->offset;
 | |
|     log_parts->length= column->length;
 | |
|     row_length+= log_parts->length;
 | |
|     log_parts++;
 | |
|   }
 | |
| 
 | |
|   /* Handle NULL fields and CHAR/VARCHAR fields */
 | |
|   for (end_column= share->columndef + share->base.fields - share->base.blobs;
 | |
|        column < end_column;
 | |
|        column++)
 | |
|   {
 | |
|     const uchar *column_pos;
 | |
|     size_t column_length;
 | |
|     if ((record[column->null_pos] & column->null_bit) ||
 | |
|         (column->empty_bit &&
 | |
|          cur_row->empty_bits[column->empty_pos] & column->empty_bit))
 | |
|       continue;
 | |
| 
 | |
|     column_pos=    record+ column->offset;
 | |
|     column_length= column->length;
 | |
| 
 | |
|     switch (column->type) {
 | |
|     case FIELD_CHECK:
 | |
|     case FIELD_NORMAL:                          /* Fixed length field */
 | |
|     case FIELD_ZERO:
 | |
|     case FIELD_SKIP_PRESPACE:                   /* Not packed */
 | |
|     case FIELD_SKIP_ZERO:                       /* Fixed length field */
 | |
|       break;
 | |
|     case FIELD_SKIP_ENDSPACE:                   /* CHAR */
 | |
|     {
 | |
|       if (column->length <= 255)
 | |
|         column_length= *field_lengths++;
 | |
|       else
 | |
|       {
 | |
|         column_length= uint2korr(field_lengths);
 | |
|         field_lengths+= 2;
 | |
|       }
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_VARCHAR:
 | |
|     {
 | |
|       if (column->fill_length == 1)
 | |
|         column_length= *field_lengths;
 | |
|       else
 | |
|         column_length= uint2korr(field_lengths);
 | |
|       field_lengths+= column->fill_length;
 | |
|       column_pos+= column->fill_length;
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
|       DBUG_ASSERT(0);
 | |
|     }
 | |
|     log_parts->str= column_pos;
 | |
|     log_parts->length= column_length;
 | |
|     row_length+= log_parts->length;
 | |
|     log_parts++;
 | |
|   }
 | |
| 
 | |
|   /* Add blobs */
 | |
|   for (end_column+= share->base.blobs; column < end_column; column++)
 | |
|   {
 | |
|     const uchar *field_pos= record + column->offset;
 | |
|     uint size_length= column->length - portable_sizeof_char_ptr;
 | |
|     ulong blob_length= _ma_calc_blob_length(size_length, field_pos);
 | |
| 
 | |
|     /*
 | |
|       We don't have to check for null, as blob_length is guranteed to be 0
 | |
|       if the blob is null
 | |
|     */
 | |
|     if (blob_length)
 | |
|     {
 | |
|       uchar *blob_pos;
 | |
|       memcpy(&blob_pos, record + column->offset + size_length,
 | |
|              sizeof(blob_pos));
 | |
|       log_parts->str= blob_pos;
 | |
|       log_parts->length= blob_length;
 | |
|       row_length+= log_parts->length;
 | |
|       log_parts++;
 | |
|     }
 | |
|   }
 | |
|   *log_parts_count= (uint) (log_parts - start_log_parts);
 | |
|   DBUG_RETURN(row_length);
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|    Fill array with pointers to field parts to be stored in log for update
 | |
| 
 | |
|   SYNOPSIS
 | |
|     fill_update_undo_parts()
 | |
|     info                Maria handler
 | |
|     oldrec		Original row
 | |
|     newrec              New row
 | |
|     log_parts           Store pointers to changed memory areas here
 | |
|     log_parts_count     See RETURN
 | |
| 
 | |
|   IMPLEMENTATION
 | |
|     Format of undo record:
 | |
| 
 | |
|     Fields are stored in same order as the field array.
 | |
| 
 | |
|     Offset to changed field data (packed)
 | |
| 
 | |
|     For each changed field
 | |
|       Fieldnumber (packed)
 | |
|       Length, if variable length field (packed)
 | |
| 
 | |
|     For each changed field
 | |
|      Data
 | |
| 
 | |
|    Packing is using ma_store_integer()
 | |
| 
 | |
|    The reason we store field numbers & length separated from data (ie, not
 | |
|    after each other) is to get better cpu caching when we loop over
 | |
|    fields (as we probably don't have to access data for each field when we
 | |
|    want to read and old row through the undo log record).
 | |
| 
 | |
|    As a special case, we use '255' for the field number of the null bitmap.
 | |
| 
 | |
|   RETURN
 | |
|     length of data in log_parts.
 | |
|     log_parts_count contains number of used log_parts
 | |
| */
 | |
| 
 | |
| static size_t fill_update_undo_parts(MARIA_HA *info, const uchar *oldrec,
 | |
|                                      const uchar *newrec,
 | |
|                                      LEX_CUSTRING *log_parts,
 | |
|                                      uint *log_parts_count)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   MARIA_COLUMNDEF *column, *end_column;
 | |
|   MARIA_ROW *old_row= &info->cur_row, *new_row= &info->new_row;
 | |
|   uchar *field_data, *start_field_data, *length_str;
 | |
|   uchar *old_field_lengths= old_row->field_lengths;
 | |
|   uchar *new_field_lengths= new_row->field_lengths;
 | |
|   size_t row_length= 0;
 | |
|   uint field_lengths;
 | |
|   LEX_CUSTRING *start_log_parts;
 | |
|   my_bool new_column_is_empty;
 | |
|   DBUG_ENTER("fill_update_undo_parts");
 | |
| 
 | |
|   start_log_parts= log_parts;
 | |
| 
 | |
|   /*
 | |
|     First log part is for number of fields, field numbers and lengths
 | |
|     The +4 is to reserve place for the number of changed fields.
 | |
|   */
 | |
|   start_field_data= field_data= info->update_field_data + 4;
 | |
|   log_parts++;
 | |
| 
 | |
|   if (memcmp(oldrec, newrec, share->base.null_bytes))
 | |
|   {
 | |
|     /* Store changed null bits */
 | |
|     *field_data++=       (uchar) 255;           /* Special case */
 | |
|     log_parts->str=      oldrec;
 | |
|     log_parts->length=   share->base.null_bytes;
 | |
|     row_length=          log_parts->length;
 | |
|     log_parts++;
 | |
|   }
 | |
| 
 | |
|   /* Handle constant length fields */
 | |
|   for (column= share->columndef,
 | |
|        end_column= column+ share->base.fixed_not_null_fields;
 | |
|        column < end_column;
 | |
|        column++)
 | |
|   {
 | |
|     if (memcmp(oldrec + column->offset, newrec + column->offset,
 | |
|                column->length))
 | |
|     {
 | |
|       field_data= ma_store_length(field_data,
 | |
|                                   (uint) (column - share->columndef));
 | |
|       log_parts->str= oldrec + column->offset;
 | |
|       log_parts->length= column->length;
 | |
|       row_length+=       column->length;
 | |
|       log_parts++;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   /* Handle the rest: NULL fields and CHAR/VARCHAR fields and BLOB's */
 | |
|   for (end_column= share->columndef + share->base.fields;
 | |
|        column < end_column;
 | |
|        column++)
 | |
|   {
 | |
|     const uchar *new_column_pos, *old_column_pos;
 | |
|     size_t new_column_length, old_column_length;
 | |
| 
 | |
|     /* First check if old column is null or empty */
 | |
|     if (oldrec[column->null_pos] & column->null_bit)
 | |
|     {
 | |
|       /*
 | |
|         It's safe to skip this one as either the new column is also null
 | |
|         (no change) or the new_column is not null, in which case the null-bit
 | |
|         maps differed and we have already stored the null bitmap.
 | |
|       */
 | |
|       continue;
 | |
|     }
 | |
|     if (column->empty_bit &&
 | |
|         (old_row->empty_bits[column->empty_pos] & column->empty_bit))
 | |
|     {
 | |
|       if (new_row->empty_bits[column->empty_pos] & column->empty_bit)
 | |
|         continue;                               /* Both are empty; skip */
 | |
| 
 | |
|       /* Store null length column */
 | |
|       field_data= ma_store_length(field_data,
 | |
|                                   (uint) (column - share->columndef));
 | |
|       field_data= ma_store_length(field_data, 0);
 | |
|       continue;
 | |
|     }
 | |
|     /*
 | |
|       Remember if the 'new' value is empty (as in this case we must always
 | |
|       log the original value
 | |
|     */
 | |
|     new_column_is_empty= ((newrec[column->null_pos] & column->null_bit) ||
 | |
|                           (column->empty_bit &&
 | |
|                            (new_row->empty_bits[column->empty_pos] &
 | |
|                             column->empty_bit)));
 | |
| 
 | |
|     old_column_pos=      oldrec + column->offset;
 | |
|     new_column_pos=      newrec + column->offset;
 | |
|     old_column_length= new_column_length= column->length;
 | |
| 
 | |
|     switch (column->type) {
 | |
|     case FIELD_CHECK:
 | |
|     case FIELD_NORMAL:                          /* Fixed length field */
 | |
|     case FIELD_ZERO:
 | |
|     case FIELD_SKIP_PRESPACE:                   /* Not packed */
 | |
|     case FIELD_SKIP_ZERO:                       /* Fixed length field */
 | |
|       break;
 | |
|     case FIELD_VARCHAR:
 | |
|       new_column_length--;                      /* Skip length prefix */
 | |
|       old_column_pos+= column->fill_length;
 | |
|       new_column_pos+= column->fill_length;
 | |
|       /* Fall through */
 | |
|     case FIELD_SKIP_ENDSPACE:                   /* CHAR */
 | |
|     {
 | |
|       if (new_column_length <= 255)
 | |
|       {
 | |
|         old_column_length= *old_field_lengths++;
 | |
|         if (!new_column_is_empty)
 | |
|           new_column_length= *new_field_lengths++;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         old_column_length= uint2korr(old_field_lengths);
 | |
|         old_field_lengths+= 2;
 | |
|         if (!new_column_is_empty)
 | |
|         {
 | |
|           new_column_length= uint2korr(new_field_lengths);
 | |
|           new_field_lengths+= 2;
 | |
|         }
 | |
|       }
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_BLOB:
 | |
|     {
 | |
|       uint size_length= column->length - portable_sizeof_char_ptr;
 | |
|       old_column_length= _ma_calc_blob_length(size_length, old_column_pos);
 | |
|       memcpy((void*) &old_column_pos, oldrec + column->offset + size_length,
 | |
|              sizeof(old_column_pos));
 | |
|       if (!new_column_is_empty)
 | |
|       {
 | |
|         new_column_length= _ma_calc_blob_length(size_length, new_column_pos);
 | |
|         memcpy((void*) &new_column_pos, newrec + column->offset + size_length,
 | |
|                sizeof(old_column_pos));
 | |
|       }
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
|       DBUG_ASSERT(0);
 | |
|     }
 | |
| 
 | |
|     if (new_column_is_empty || new_column_length != old_column_length ||
 | |
|         memcmp(old_column_pos, new_column_pos, new_column_length))
 | |
|     {
 | |
|       field_data= ma_store_length(field_data,
 | |
|                                   (ulong) (column - share->columndef));
 | |
|       field_data= ma_store_length(field_data, (ulong) old_column_length);
 | |
| 
 | |
|       log_parts->str=     old_column_pos;
 | |
|       log_parts->length=  old_column_length;
 | |
|       row_length+=        old_column_length;
 | |
|       log_parts++;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   *log_parts_count= (uint) (log_parts - start_log_parts);
 | |
| 
 | |
|   /* Store length of field length data before the field/field_lengths */
 | |
|   field_lengths= (uint) (field_data - start_field_data);
 | |
|   length_str= start_field_data - ma_calc_length_for_store_length(field_lengths);
 | |
|   start_log_parts->str= length_str;
 | |
|   ma_store_length(length_str, field_lengths);
 | |
|   start_log_parts->length= (size_t) (field_data - start_log_parts->str);
 | |
|   row_length+= start_log_parts->length;
 | |
|   DBUG_RETURN(row_length);
 | |
| }
 | |
| 
 | |
| /***************************************************************************
 | |
|   In-write hooks called under log's lock when log record is written
 | |
| ***************************************************************************/
 | |
| 
 | |
| /**
 | |
|    @brief Sets transaction's rec_lsn if needed
 | |
| 
 | |
|    A transaction sometimes writes a REDO even before the page is in the
 | |
|    pagecache (example: brand new head or tail pages; full pages). So, if
 | |
|    Checkpoint happens just after the REDO write, it needs to know that the
 | |
|    REDO phase must start before this REDO. Scanning the pagecache cannot
 | |
|    tell that as the page is not in the cache. So, transaction sets its rec_lsn
 | |
|    to the REDO's LSN or somewhere before, and Checkpoint reads the
 | |
|    transaction's rec_lsn.
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_redo(enum translog_record_type type
 | |
|                             __attribute__ ((unused)),
 | |
|                             TRN *trn, MARIA_HA *tbl_info
 | |
|                             __attribute__ ((unused)),
 | |
|                             LSN *lsn, void *hook_arg
 | |
|                             __attribute__ ((unused)))
 | |
| {
 | |
|   /*
 | |
|     Users of dummy_transaction_object must keep this TRN clean as it
 | |
|     is used by many threads (like those manipulating non-transactional
 | |
|     tables). It might be dangerous if one user sets rec_lsn or some other
 | |
|     member and it is picked up by another user (like putting this rec_lsn into
 | |
|     a page of a non-transactional table); it's safer if all members stay 0. So
 | |
|     non-transactional log records (REPAIR, CREATE, RENAME, DROP) should not
 | |
|     call this hook; we trust them but verify ;)
 | |
|   */
 | |
|   DBUG_ASSERT(trn->trid != 0);
 | |
|   /*
 | |
|     If the hook stays so simple, it would be faster to pass
 | |
|     !trn->rec_lsn ? trn->rec_lsn : some_dummy_lsn
 | |
|     to translog_write_record(), like Monty did in his original code, and not
 | |
|     have a hook. For now we keep it like this.
 | |
|   */
 | |
|   if (trn->rec_lsn == 0)
 | |
|     trn->rec_lsn= *lsn;
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Sets transaction's undo_lsn, first_undo_lsn if needed
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_undo(enum translog_record_type type
 | |
|                             __attribute__ ((unused)),
 | |
|                             TRN *trn, MARIA_HA *tbl_info
 | |
|                             __attribute__ ((unused)),
 | |
|                             LSN *lsn, void *hook_arg
 | |
|                             __attribute__ ((unused)))
 | |
| {
 | |
|   DBUG_ASSERT(trn->trid != 0);
 | |
|   trn->undo_lsn= *lsn;
 | |
|   if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0))
 | |
|     trn->first_undo_lsn=
 | |
|       trn->undo_lsn | LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn);
 | |
|   return 0;
 | |
|   /*
 | |
|     when we implement purging, we will specialize this hook: UNDO_PURGE
 | |
|     records will additionally set trn->undo_purge_lsn
 | |
|   */
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Sets the table's records count and checksum and others to 0, then
 | |
|    calls the generic REDO hook.
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_redo_delete_all(enum translog_record_type type
 | |
|                                        __attribute__ ((unused)),
 | |
|                                        TRN *trn, MARIA_HA *tbl_info
 | |
|                                        __attribute__ ((unused)),
 | |
|                                        LSN *lsn, void *hook_arg)
 | |
| {
 | |
|   _ma_reset_status(tbl_info);
 | |
|   return write_hook_for_redo(type, trn, tbl_info, lsn, hook_arg);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Updates "records" and "checksum" and calls the generic UNDO hook
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_undo_row_insert(enum translog_record_type type
 | |
|                                        __attribute__ ((unused)),
 | |
|                                        TRN *trn, MARIA_HA *tbl_info,
 | |
|                                        LSN *lsn, void *hook_arg)
 | |
| {
 | |
|   MARIA_SHARE *share= tbl_info->s;
 | |
|   share->state.state.records++;
 | |
|   share->state.state.checksum+= *(ha_checksum *)hook_arg;
 | |
|   return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Updates "records" and calls the generic UNDO hook
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_undo_row_delete(enum translog_record_type type
 | |
|                                        __attribute__ ((unused)),
 | |
|                                        TRN *trn, MARIA_HA *tbl_info,
 | |
|                                        LSN *lsn, void *hook_arg)
 | |
| {
 | |
|   MARIA_SHARE *share= tbl_info->s;
 | |
|   share->state.state.records--;
 | |
|   share->state.state.checksum+= *(ha_checksum *)hook_arg;
 | |
|   return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Upates "records" and "checksum" and calls the generic UNDO hook
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_undo_row_update(enum translog_record_type type
 | |
|                                        __attribute__ ((unused)),
 | |
|                                        TRN *trn, MARIA_HA *tbl_info,
 | |
|                                        LSN *lsn, void *hook_arg)
 | |
| {
 | |
|   MARIA_SHARE *share= tbl_info->s;
 | |
|   share->state.state.checksum+= *(ha_checksum *)hook_arg;
 | |
|   return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg);
 | |
| }
 | |
| 
 | |
| 
 | |
| my_bool write_hook_for_undo_bulk_insert(enum translog_record_type type
 | |
|                                         __attribute__ ((unused)),
 | |
|                                         TRN *trn, MARIA_HA *tbl_info,
 | |
|                                         LSN *lsn, void *hook_arg)
 | |
| {
 | |
|   /*
 | |
|     We are going to call maria_delete_all_rows(), but without logging and
 | |
|     syncing, as an optimization (if we crash before commit, the UNDO will
 | |
|     empty; if we crash after commit, we have flushed and forced the files).
 | |
|     Status still needs to be reset under log mutex, in case of a concurrent
 | |
|     checkpoint.
 | |
|   */
 | |
|   _ma_reset_status(tbl_info);
 | |
|   return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Updates table's lsn_of_file_id.
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_file_id(enum translog_record_type type
 | |
|                                __attribute__ ((unused)),
 | |
|                                TRN *trn
 | |
|                                __attribute__ ((unused)),
 | |
|                                MARIA_HA *tbl_info,
 | |
|                                LSN *lsn,
 | |
|                                void *hook_arg
 | |
|                                __attribute__ ((unused)))
 | |
| {
 | |
|   DBUG_ASSERT(cmp_translog_addr(tbl_info->s->lsn_of_file_id, *lsn) < 0);
 | |
|   tbl_info->s->lsn_of_file_id= *lsn;
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    Updates transaction's rec_lsn when committing.
 | |
| 
 | |
|    A transaction writes its commit record before being committed in trnman, so
 | |
|    if Checkpoint happens just between the COMMIT record log write and the
 | |
|    commit in trnman, it will record that transaction is not committed. Assume
 | |
|    the transaction (trn1) did an INSERT; after the checkpoint, a second
 | |
|    transaction (trn2) does a DELETE of what trn1 has inserted. Then crash,
 | |
|    Checkpoint record says that trn1 was not committed, and REDO phase starts
 | |
|    from Checkpoint record's LSN. So it will not find the COMMIT record of
 | |
|    trn1, will want to roll back trn1, which will fail because the row/key
 | |
|    which it wants to delete does not exist anymore.
 | |
|    To avoid this, Checkpoint needs to know that the REDO phase must start
 | |
|    before this COMMIT record, so transaction sets its rec_lsn to the COMMIT's
 | |
|    record LSN, and as Checkpoint reads the transaction's rec_lsn, Checkpoint
 | |
|    will know.
 | |
| 
 | |
|    @note so after commit trn->rec_lsn is a "commit LSN", which could be of
 | |
|    use later.
 | |
| 
 | |
|    @return Operation status, always 0 (success)
 | |
| */
 | |
| 
 | |
| my_bool write_hook_for_commit(enum translog_record_type type
 | |
|                               __attribute__ ((unused)),
 | |
|                               TRN *trn,
 | |
|                               MARIA_HA *tbl_info
 | |
|                               __attribute__ ((unused)),
 | |
|                               LSN *lsn,
 | |
|                               void *hook_arg
 | |
|                               __attribute__ ((unused)))
 | |
| {
 | |
|   trn->rec_lsn= *lsn;
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /***************************************************************************
 | |
|   Applying of REDO log records
 | |
| ***************************************************************************/
 | |
| 
 | |
| /*
 | |
|   Apply changes to head and tail pages
 | |
| 
 | |
|   SYNOPSIS
 | |
|     _ma_apply_redo_insert_row_head_or_tail()
 | |
|     info		Maria handler
 | |
|     lsn			LSN to put on page
 | |
|     page_type		HEAD_PAGE or TAIL_PAGE
 | |
|     new_page		True if this is first entry on page
 | |
|     header		Header (without FILEID)
 | |
|     data		Data to be put on page
 | |
|     data_length		Length of data
 | |
| 
 | |
|   NOTE
 | |
|     Handles LOGREC_REDO_INSERT_ROW_HEAD, LOGREC_REDO_INSERT_ROW_TAIL
 | |
|     LOGREC_REDO_NEW_ROW_HEAD and LOGREC_REDO_NEW_ROW_TAIL
 | |
| 
 | |
|   RETURN
 | |
|     0   ok
 | |
|     #   Error number
 | |
| */
 | |
| 
 | |
| uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn,
 | |
|                                             uint page_type,
 | |
|                                             my_bool new_page,
 | |
|                                             const uchar *header,
 | |
|                                             const uchar *data,
 | |
|                                             size_t data_length)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   pgcache_page_no_t page;
 | |
|   uint      rownr, empty_space;
 | |
|   uint      block_size= share->block_size;
 | |
|   uint      rec_offset;
 | |
|   uchar      *buff, *dir;
 | |
|   uint      result;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   enum pagecache_page_lock lock_method;
 | |
|   enum pagecache_page_pin pin_method;
 | |
|   my_off_t end_of_page;
 | |
|   uint error;
 | |
|   DBUG_ENTER("_ma_apply_redo_insert_row_head_or_tail");
 | |
| 
 | |
|   page=  page_korr(header);
 | |
|   rownr= dirpos_korr(header + PAGE_STORE_SIZE);
 | |
| 
 | |
|   DBUG_PRINT("enter", ("rowid: %lu  page: %lu  rownr: %u  data_length: %u",
 | |
|                        (ulong) ma_recordpos(page, rownr),
 | |
|                        (ulong) page, rownr, (uint) data_length));
 | |
| 
 | |
|   share->state.changed|= (STATE_CHANGED | STATE_NOT_ZEROFILLED |
 | |
|                           STATE_NOT_MOVABLE);
 | |
| 
 | |
|   end_of_page= (page + 1) * share->block_size;
 | |
|   if (end_of_page > share->state.state.data_file_length)
 | |
|   {
 | |
|     DBUG_PRINT("info", ("Enlarging data file from %lu to %lu",
 | |
|                         (ulong) share->state.state.data_file_length,
 | |
|                         (ulong) end_of_page));
 | |
|     /*
 | |
|       New page at end of file. Note that the test above is also positive if
 | |
|       data_file_length is not a multiple of block_size (system crashed while
 | |
|       writing the last page): in this case we just extend the last page and
 | |
|       fill it entirely with zeroes, then the REDO will put correct data on
 | |
|       it.
 | |
|     */
 | |
|     lock_method= PAGECACHE_LOCK_WRITE;
 | |
|     pin_method=  PAGECACHE_PIN;
 | |
| 
 | |
|     DBUG_ASSERT(rownr == 0 && new_page);
 | |
|     if (rownr != 0 || !new_page)
 | |
|       goto crashed_file;
 | |
| 
 | |
|     buff= info->keyread_buff;
 | |
|     info->keyread_buff_used= 1;
 | |
|     make_empty_page(info, buff, page_type, 1);
 | |
|     empty_space= (block_size - PAGE_OVERHEAD_SIZE(share));
 | |
|     rec_offset= PAGE_HEADER_SIZE(share);
 | |
|     dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     lock_method= PAGECACHE_LOCK_LEFT_WRITELOCKED;
 | |
|     pin_method=  PAGECACHE_PIN_LEFT_PINNED;
 | |
| 
 | |
|     share->pagecache->readwrite_flags&= ~MY_WME;
 | |
|     share->silence_encryption_errors= 1;
 | |
|     buff= pagecache_read(share->pagecache, &info->dfile,
 | |
|                          page, 0, 0,
 | |
|                          PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
 | |
|                          &page_link.link);
 | |
|     share->pagecache->readwrite_flags= share->pagecache->org_readwrite_flags;
 | |
|     share->silence_encryption_errors= 0;
 | |
|     if (!buff)
 | |
|     {
 | |
|       /* Skip errors when reading outside of file and uninitialized pages */
 | |
|       if (!new_page || (my_errno != HA_ERR_FILE_TOO_SHORT &&
 | |
|                         my_errno != HA_ERR_WRONG_CRC &&
 | |
|                         my_errno != HA_ERR_DECRYPTION_FAILED))
 | |
|       {
 | |
|         DBUG_PRINT("error", ("Error %d when reading page", (int) my_errno));
 | |
|         goto err;
 | |
|       }
 | |
|       /* Create new page */
 | |
|       buff= pagecache_block_link_to_buffer(page_link.link);
 | |
|       buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE;
 | |
|     }
 | |
|     else if (lsn_korr(buff) >= lsn)           /* Test if already applied */
 | |
|     {
 | |
|       check_skipped_lsn(info, lsn_korr(buff), 1, page);
 | |
|       /* Fix bitmap, just in case */
 | |
|       empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
 | |
|       if (!enough_free_entries_on_page(share, buff))
 | |
|         empty_space= 0;                         /* Page is full */
 | |
| 
 | |
|       if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
 | |
|         goto err;
 | |
|       pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                                PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                                PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                                LSN_IMPOSSIBLE, 0, FALSE);
 | |
|       DBUG_RETURN(0);
 | |
|     }
 | |
| 
 | |
|     if (((uint) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != page_type))
 | |
|     {
 | |
|       /*
 | |
|         This is a page that has been freed before and now should be
 | |
|         changed to new type.
 | |
|       */
 | |
|       if (!new_page)
 | |
|       {
 | |
|         DBUG_PRINT("error",
 | |
|                    ("Found page of wrong type: %u, should have been %u",
 | |
|                     (uint) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK),
 | |
|                     page_type));
 | |
|         goto crashed_file;
 | |
|       }
 | |
|       make_empty_page(info, buff, page_type, 0);
 | |
|       empty_space= block_size - PAGE_HEADER_SIZE(share) - PAGE_SUFFIX_SIZE;
 | |
|       (void) extend_directory(info, buff, block_size, 0, rownr, &empty_space,
 | |
|                               page_type == HEAD_PAGE);
 | |
|       rec_offset= PAGE_HEADER_SIZE(share);
 | |
|       dir= dir_entry_pos(buff, block_size, rownr);
 | |
|       empty_space+= uint2korr(dir+2);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|       uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
 | |
|       uint length;
 | |
| 
 | |
|       DBUG_ASSERT(!new_page);
 | |
|       dir= dir_entry_pos(buff, block_size, rownr);
 | |
|       empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
 | |
| 
 | |
|       if (max_entry <= rownr)
 | |
|       {
 | |
|         /* Add directory entry first in directory and data last on page */
 | |
|         if (extend_directory(info, buff, block_size, max_entry, rownr,
 | |
|                              &empty_space, page_type == HEAD_PAGE))
 | |
|           goto crashed_file;
 | |
|       }
 | |
|       if (extend_area_on_page(info, buff, dir, rownr,
 | |
|                               (uint) data_length, &empty_space,
 | |
|                               &rec_offset, &length, page_type == HEAD_PAGE))
 | |
|         goto crashed_file;
 | |
|     }
 | |
|   }
 | |
|   /* Copy data */
 | |
|   int2store(dir+2, data_length);
 | |
|   memcpy(buff + rec_offset, data, data_length);
 | |
|   empty_space-= (uint) data_length;
 | |
|   int2store(buff + EMPTY_SPACE_OFFSET, empty_space);
 | |
| 
 | |
|   /* Fix bitmap */
 | |
|   if (!enough_free_entries_on_page(share, buff))
 | |
|     empty_space= 0;                         /* Page is full */
 | |
|   if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
 | |
|     goto err;
 | |
| 
 | |
|   /*
 | |
|     If page was not read before, write it but keep it pinned.
 | |
|     We don't update its LSN When we have processed all REDOs for this page
 | |
|     in the current REDO's group, we will stamp page with UNDO's LSN
 | |
|     (if we stamped it now, a next REDO, in
 | |
|     this group, for this page, would be skipped) and unpin then.
 | |
|   */
 | |
|   result= 0;
 | |
|   if (lock_method == PAGECACHE_LOCK_WRITE &&
 | |
|       pagecache_write(share->pagecache,
 | |
|                       &info->dfile, page, 0,
 | |
|                       buff, PAGECACHE_PLAIN_PAGE,
 | |
|                       lock_method, pin_method,
 | |
|                       PAGECACHE_WRITE_DELAY, &page_link.link,
 | |
|                       LSN_IMPOSSIBLE))
 | |
|     result= my_errno;
 | |
| 
 | |
|   page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|   page_link.changed= 1;
 | |
|   push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
| 
 | |
|   /*
 | |
|     Data page and bitmap page are in place, we can update data_file_length in
 | |
|     case we extended the file. We could not do it earlier: bitmap code tests
 | |
|     data_file_length to know if it has to create a new page or not.
 | |
|   */
 | |
|   set_if_bigger(share->state.state.data_file_length, end_of_page);
 | |
|   DBUG_RETURN(result);
 | |
| 
 | |
| crashed_file:
 | |
|   _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
| err:
 | |
|   error= my_errno;
 | |
|   if (lock_method == PAGECACHE_LOCK_LEFT_WRITELOCKED)
 | |
|     pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                              PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                              LSN_IMPOSSIBLE, 0, FALSE);
 | |
|   _ma_mark_file_crashed(share);
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table); /* catch recovery error early */
 | |
|   DBUG_RETURN((my_errno= error));
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Apply LOGREC_REDO_PURGE_ROW_HEAD & LOGREC_REDO_PURGE_ROW_TAIL
 | |
| 
 | |
|   SYNOPSIS
 | |
|     _ma_apply_redo_purge_row_head_or_tail()
 | |
|     info		Maria handler
 | |
|     lsn			LSN to put on page
 | |
|     page_type		HEAD_PAGE or TAIL_PAGE
 | |
|     header		Header (without FILEID)
 | |
| 
 | |
|   NOTES
 | |
|     This function is very similar to delete_head_or_tail()
 | |
| 
 | |
|   RETURN
 | |
|     0   ok
 | |
|     #   Error number
 | |
| */
 | |
| 
 | |
| uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn,
 | |
|                                            uint page_type,
 | |
|                                            const uchar *header)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   pgcache_page_no_t page;
 | |
|   uint      rownr, empty_space;
 | |
|   uchar     *buff;
 | |
|   int result;
 | |
|   uint error;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   DBUG_ENTER("_ma_apply_redo_purge_row_head_or_tail");
 | |
| 
 | |
|   page=  page_korr(header);
 | |
|   rownr= dirpos_korr(header+PAGE_STORE_SIZE);
 | |
|   DBUG_PRINT("enter", ("rowid: %lu  page: %lu  rownr: %u",
 | |
|                        (ulong) ma_recordpos(page, rownr),
 | |
|                        (ulong) page, rownr));
 | |
| 
 | |
|   share->state.changed|= (STATE_CHANGED | STATE_NOT_ZEROFILLED |
 | |
|                           STATE_NOT_MOVABLE);
 | |
| 
 | |
|   if (!(buff= pagecache_read(share->pagecache, &info->dfile,
 | |
|                              page, 0, 0,
 | |
|                              PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
 | |
|                              &page_link.link)))
 | |
|     goto err;
 | |
| 
 | |
|   if (lsn_korr(buff) >= lsn)
 | |
|   {
 | |
|     /*
 | |
|       Already applied
 | |
|       Note that in case the page is not anymore a head or tail page
 | |
|       a future redo will fix the bitmap.
 | |
|     */
 | |
|     check_skipped_lsn(info, lsn_korr(buff), 1, page);
 | |
|     if ((uint) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type)
 | |
|     {
 | |
|       empty_space= uint2korr(buff+EMPTY_SPACE_OFFSET);
 | |
|       if (!enough_free_entries_on_page(share, buff))
 | |
|         empty_space= 0;                         /* Page is full */
 | |
|       if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE,
 | |
|                          empty_space))
 | |
|         goto err;
 | |
|     }
 | |
|     pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                              PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                              LSN_IMPOSSIBLE, 0, FALSE);
 | |
|     DBUG_RETURN(0);
 | |
|   }
 | |
| 
 | |
|   DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (uchar) page_type);
 | |
| 
 | |
|   if (delete_dir_entry(share, buff, rownr, &empty_space) < 0)
 | |
|   {
 | |
|     _ma_set_fatal_error(info, HA_ERR_WRONG_IN_RECORD);
 | |
|     goto err;
 | |
|   }
 | |
| 
 | |
|   page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|   page_link.changed= 1;
 | |
|   push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
| 
 | |
|   result= 0;
 | |
|   if (!enough_free_entries_on_page(share, buff))
 | |
|     empty_space= 0;                         /* Page is full */
 | |
|   /* This will work even if the page was marked as UNALLOCATED_PAGE */
 | |
|   if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space))
 | |
|     result= my_errno;
 | |
| 
 | |
|   DBUG_RETURN(result);
 | |
| 
 | |
| err:
 | |
|   error= my_errno;
 | |
|   pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                            PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                            LSN_IMPOSSIBLE, 0, FALSE);
 | |
|   _ma_mark_file_crashed(share);
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   DBUG_RETURN((my_errno= error));
 | |
| 
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Apply LOGREC_REDO_FREE_BLOCKS
 | |
| 
 | |
|    @param  info            Maria handler
 | |
|    @param  header          Header (without FILEID)
 | |
| 
 | |
|    Mark the pages free in the bitmap.
 | |
| 
 | |
|    We have to check against _ma_redo_not_needed_for_page()
 | |
|    to guard against the case where we first clear a block and after
 | |
|    that insert new data into the blocks.  If we would unconditionally
 | |
|    clear the bitmap here, future changes would be ignored for the page
 | |
|    if it's not in the dirty list (ie, it would be flushed).
 | |
| 
 | |
|    @return Operation status
 | |
|      @retval 0      OK
 | |
|      @retval 1      Error
 | |
| */
 | |
| 
 | |
| uint _ma_apply_redo_free_blocks(MARIA_HA *info,
 | |
|                                 LSN lsn __attribute__((unused)),
 | |
|                                 LSN redo_lsn,
 | |
|                                 const uchar *header)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uint ranges;
 | |
|   uint16 sid;
 | |
|   DBUG_ENTER("_ma_apply_redo_free_blocks");
 | |
| 
 | |
|   share->state.changed|= (STATE_CHANGED | STATE_NOT_ZEROFILLED |
 | |
|                           STATE_NOT_MOVABLE);
 | |
| 
 | |
|   sid= fileid_korr(header);
 | |
|   header+= FILEID_STORE_SIZE;
 | |
|   ranges= pagerange_korr(header);
 | |
|   header+= PAGERANGE_STORE_SIZE;
 | |
|   DBUG_ASSERT(ranges > 0);
 | |
| 
 | |
|   /** @todo leave bitmap lock to the bitmap code... */
 | |
|   mysql_mutex_lock(&share->bitmap.bitmap_lock);
 | |
|   while (ranges--)
 | |
|   {
 | |
|     my_bool res;
 | |
|     uint page_range;
 | |
|     pgcache_page_no_t page, start_page;
 | |
| 
 | |
|     start_page= page= page_korr(header);
 | |
|     header+= PAGE_STORE_SIZE;
 | |
|     /* Page range may have this bit set to indicate a tail page */
 | |
|     page_range= pagerange_korr(header) & ~(TAIL_BIT | START_EXTENT_BIT);
 | |
|     DBUG_ASSERT(page_range > 0);
 | |
| 
 | |
|     header+= PAGERANGE_STORE_SIZE;
 | |
| 
 | |
|     DBUG_PRINT("info", ("page: %lu  pages: %u", (long) page, page_range));
 | |
| 
 | |
|     for ( ; page_range-- ; start_page++)
 | |
|     {
 | |
|       if (_ma_redo_not_needed_for_page(sid, redo_lsn, start_page, FALSE))
 | |
|         continue;
 | |
|       res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, start_page,
 | |
|                                            1);
 | |
|       if (res)
 | |
|       {
 | |
|         mysql_mutex_unlock(&share->bitmap.bitmap_lock);
 | |
|         _ma_mark_file_crashed(share);
 | |
|         DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|         DBUG_RETURN(res);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   mysql_mutex_unlock(&share->bitmap.bitmap_lock);
 | |
|   DBUG_RETURN(0);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Apply LOGREC_REDO_FREE_HEAD_OR_TAIL
 | |
| 
 | |
|    @param  info            Maria handler
 | |
|    @param  header          Header (without FILEID)
 | |
| 
 | |
|    @note It marks the page free in the bitmap, and sets the directory's count
 | |
|    to 0.
 | |
| 
 | |
|    @return Operation status
 | |
|      @retval 0      OK
 | |
|      @retval 1      Error
 | |
| */
 | |
| 
 | |
| uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
 | |
|                                       const uchar *header)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   uchar *buff;
 | |
|   pgcache_page_no_t page;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   my_bool res;
 | |
|   DBUG_ENTER("_ma_apply_redo_free_head_or_tail");
 | |
| 
 | |
|   share->state.changed|= (STATE_CHANGED | STATE_NOT_ZEROFILLED |
 | |
|                           STATE_NOT_MOVABLE);
 | |
| 
 | |
|   page= page_korr(header);
 | |
| 
 | |
|   if (!(buff= pagecache_read(share->pagecache,
 | |
|                              &info->dfile,
 | |
|                              page, 0, 0,
 | |
|                              PAGECACHE_PLAIN_PAGE,
 | |
|                              PAGECACHE_LOCK_WRITE, &page_link.link)))
 | |
|   {
 | |
|     pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                              PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                              LSN_IMPOSSIBLE, 0, FALSE);
 | |
|     goto err;
 | |
|   }
 | |
|   if (lsn_korr(buff) >= lsn)
 | |
|   {
 | |
|     /* Already applied */
 | |
|     check_skipped_lsn(info, lsn_korr(buff), 1, page);
 | |
|     pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                              PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                              LSN_IMPOSSIBLE, 0, FALSE);
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE;
 | |
| #ifdef IDENTICAL_PAGES_AFTER_RECOVERY
 | |
|     {
 | |
|       uint number_of_records= (uint) buff[DIR_COUNT_OFFSET];
 | |
|       uchar *dir= dir_entry_pos(buff, share->block_size,
 | |
|                                 number_of_records-1);
 | |
|       buff[DIR_FREE_OFFSET]=  END_OF_DIR_FREE_LIST;
 | |
|       bzero(dir, number_of_records * DIR_ENTRY_SIZE);
 | |
|     }
 | |
| #endif
 | |
| 
 | |
|     page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|     page_link.changed= 1;
 | |
|     push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
|   }
 | |
|   /** @todo leave bitmap lock to the bitmap code... */
 | |
|   mysql_mutex_lock(&share->bitmap.bitmap_lock);
 | |
|   res= _ma_bitmap_reset_full_page_bits(info, &share->bitmap, page, 1);
 | |
|   mysql_mutex_unlock(&share->bitmap.bitmap_lock);
 | |
|   if (res)
 | |
|     goto err;
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   _ma_mark_file_crashed(share);
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|    @brief Apply LOGREC_REDO_INSERT_ROW_BLOBS
 | |
| 
 | |
|    @param  info            Maria handler
 | |
|    @parma  lsn             LSN to put on pages
 | |
|    @param  header          Header (with FILEID)
 | |
|    @param  redo_lsn        REDO record's LSN
 | |
|    @param[out] number_of_blobs Number of blobs found in log record
 | |
|    @param[out] number_of_ranges Number of ranges found
 | |
|    @param[out] first_page  First page touched
 | |
|    @param[out] last_page   Last page touched
 | |
| 
 | |
|    @note Write full pages (full head & blob pages)
 | |
| 
 | |
|    @return Operation status
 | |
|      @retval 0      OK
 | |
|      @retval !=0    Error
 | |
| */
 | |
| 
 | |
| uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
 | |
|                                      LSN lsn, const uchar *header,
 | |
|                                      LSN redo_lsn,
 | |
|                                      uint * const number_of_blobs,
 | |
|                                      uint * const number_of_ranges,
 | |
|                                      pgcache_page_no_t * const first_page,
 | |
|                                      pgcache_page_no_t * const last_page)
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   const uchar *data;
 | |
|   uint      data_size= FULL_PAGE_SIZE(share);
 | |
|   uint      blob_count, ranges;
 | |
|   uint16    sid;
 | |
|   pgcache_page_no_t first_page2= ULONGLONG_MAX, last_page2= 0;
 | |
|   DBUG_ENTER("_ma_apply_redo_insert_row_blobs");
 | |
| 
 | |
|   share->state.changed|= (STATE_CHANGED | STATE_NOT_ZEROFILLED |
 | |
|                           STATE_NOT_MOVABLE);
 | |
| 
 | |
|   sid= fileid_korr(header);
 | |
|   header+= FILEID_STORE_SIZE;
 | |
|   *number_of_ranges= ranges= pagerange_korr(header);
 | |
|   header+= PAGERANGE_STORE_SIZE;
 | |
|   *number_of_blobs= blob_count= pagerange_korr(header);
 | |
|   header+= PAGERANGE_STORE_SIZE;
 | |
|   DBUG_ASSERT(ranges >= blob_count);
 | |
| 
 | |
|   data= (header + ranges * ROW_EXTENT_SIZE +
 | |
|          blob_count * (SUB_RANGE_SIZE + BLOCK_FILLER_SIZE));
 | |
| 
 | |
|   while (blob_count--)
 | |
|   {
 | |
|     uint sub_ranges, empty_space;
 | |
| 
 | |
|     sub_ranges=  uint2korr(header);
 | |
|     header+= SUB_RANGE_SIZE;
 | |
|     empty_space= uint2korr(header);
 | |
|     header+= BLOCK_FILLER_SIZE;
 | |
|     DBUG_ASSERT(sub_ranges <= ranges && empty_space < data_size);
 | |
|     ranges-= sub_ranges;
 | |
| 
 | |
|     while (sub_ranges--)
 | |
|     {
 | |
|       uint i;
 | |
|       uint      res;
 | |
|       uint      page_range;
 | |
|       pgcache_page_no_t page;
 | |
|       uchar     *buff;
 | |
|       uint	data_on_page= data_size;
 | |
| 
 | |
|       page= page_korr(header);
 | |
|       header+= PAGE_STORE_SIZE;
 | |
|       page_range= pagerange_korr(header);
 | |
|       header+= PAGERANGE_STORE_SIZE;
 | |
| 
 | |
|       for (i= page_range; i-- > 0 ; page++, data+= data_on_page)
 | |
|       {
 | |
|         MARIA_PINNED_PAGE page_link;
 | |
|         enum pagecache_page_lock unlock_method;
 | |
|         enum pagecache_page_pin unpin_method;
 | |
| 
 | |
|         set_if_smaller(first_page2, page);
 | |
|         set_if_bigger(last_page2, page);
 | |
|         if (i == 0 && sub_ranges == 0)
 | |
|           data_on_page= data_size - empty_space; /* data on last page */
 | |
|         if (_ma_redo_not_needed_for_page(sid, redo_lsn, page, FALSE))
 | |
|           continue;
 | |
| 
 | |
|         if (((page + 1) * share->block_size) >
 | |
|             share->state.state.data_file_length)
 | |
|         {
 | |
|           /* New page or half written page at end of file */
 | |
|           DBUG_PRINT("info", ("Enlarging data file from %lu to %lu",
 | |
|                               (ulong) share->state.state.data_file_length,
 | |
|                               (ulong) ((page + 1 ) * share->block_size)));
 | |
|           share->state.state.data_file_length= (page + 1) * share->block_size;
 | |
|           buff= info->keyread_buff;
 | |
|           info->keyread_buff_used= 1;
 | |
|           make_empty_page(info, buff, BLOB_PAGE, 0);
 | |
|           unlock_method= PAGECACHE_LOCK_LEFT_UNLOCKED;
 | |
|           unpin_method=  PAGECACHE_PIN_LEFT_UNPINNED;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|           share->pagecache->readwrite_flags&= ~MY_WME;
 | |
|           share->silence_encryption_errors= 1;
 | |
|           buff= pagecache_read(share->pagecache,
 | |
|                                &info->dfile,
 | |
|                                page, 0, 0,
 | |
|                                PAGECACHE_PLAIN_PAGE,
 | |
|                                PAGECACHE_LOCK_WRITE, &page_link.link);
 | |
|           share->pagecache->readwrite_flags= share->pagecache->
 | |
|             org_readwrite_flags;
 | |
|           share->silence_encryption_errors= 0;
 | |
|           if (!buff)
 | |
|           {
 | |
|             if (my_errno != HA_ERR_FILE_TOO_SHORT &&
 | |
|                 my_errno != HA_ERR_WRONG_CRC &&
 | |
|                 my_errno != HA_ERR_DECRYPTION_FAILED)
 | |
|             {
 | |
|               /* If not read outside of file */
 | |
|               pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                                        PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                                        PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                                        LSN_IMPOSSIBLE, 0, FALSE);
 | |
|               goto err;
 | |
|             }
 | |
|             /*
 | |
|               Physical file was too short, create new page. It can be that
 | |
|               recovery started with a file with N pages, wrote page N+2 into
 | |
|               pagecache (increased data_file_length but not physical file
 | |
|               length), now reads page N+1: the read fails.
 | |
|             */
 | |
|             buff= pagecache_block_link_to_buffer(page_link.link);
 | |
|             make_empty_page(info, buff, BLOB_PAGE, 0);
 | |
|           }
 | |
|           else
 | |
|           {
 | |
| #ifdef DBUG_ASSERT_EXISTS
 | |
|             uchar found_page_type= (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK);
 | |
| #endif
 | |
|             if (lsn_korr(buff) >= lsn)
 | |
|             {
 | |
|               /* Already applied */
 | |
|               check_skipped_lsn(info, lsn_korr(buff), 1, page);
 | |
|               pagecache_unlock_by_link(share->pagecache, page_link.link,
 | |
|                                        PAGECACHE_LOCK_WRITE_UNLOCK,
 | |
|                                        PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
 | |
|                                        LSN_IMPOSSIBLE, 0, FALSE);
 | |
|               goto fix_bitmap;
 | |
|             }
 | |
|             DBUG_ASSERT((found_page_type == (uchar) BLOB_PAGE) ||
 | |
|                         (found_page_type == (uchar) UNALLOCATED_PAGE));
 | |
|           }
 | |
|           unlock_method= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|           unpin_method=  PAGECACHE_UNPIN;
 | |
|         }
 | |
| 
 | |
|         /*
 | |
|           Blob pages are never updated twice in same redo-undo chain, so
 | |
|           it's safe to update lsn for them here
 | |
|         */
 | |
|         lsn_store(buff, lsn);
 | |
|         buff[PAGE_TYPE_OFFSET]= BLOB_PAGE;
 | |
|         bzero(buff + LSN_SIZE + PAGE_TYPE_SIZE,
 | |
|               FULL_PAGE_HEADER_SIZE(share) - (LSN_SIZE + PAGE_TYPE_SIZE));
 | |
| 
 | |
|         if (data_on_page != data_size)
 | |
|         {
 | |
|           /*
 | |
|             Last page may be only partly filled. We zero the rest, like
 | |
|             write_full_pages() does.
 | |
|           */
 | |
|           bzero(buff + share->block_size - PAGE_SUFFIX_SIZE - empty_space,
 | |
|                 empty_space);
 | |
|         }
 | |
|         memcpy(buff + FULL_PAGE_HEADER_SIZE(share), data, data_on_page);
 | |
|         if (pagecache_write(share->pagecache,
 | |
|                             &info->dfile, page, 0,
 | |
|                             buff, PAGECACHE_PLAIN_PAGE,
 | |
|                             unlock_method, unpin_method,
 | |
|                             PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE))
 | |
|           goto err;
 | |
| 
 | |
|     fix_bitmap:
 | |
|       /** @todo leave bitmap lock to the bitmap code... */
 | |
|         mysql_mutex_lock(&share->bitmap.bitmap_lock);
 | |
|         res= _ma_bitmap_set_full_page_bits(info, &share->bitmap, page,
 | |
|                                            1);
 | |
|         mysql_mutex_unlock(&share->bitmap.bitmap_lock);
 | |
|         if (res)
 | |
|           goto err;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   *first_page= first_page2;
 | |
|   *last_page=  last_page2;
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   _ma_mark_file_crashed(share);
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /****************************************************************************
 | |
|  Applying of UNDO entries
 | |
| ****************************************************************************/
 | |
| 
 | |
| /** Execute undo of a row insert (delete the inserted row) */
 | |
| 
 | |
| my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn,
 | |
|                                   const uchar *header)
 | |
| {
 | |
|   pgcache_page_no_t page;
 | |
|   uint rownr;
 | |
|   uchar *buff;
 | |
|   my_bool res;
 | |
|   MARIA_PINNED_PAGE page_link;
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   ha_checksum checksum;
 | |
|   LSN lsn;
 | |
|   DBUG_ENTER("_ma_apply_undo_row_insert");
 | |
| 
 | |
|   page=  page_korr(header);
 | |
|   header+= PAGE_STORE_SIZE;
 | |
|   rownr= dirpos_korr(header);
 | |
|   header+= DIRPOS_STORE_SIZE;
 | |
|   DBUG_PRINT("enter", ("rowid: %lu  page: %lu  rownr: %u",
 | |
|                        (ulong) ma_recordpos(page, rownr),
 | |
|                        (ulong) page, rownr));
 | |
| 
 | |
|   buff= pagecache_read(share->pagecache,
 | |
|                        &info->dfile, page, 0,
 | |
|                        0, share->page_type,
 | |
|                        PAGECACHE_LOCK_WRITE,
 | |
|                        &page_link.link);
 | |
|   page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK;
 | |
|   page_link.changed= buff != 0;
 | |
|   push_dynamic(&info->pinned_pages, (void*) &page_link);
 | |
|   if (!buff)
 | |
|     goto err;
 | |
| 
 | |
|   if (read_row_extent_info(info, buff, rownr))
 | |
|     goto err;
 | |
| 
 | |
|   _ma_bitmap_flushable(info, 1);
 | |
|   if (delete_head_or_tail(info, page, rownr, 1, 1) ||
 | |
|       delete_tails(info, info->cur_row.tail_positions))
 | |
|     goto err;
 | |
| 
 | |
|   if (info->cur_row.extents_count && free_full_pages(info, &info->cur_row))
 | |
|     goto err;
 | |
| 
 | |
|   checksum= 0;
 | |
|   if (share->calc_checksum)
 | |
|     checksum= (ha_checksum) 0 - ha_checksum_korr(header);
 | |
|   info->last_auto_increment= ~ (ulonglong) 0;
 | |
|   if (_ma_write_clr(info, undo_lsn, LOGREC_UNDO_ROW_INSERT,
 | |
|                     share->calc_checksum != 0, checksum, &lsn, (void*) 0))
 | |
|     goto err;
 | |
| 
 | |
|   res= 0;
 | |
| end:
 | |
|   /* The following is true only if _ma_bitmap_flushable() was called earlier */
 | |
|   if (info->non_flushable_state)
 | |
|     _ma_bitmap_flushable(info, -1);
 | |
|   _ma_unpin_all_pages_and_finalize_row(info, lsn);
 | |
|   DBUG_RETURN(res);
 | |
| 
 | |
| err:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   res= 1;
 | |
|   _ma_mark_file_crashed(share);
 | |
|   /*
 | |
|     Don't write a new LSN on the used pages. Not important as the file is
 | |
|     marked as crashed and need to be repaired before it can be used.
 | |
|   */
 | |
|   lsn= LSN_IMPOSSIBLE;
 | |
|   goto end;
 | |
| }
 | |
| 
 | |
| 
 | |
| /** Execute undo of a row delete (insert the row back where it was) */
 | |
| 
 | |
| my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
 | |
|                                   const uchar *header, size_t header_length
 | |
|                                   __attribute__((unused)))
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   MARIA_ROW row;
 | |
|   MARIA_COLUMNDEF *column, *end_column;
 | |
|   MARIA_BITMAP_BLOCKS *blocks;
 | |
|   struct st_row_pos_info row_pos;
 | |
|   uchar *record;
 | |
|   const uchar *null_bits, *field_length_data, *extent_info;
 | |
|   pgcache_page_no_t page;
 | |
|   ulong *blob_lengths;
 | |
|   uint *null_field_lengths, extent_count, rownr, length_on_head_page;
 | |
|   DBUG_ENTER("_ma_apply_undo_row_delete");
 | |
| 
 | |
|   /*
 | |
|     Use cur row as a base;  We need to make a copy as we will change
 | |
|     some buffers to point directly to 'header'
 | |
|   */
 | |
|   memcpy(&row, &info->cur_row, sizeof(row));
 | |
| 
 | |
|   page=  page_korr(header);
 | |
|   header+= PAGE_STORE_SIZE;
 | |
|   rownr= dirpos_korr(header);
 | |
|   header+= DIRPOS_STORE_SIZE;
 | |
|   length_on_head_page= uint2korr(header);
 | |
|   header+= 2;
 | |
|   extent_count= pagerange_korr(header);
 | |
|   header+= PAGERANGE_STORE_SIZE;
 | |
|   DBUG_PRINT("enter", ("rowid: %lu  page: %lu  rownr: %u",
 | |
|                        (ulong) ma_recordpos(page, rownr),
 | |
|                        (ulong) page, rownr));
 | |
| 
 | |
|   if (share->calc_checksum)
 | |
|   {
 | |
|     /*
 | |
|       We extract the checksum delta here, saving a recomputation in
 | |
|       allocate_and_write_block_record(). It's only an optimization.
 | |
|     */
 | |
|     row.checksum= (ha_checksum) 0 - ha_checksum_korr(header);
 | |
|     header+= HA_CHECKSUM_STORE_SIZE;
 | |
|   }
 | |
|   extent_info= header;
 | |
|   header+= extent_count * ROW_EXTENT_SIZE;
 | |
| 
 | |
|   null_field_lengths= row.null_field_lengths;
 | |
|   blob_lengths= row.blob_lengths;
 | |
| 
 | |
|   /*
 | |
|     Fill in info->cur_row with information about the row, like in
 | |
|     calc_record_size(), to be used by write_block_record()
 | |
|   */
 | |
| 
 | |
|   row.normal_length= row.char_length= row.varchar_length=
 | |
|     row.blob_length= row.extents_count= row.field_lengths_length= 0;
 | |
| 
 | |
|   null_bits= header;
 | |
|   header+= share->base.null_bytes;
 | |
|   /* This will not be changed */
 | |
|   row.empty_bits= (uchar*) header;
 | |
|   header+= share->base.pack_bytes;
 | |
|   if (share->base.max_field_lengths)
 | |
|   {
 | |
|     row.field_lengths_length= uint2korr(header);
 | |
|     row.field_lengths= (uchar*) header + 2 ;
 | |
|     header+= 2 + row.field_lengths_length;
 | |
|   }
 | |
|   if (share->base.blobs)
 | |
|     row.blob_length= ma_get_length(&header);
 | |
| 
 | |
|   /* We need to build up a record (without blobs) in rec_buff */
 | |
|   if (!(record= my_malloc(PSI_INSTRUMENT_ME, share->base.reclength,
 | |
|                           MYF(MY_WME))))
 | |
|     DBUG_RETURN(1);
 | |
| 
 | |
|   memcpy(record, null_bits, share->base.null_bytes);
 | |
| 
 | |
|   /* Copy field information from header to record */
 | |
| 
 | |
|   /* Handle constant length fields that are always present */
 | |
|   for (column= share->columndef,
 | |
|          end_column= column+ share->base.fixed_not_null_fields;
 | |
|        column < end_column;
 | |
|        column++)
 | |
|   {
 | |
|     memcpy(record + column->offset, header, column->length);
 | |
|     header+= column->length;
 | |
|   }
 | |
| 
 | |
|   /* Handle NULL fields and CHAR/VARCHAR fields */
 | |
|   field_length_data= row.field_lengths;
 | |
|   for (end_column= share->columndef + share->base.fields;
 | |
|        column < end_column;
 | |
|        column++, null_field_lengths++)
 | |
|   {
 | |
|     if ((record[column->null_pos] & column->null_bit) ||
 | |
|         (column->empty_bit &&
 | |
|          row.empty_bits[column->empty_pos] & column->empty_bit))
 | |
|     {
 | |
|       if (column->type != FIELD_BLOB)
 | |
|         *null_field_lengths= 0;
 | |
|       else
 | |
|         *blob_lengths++= 0;
 | |
|       if (share->calc_checksum)
 | |
|         bfill(record + column->offset, column->fill_length,
 | |
|               column->type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
 | |
|       continue;
 | |
|     }
 | |
|     switch (column->type) {
 | |
|     case FIELD_CHECK:
 | |
|     case FIELD_NORMAL:                          /* Fixed length field */
 | |
|     case FIELD_ZERO:
 | |
|     case FIELD_SKIP_PRESPACE:                   /* Not packed */
 | |
|     case FIELD_SKIP_ZERO:                       /* Fixed length field */
 | |
|       row.normal_length+= column->length;
 | |
|       *null_field_lengths= column->length;
 | |
|       memcpy(record + column->offset, header, column->length);
 | |
|       header+= column->length;
 | |
|       break;
 | |
|     case FIELD_SKIP_ENDSPACE:                   /* CHAR */
 | |
|     {
 | |
|       uint length;
 | |
|       if (column->length <= 255)
 | |
|         length= (uint) *field_length_data++;
 | |
|       else
 | |
|       {
 | |
|         length= uint2korr(field_length_data);
 | |
|         field_length_data+= 2;
 | |
|       }
 | |
|       row.char_length+= length;
 | |
|       *null_field_lengths= length;
 | |
|       memcpy(record + column->offset, header, length);
 | |
|       if (share->calc_checksum)
 | |
|         bfill(record + column->offset + length, (column->length - length),
 | |
|               ' ');
 | |
|       header+= length;
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_VARCHAR:
 | |
|     {
 | |
|       uint length;
 | |
|       uchar *field_pos= record + column->offset;
 | |
| 
 | |
|       /* 256 is correct as this includes the length uchar */
 | |
|       if (column->fill_length == 1)
 | |
|       {
 | |
|         field_pos[0]= *field_length_data;
 | |
|         length= (uint) *field_length_data;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         field_pos[0]= field_length_data[0];
 | |
|         field_pos[1]= field_length_data[1];
 | |
|         length= uint2korr(field_length_data);
 | |
|       }
 | |
|       field_length_data+= column->fill_length;
 | |
|       field_pos+= column->fill_length;
 | |
|       row.varchar_length+= length;
 | |
|       *null_field_lengths= length;
 | |
|       memcpy(field_pos, header, length);
 | |
|       header+= length;
 | |
|       break;
 | |
|     }
 | |
|     case FIELD_BLOB:
 | |
|     {
 | |
|       /* Copy length of blob and pointer to blob data to record */
 | |
|       uchar *field_pos= record + column->offset;
 | |
|       uint size_length= column->length - portable_sizeof_char_ptr;
 | |
|       ulong blob_length= _ma_calc_blob_length(size_length, field_length_data);
 | |
| 
 | |
|       memcpy(field_pos, field_length_data, size_length);
 | |
|       field_length_data+= size_length;
 | |
|       memcpy(field_pos + size_length, &header, sizeof(header));
 | |
|       header+= blob_length;
 | |
|       *blob_lengths++= blob_length;
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
|       DBUG_ASSERT(0);
 | |
|     }
 | |
|   }
 | |
|   row.head_length= (info->row_base_length +
 | |
|                     share->base.fixed_not_null_fields_length +
 | |
|                     row.field_lengths_length +
 | |
|                     size_to_store_key_length(row.field_lengths_length) +
 | |
|                     row.normal_length +
 | |
|                     row.char_length + row.varchar_length);
 | |
|   row.total_length= (row.head_length + row.blob_length);
 | |
|   if (row.total_length < share->base.min_block_length)
 | |
|     row.total_length= share->base.min_block_length;
 | |
| 
 | |
|   /*
 | |
|     Row is now generated. Now we need to insert record on the original
 | |
|     pages with original size on each page.
 | |
|   */
 | |
| 
 | |
|   _ma_bitmap_flushable(info, 1);
 | |
|   /* Change extent information to be usable by write_block_record() */
 | |
|   blocks= &row.insert_blocks;
 | |
|   if (extent_to_bitmap_blocks(info, blocks, page, extent_count, extent_info))
 | |
|     goto err;
 | |
|   blocks->block->org_bitmap_value= _ma_bitmap_get_page_bits(info,
 | |
|                                                             &share->bitmap,
 | |
|                                                             page);
 | |
|   blocks->block->used|= BLOCKUSED_USE_ORG_BITMAP;
 | |
| 
 | |
|   /* Read head page and allocate data for rowid */
 | |
|   if (get_rowpos_in_head_or_tail_page(info, blocks->block,
 | |
|                                       info->buff,
 | |
|                                       length_on_head_page,
 | |
|                                       HEAD_PAGE, PAGECACHE_LOCK_WRITE,
 | |
|                                       rownr, &row_pos))
 | |
|     goto err;
 | |
| 
 | |
|   if (share->calc_checksum)
 | |
|   {
 | |
|     DBUG_ASSERT(row.checksum == (share->calc_checksum)(info, record));
 | |
|   }
 | |
|   /* Store same amount of data on head page as on original page */
 | |
|   row_pos.length= (length_on_head_page - 
 | |
|                    (extent_count + 1 - blocks->count) * ROW_EXTENT_SIZE);
 | |
|   set_if_bigger(row_pos.length, share->base.min_block_length);
 | |
|   if (write_block_record(info, (uchar*) 0, record, &row,
 | |
|                          blocks, blocks->block->org_bitmap_value != 0,
 | |
|                          &row_pos, undo_lsn, 0))
 | |
|     goto err;
 | |
| 
 | |
|   my_free(record);
 | |
|   DBUG_RETURN(0);
 | |
| 
 | |
| err:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   _ma_mark_file_crashed(share);
 | |
|   if (info->non_flushable_state)
 | |
|     _ma_bitmap_flushable(info, -1);
 | |
|   _ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
 | |
|   my_free(record);
 | |
|   DBUG_RETURN(1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   Execute undo of a row update
 | |
| 
 | |
|   @fn _ma_apply_undo_row_update()
 | |
| 
 | |
|   @return Operation status
 | |
|     @retval 0      OK
 | |
|     @retval 1      Error
 | |
| */
 | |
| 
 | |
| my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn,
 | |
|                                   const uchar *header,
 | |
|                                   size_t header_length
 | |
|                                   __attribute__((unused)))
 | |
| {
 | |
|   MARIA_SHARE *share= info->s;
 | |
|   MARIA_RECORD_POS record_pos;
 | |
|   const uchar *field_length_data, *field_length_data_end, *extent_info;
 | |
|   uchar *current_record, *orig_record;
 | |
|   pgcache_page_no_t page;
 | |
|   ha_checksum UNINIT_VAR(checksum_delta);
 | |
|   uint rownr, field_length_header, extent_count, length_on_head_page;
 | |
|   int error;
 | |
|   DBUG_ENTER("_ma_apply_undo_row_update");
 | |
| 
 | |
|   page=  page_korr(header);
 | |
|   header+= PAGE_STORE_SIZE;
 | |
|   rownr= dirpos_korr(header);
 | |
|   header+= DIRPOS_STORE_SIZE;
 | |
| 
 | |
|   record_pos= ma_recordpos(page, rownr);
 | |
|   DBUG_PRINT("enter", ("rowid: %lu  page: %lu  rownr: %u",
 | |
|                        (ulong) record_pos, (ulong) page, rownr));
 | |
| 
 | |
|   if (share->calc_checksum)
 | |
|   {
 | |
|     checksum_delta= ha_checksum_korr(header);
 | |
|     header+= HA_CHECKSUM_STORE_SIZE;
 | |
|   }
 | |
|   length_on_head_page= uint2korr(header);
 | |
|   set_if_bigger(length_on_head_page, share->base.min_block_length);
 | |
|   header+= 2;
 | |
|   extent_count= pagerange_korr(header);
 | |
|   header+= PAGERANGE_STORE_SIZE;
 | |
|   extent_info= header;
 | |
|   header+= extent_count * ROW_EXTENT_SIZE;
 | |
| 
 | |
|   /*
 | |
|     Set header to point to old field values, generated by
 | |
|     fill_update_undo_parts()
 | |
|   */
 | |
|   field_length_header= ma_get_length(&header);
 | |
|   field_length_data= (uchar*) header;
 | |
|   header+= field_length_header;
 | |
|   field_length_data_end= header;
 | |
| 
 | |
|   /* Allocate buffer for current row & original row */
 | |
|   if (!(current_record= my_malloc(PSI_INSTRUMENT_ME, share->base.reclength * 2,
 | |
|                                   MYF(MY_WME))))
 | |
|     DBUG_RETURN(1);
 | |
|   orig_record= current_record+ share->base.reclength;
 | |
| 
 | |
|   /* Read current record */
 | |
|   if (_ma_read_block_record(info, current_record, record_pos))
 | |
|     goto err;
 | |
| 
 | |
|   if (*field_length_data == 255)
 | |
|   {
 | |
|     /* Bitmap changed */
 | |
|     field_length_data++;
 | |
|     memcpy(orig_record, header, share->base.null_bytes);
 | |
|     header+= share->base.null_bytes;
 | |
|   }
 | |
|   else
 | |
|     memcpy(orig_record, current_record, share->base.null_bytes);
 | |
|   bitmap_clear_all(&info->changed_fields);
 | |
| 
 | |
|   while (field_length_data < field_length_data_end)
 | |
|   {
 | |
|     uint field_nr= ma_get_length(&field_length_data), field_length;
 | |
|     MARIA_COLUMNDEF *column= share->columndef + field_nr;
 | |
|     uchar *orig_field_pos= orig_record + column->offset;
 | |
| 
 | |
|     bitmap_set_bit(&info->changed_fields, field_nr);
 | |
|     if (field_nr >= share->base.fixed_not_null_fields)
 | |
|     {
 | |
|       if (!(field_length= ma_get_length(&field_length_data)))
 | |
|       {
 | |
|         /* Null field or empty field */
 | |
|         bfill(orig_field_pos, column->fill_length,
 | |
|               column->type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
 | |
|         continue;
 | |
|       }
 | |
|     }
 | |
|     else
 | |
|       field_length= column->length;
 | |
| 
 | |
|     switch (column->type) {
 | |
|     case FIELD_CHECK:
 | |
|     case FIELD_NORMAL:                          /* Fixed length field */
 | |
|     case FIELD_ZERO:
 | |
|     case FIELD_SKIP_PRESPACE:                   /* Not packed */
 | |
|       memcpy(orig_field_pos, header, column->length);
 | |
|       header+= column->length;
 | |
|       break;
 | |
|     case FIELD_SKIP_ZERO:                       /* Number */
 | |
|     case FIELD_SKIP_ENDSPACE:                   /* CHAR */
 | |
|     {
 | |
|       uint diff;
 | |
|       memcpy(orig_field_pos, header, field_length);
 | |
|       if ((diff= (column->length - field_length)))
 | |
|         bfill(orig_field_pos + column->length - diff, diff,
 | |
|               column->type == FIELD_SKIP_ENDSPACE ? ' ' : 0);
 | |
|       header+= field_length;
 | |
|     }
 | |
|     break;
 | |
|     case FIELD_VARCHAR:
 | |
|       if (column->length <= 256)
 | |
|       {
 | |
|         *orig_field_pos++= (uchar) field_length;
 | |
|       }
 | |
|       else
 | |
|       {
 | |
|         int2store(orig_field_pos, field_length);
 | |
|         orig_field_pos+= 2;
 | |
|       }
 | |
|       memcpy(orig_field_pos, header, field_length);
 | |
|       header+= field_length;
 | |
|       break;
 | |
|     case FIELD_BLOB:
 | |
|     {
 | |
|       uint size_length= column->length - portable_sizeof_char_ptr;
 | |
|       _ma_store_blob_length(orig_field_pos, size_length, field_length);
 | |
|       memcpy(orig_field_pos + size_length, &header, sizeof(header));
 | |
|       header+= field_length;
 | |
|       break;
 | |
|     }
 | |
|     default:
 | |
|       DBUG_ASSERT(0);
 | |
|     }
 | |
|   }
 | |
|   copy_not_changed_fields(info, &info->changed_fields,
 | |
|                           orig_record, current_record);
 | |
| 
 | |
|   if (share->calc_checksum)
 | |
|   {
 | |
|     info->new_row.checksum= checksum_delta +
 | |
|       (info->cur_row.checksum= (*share->calc_checksum)(info, orig_record));
 | |
|     /* verify that record's content is sane */
 | |
|     DBUG_ASSERT(info->new_row.checksum ==
 | |
|                 (*share->calc_checksum)(info, current_record));
 | |
|   }
 | |
| 
 | |
|   info->last_auto_increment= ~ (ulonglong) 0;
 | |
|   /* Now records are up to date, execute the update to original values */
 | |
|   if (_ma_update_at_original_place(info, page, rownr, length_on_head_page,
 | |
|                                    extent_count, extent_info,
 | |
|                                    current_record, orig_record, undo_lsn))
 | |
|     goto err;
 | |
| 
 | |
|   error= 0;
 | |
| end:
 | |
|   my_free(current_record);
 | |
|   DBUG_RETURN(error);
 | |
| 
 | |
| err:
 | |
|   DBUG_ASSERT(!maria_assert_if_crashed_table);
 | |
|   error= 1;
 | |
|   _ma_mark_file_crashed(share);
 | |
|   goto end;
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   Execute undo of a bulk insert which used repair
 | |
| 
 | |
|   @return Operation status
 | |
|     @retval 0      OK
 | |
|     @retval 1      Error
 | |
| */
 | |
| 
 | |
| my_bool _ma_apply_undo_bulk_insert(MARIA_HA *info, LSN undo_lsn)
 | |
| {
 | |
|   my_bool error;
 | |
|   LSN lsn;
 | |
|   DBUG_ENTER("_ma_apply_undo_bulk_insert");
 | |
|   /*
 | |
|     We delete all rows, re-enable indices as bulk insert had disabled
 | |
|     non-unique ones.
 | |
|   */
 | |
|   error= (maria_delete_all_rows(info) ||
 | |
|           maria_enable_indexes(info) ||
 | |
|           /* we enabled indices so need '2' below */
 | |
|           _ma_state_info_write(info->s,
 | |
|                                MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
 | |
|                                MA_STATE_INFO_WRITE_FULL_INFO |
 | |
|                                MA_STATE_INFO_WRITE_LOCK) ||
 | |
|           _ma_write_clr(info, undo_lsn, LOGREC_UNDO_BULK_INSERT,
 | |
|                         FALSE, 0, &lsn, NULL));
 | |
|   DBUG_RETURN(error);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   @brief Get the TRANSLOG_ADDRESS to flush up to
 | |
| 
 | |
|   @param page            Page's content
 | |
|   @param page_no         Page's number (<offset>/<page length>)
 | |
|   @param data_ptr        Callback data pointer (pointer to MARIA_SHARE)
 | |
| 
 | |
|   @note
 | |
|   Usable for data (non-bitmap) and index pages
 | |
| 
 | |
|   @retval LSN to flush up to
 | |
| */
 | |
| 
 | |
| TRANSLOG_ADDRESS
 | |
| maria_page_get_lsn(uchar *page,
 | |
|                    pgcache_page_no_t page_no __attribute__((unused)),
 | |
|                    uchar* data_ptr __attribute__((unused)))
 | |
| {
 | |
| #ifndef DBUG_OFF
 | |
|   const MARIA_SHARE *share= (MARIA_SHARE*)data_ptr;
 | |
|   DBUG_ASSERT(share->page_type == PAGECACHE_LSN_PAGE &&
 | |
|               share->now_transactional);
 | |
| #endif
 | |
|   return lsn_korr(page);
 | |
| }
 | |
| 
 | |
| 
 | |
| /**
 | |
|   @brief Enable reading of all rows, ignoring versioning
 | |
| 
 | |
|   @note
 | |
|     This is mainly useful in single user applications, like maria_pack,
 | |
|     where we want to be able to read all rows without having to read the
 | |
|     transaction id from the control file
 | |
| */
 | |
| 
 | |
| void maria_ignore_trids(MARIA_HA *info)
 | |
| {
 | |
|   if (info->s->base.born_transactional)
 | |
|   {
 | |
|     if (!info->trn)
 | |
|       _ma_set_tmp_trn_for_table(info, &dummy_transaction_object);
 | |
|     /* Ignore transaction id when row is read */
 | |
|     info->trn->min_read_from= ~(TrID) 0;
 | |
|   }
 | |
| }
 | |
| 
 | |
| 
 | |
| #ifndef DBUG_OFF
 | |
| 
 | |
| /* The following functions are useful to call from debugger */
 | |
| 
 | |
| void _ma_print_block_info(MARIA_SHARE *share, uchar *buff)
 | |
| {
 | |
|   LSN lsn= lsn_korr(buff);
 | |
| 
 | |
|   printf("LSN: " LSN_FMT "  type: %u  dir_entries: %u  dir_free: %u  empty_space: %u\n",
 | |
|          LSN_IN_PARTS(lsn),
 | |
|          (uint)buff[PAGE_TYPE_OFFSET],
 | |
|          (uint)buff[DIR_COUNT_OFFSET],
 | |
|          (uint)buff[DIR_FREE_OFFSET],
 | |
|          (uint) uint2korr(buff + EMPTY_SPACE_OFFSET));
 | |
|   printf("Start of directory: %lu\n",
 | |
|          maria_block_size - PAGE_SUFFIX_SIZE -
 | |
|          (uint) buff[DIR_COUNT_OFFSET] * DIR_ENTRY_SIZE);
 | |
|   _ma_print_directory(share, stdout, buff, maria_block_size);
 | |
| }
 | |
| #endif
 | 
