mariadb/mysys/my_compr_int.cc
Kristian Nielsen 7081f2a58e Binlog-in-engine: New binlog implementation integrated in InnoDB
Implement an improved binlog implementation that is integrated into
the storage engine. The new implementation is enabled with the
--binlog-storage-engine option. Initially the InnoDB storage engine
implements the binlog.

Integrating the binlog in the storage engine improves performance,
since it makes the InnoDB redo log the single source of truth and
avoids the need for expensive two-phase commit between binlog and
engine. It also makes it possible to disable durability (set
--innodb-flush-log-at-trx-commit=0) to further improve performance,
while still preserving the ability to recover the binlog and database
into a consistent state after a crash.

The new binlog implementation also greatly improves the internal
design and implementation of the binlog, and enables future
enhancements for replication.

This is a squash of the original 11.4-based patch series.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2026-01-23 03:21:03 +01:00

257 lines
8.1 KiB
C++

/*****************************************************************************
Copyright (c) 2024 Kristian Nielsen.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/*
Reading and writing of compressed integers.
Created 2024-10-01 Kristian Nielsen <knielsen@knielsen-hq.org>
*/
#include "mysys_priv.h"
#include "my_compr_int.h"
/* Read and write compressed (up to) 64-bit integers. */
/*
Write compressed unsigned integer, efficient version without assuming
unaligned writes.
*/
unsigned char *compr_int_write(unsigned char *p, uint64_t v) {
// Compute bytes needed to store the value v plus 3 bits encoding length.
uint32_t needed_bits_minus_1= 66 - my_nlz(v|1);
uint32_t needed_bytes= (needed_bits_minus_1 >> 3) + 1;
// Compute the encoding of the length.
// We need 1-9 bytes. Use 9 bytes instead of 8, so we can encode the
// length in 3 bits for (1, 2, ..., 7, or 9 bytes).
uint32_t bytes= needed_bytes | (needed_bytes >> 3);
uint32_t len= needed_bytes - 1;
// Encode 1-7 as 0-6, and encode 8,9 both as 8.
len-= (len >> 3);
// Compute the first 64-bit word to write.
uintptr_t offset= (uintptr_t)p & (uintptr_t)7;
uintptr_t offset_bits= offset << 3;
uint64_t v1= (len | (v << 3)) << offset_bits;
uint64_t *p1= (uint64_t *)(p - offset);
uint64_t mask1= ~(uint64_t)0 << offset_bits;
// Compute the second word to write (if any).
uint64_t v2= v >> ((64 - 3) - offset_bits);
uint64_t *p2= p1 + 1;
// Write the value into next one or two 64-bit words, as needed.
// Two words are needed if (offset + bytes) cross into the next word.
#ifdef WORDS_BIGENDIAN
/*
Here it might be possible to use a slightly more efficient endian
conversion on big-endian, since we know the pointer is 8-byte aligned.
*/
int8store((unsigned char *)p1,
(uint8korr((unsigned char *)p1) & ~mask1) | v1);
#else
*p1= (*p1 & ~mask1) | v1;
#endif
if (offset + bytes >= 8) {
#ifdef WORDS_BIGENDIAN
int8store((unsigned char *)p2, v2);
#else
*p2= v2;
#endif
}
return p + bytes;
}
/*
Read compressed integer, efficient version without assuming unaligned reads.
Returns a pair of the value read and the incremented pointer.
*/
std::pair<uint64_t, const unsigned char *>
compr_int_read(const unsigned char *p)
{
uintptr_t offset= (uintptr_t)p & (uintptr_t)7;
uintptr_t offset_bits= offset << 3;
const uint64_t *p_align= (const uint64_t *)((uintptr_t)p & ~(uintptr_t)7);
#ifdef WORDS_BIGENDIAN
/*
Here it might be possible to use a slightly more efficient endian
conversion on big-endian, since we know the pointer is 8-byte aligned.
*/
uint64_t v1= uint8korr((unsigned char *)p_align);
#else
uint64_t v1= p_align[0];
#endif
uint32_t len= (v1 >> offset_bits) & 7;
uint32_t bytes= len + 1;
bytes+= (bytes >> 3);
uint64_t v;
if (offset + bytes > 8) {
uint64_t mask2= (~(uint64_t)0) >> ((16 - (offset + bytes)) << 3);
#ifdef WORDS_BIGENDIAN
v= (v1 >> (3 + offset_bits)) |
((uint8korr((unsigned char *)(p_align + 1)) & mask2) <<
(61 - offset_bits));
#else
v= (v1 >> (3 + offset_bits)) | ((p_align[1] & mask2) << (61 - offset_bits));
#endif
} else {
uint64_t mask1= (~(uint64_t)0) >> ((7 - (offset + len)) << 3);
v= (v1 & mask1) >> (3 + offset_bits);
}
return std::pair<uint64_t, const unsigned char *>(v, p + bytes);
}
#ifdef TEST_MAIN
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
// Smaller version that assumes unaligned writes of 8-bit values is ok, and
// that there are up to 8 scratch bytes available after the value written.
unsigned char *compr_int_write_le_unaligned_buffer(unsigned char *p, uint64_t v) {
// Compute bytes needed to store the value v plus 3 bits encoding length.
uint32_t needed_bits_minus_1= 66 - my_nlz(v|1);
uint32_t needed_bytes= (needed_bits_minus_1 >> 3) + 1;
// Compute the encoding of the length.
// We need 1-9 bytes. Use 9 bytes instead of 8, so we can encode the
// length in 3 bits for (1, 2, ..., 7, or 9 bytes).
uint32_t bytes= needed_bytes | (needed_bytes >> 3);
uint32_t len= needed_bytes - 1;
// Encode 1-7 as 0-6, and encode 8,9 both as 8.
len-= (len >> 3);
// Write the (up to) 9 bytes, prefering redundant write to conditional jump.
*(uint64_t *)p= len | (v << 3);
*(p+8)= v >> 63;
return p + bytes;
}
// Generic version without assumptions.
unsigned char *compr_int_write_generic(unsigned char *p, uint64_t v) {
// Compute bytes needed to store the value v plus 3 bits encoding length.
uint32_t needed_bits_minus_1= 66 - my_nlz(v|1);
uint32_t needed_bytes= (needed_bits_minus_1 >> 3) + 1;
// Compute the encoding of the length.
// We need 1-9 bytes. Use 9 bytes instead of 8, so we can encode the
// length in 3 bits for (1, 2, ..., 7, or 9 bytes).
uint32_t bytes= needed_bytes | (needed_bytes >> 3);
uint32_t len= needed_bytes - 1;
// Encode 1-7 as 0-6, and encode 8,9 both as 8.
len-= (len >> 3);
// Write the necessary bytes out.
*p++= len | (v << 3);
v >>= 5;
while (--bytes > 0) {
*p++= v;
v>>= 8;
}
return p;
}
// Generic read compressed integers.
std::pair<uint64_t, const unsigned char *>
compr_int_read_generic(const unsigned char *p)
{
uint64_t v= *p++;
uint32_t bytes= v & 7;
v>>= 3;
uint32_t shift= 5;
bytes+= ((bytes + 1) >> 3); // A 7 means read 8 bytes more (9 total)
while (bytes-- > 0) {
v|= ((uint64_t)(*p++)) << shift;
shift+= 8;
}
return std::pair<uint64_t, const unsigned char *>(v, p);
}
// Read compressed integers assuming little-endian, efficient unaligned
// 64-bit reads, and up to 7 bytes of scratch space after.
std::pair<uint64_t, const unsigned char *>
compr_int_read_le_unaligned_buf(const unsigned char *p)
{
uint64_t v= *(const uint64_t *)p;
uint32_t len= v & 7;
uint64_t mask= (~(uint64_t)0) >> ((7 - len) << 3);
v= (v & mask) >> 3;
// Need for extra read is assumed rare, well-predicted conditional jump
// likely faster.
uint32_t bytes= len + 1;
if (__builtin_expect((len == 7), 0)) {
v|= ((uint64_t)p[8]) << 61; // Add last 3 bits
bytes+= 1; // 7 means read 9 bytes
}
return std::pair<uint64_t, const unsigned char *>(v, p + bytes);
}
int
main(int argc, char *argv[])
{
int N= (argc > 1 ? atoi(argv[1]) : 1000);
uint64_t *src= new uint64_t[N];
unsigned char *buf1= new unsigned char[N*9];
unsigned char *buf2= new unsigned char[N*9];
int i;
// Generate test data.
for (i= 0; i < N; ++i)
src[i]= ((uint64_t)1 << (rand() % 64)) + (uint64_t)(rand());
// Write test data.
unsigned char *p1= buf1;
unsigned char *p2= buf2;
for (i= 0; i < N; ++i) {
p1= compr_int_write(p1, src[i]);
p2= compr_int_write_generic(p2, src[i]);
}
if ((p1 - buf1) != (p2 - buf2)) {
fprintf(stderr, "Write error! Mismatch lengths of optimised and generic.\n");
} else {
if (memcmp(buf1, buf2, p1 - buf1))
fprintf(stderr, "Write error! Mismatch data of optimised and generic.\n");
}
// Verify written data.
std::pair<uint64_t, const unsigned char *>q1, q2;
const unsigned char *c1= buf1;
const unsigned char *c2= buf2;
for (i= 0; i < N; ++i) {
q1= compr_int_read(c1);
q2= compr_int_read_generic(c2);
uint64_t v1= q1.first;
c1= q1.second;
uint64_t v2= q2.first;
c2= q2.second;
if (v1 != v2 || v1 != src[i]) {
fprintf(stderr, "Read error! mismatch values @ i=%d 0x%llx 0x%llx 0x%llx.\n",
i, (unsigned long long)v1, (unsigned long long)v2,
(unsigned long long)(src[i]));
break;
}
}
return 0;
}
#endif /* TEST_MAIN */