mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
mhnsw: inter-statement shared cache
* preserve the graph in memory between statements * keep it in a TABLE_SHARE, available for concurrent searches * nodes are generally read-only, walking the graph doesn't change them * distance to target is cached, calculated only once * SIMD-optimized bloom filter detects visited nodes * nodes are stored in an array, not List, to better utilize bloom filter * auto-adjusting heuristic to estimate the number of visited nodes (to configure the bloom filter) * many threads can concurrently walk the graph. MEM_ROOT and Hash_set are protected with a mutex, but walking doesn't need them * up to 8 threads can concurrently load nodes into the cache, nodes are partitioned into 8 mutexes (8 is chosen arbitrarily, might need tuning) * concurrent editing is not supported though * this is fine for MyISAM, TL_WRITE protects the TABLE_SHARE and the graph (note that TL_WRITE_CONCURRENT_INSERT is not allowed, because an INSERT into the main table means multiple UPDATEs in the graph) * InnoDB uses secondary transaction-level caches linked in a list in in thd->ha_data via a fake handlerton * on rollback the secondary cache is discarded, on commit nodes from the secondary cache are invalidated in the shared cache while it is exclusively locked * on savepoint rollback both caches are flushed. this can be improved in the future with a row visibility callback * graph size is controlled by @@mhnsw_cache_size, the cache is flushed when it reaches the threshold
This commit is contained in:
parent
8eb39be512
commit
049d839350
19 changed files with 1179 additions and 327 deletions
|
@ -43,6 +43,7 @@
|
|||
#cmakedefine HAVE_IA64INTRIN_H 1
|
||||
#cmakedefine HAVE_IEEEFP_H 1
|
||||
#cmakedefine HAVE_INTTYPES_H 1
|
||||
#cmakedefine HAVE_IMMINTRIN_H 1
|
||||
#cmakedefine HAVE_KQUEUE 1
|
||||
#cmakedefine HAVE_LIMITS_H 1
|
||||
#cmakedefine HAVE_LINK_H 1
|
||||
|
|
|
@ -187,6 +187,7 @@ CHECK_INCLUDE_FILES (fpu_control.h HAVE_FPU_CONTROL_H)
|
|||
CHECK_INCLUDE_FILES (grp.h HAVE_GRP_H)
|
||||
CHECK_INCLUDE_FILES (ieeefp.h HAVE_IEEEFP_H)
|
||||
CHECK_INCLUDE_FILES (inttypes.h HAVE_INTTYPES_H)
|
||||
CHECK_INCLUDE_FILES (immintrin.h HAVE_IMMINTRIN_H)
|
||||
CHECK_INCLUDE_FILES (langinfo.h HAVE_LANGINFO_H)
|
||||
CHECK_INCLUDE_FILES (link.h HAVE_LINK_H)
|
||||
CHECK_INCLUDE_FILES (linux/unistd.h HAVE_LINUX_UNISTD_H)
|
||||
|
|
|
@ -947,6 +947,12 @@ extern LEX_STRING lex_string_casedn_root(MEM_ROOT *root,
|
|||
CHARSET_INFO *cs,
|
||||
const char *str, size_t length);
|
||||
|
||||
static inline size_t root_size(MEM_ROOT *root)
|
||||
{
|
||||
size_t k = root->block_num >> 2;
|
||||
return k * (k + 1) * 2 * root->block_size;
|
||||
}
|
||||
|
||||
extern my_bool my_compress(uchar *, size_t *, size_t *);
|
||||
extern my_bool my_uncompress(uchar *, size_t , size_t *);
|
||||
extern uchar *my_compress_alloc(const uchar *packet, size_t *len,
|
||||
|
|
|
@ -708,6 +708,8 @@ The following specify which files/extra groups are read (specified before remain
|
|||
Unused. Deprecated, will be removed in a future release.
|
||||
--metadata-locks-hash-instances=#
|
||||
Unused. Deprecated, will be removed in a future release.
|
||||
--mhnsw-cache-size=#
|
||||
Size of the cache for the MHNSW vector index
|
||||
--mhnsw-max-edges-per-node=#
|
||||
Larger values means slower INSERT, larger index size and
|
||||
higher memory consumption, but better search results
|
||||
|
@ -1830,6 +1832,7 @@ max-write-lock-count 18446744073709551615
|
|||
memlock FALSE
|
||||
metadata-locks-cache-size 1024
|
||||
metadata-locks-hash-instances 8
|
||||
mhnsw-cache-size 16777216
|
||||
mhnsw-max-edges-per-node 6
|
||||
mhnsw-min-limit 20
|
||||
min-examined-row-limit 0
|
||||
|
|
92
mysql-test/main/vector_innodb.result
Normal file
92
mysql-test/main/vector_innodb.result
Normal file
|
@ -0,0 +1,92 @@
|
|||
create table t1 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb;
|
||||
show create table t1;
|
||||
Table Create Table
|
||||
t1 CREATE TABLE `t1` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`v` blob NOT NULL,
|
||||
PRIMARY KEY (`id`),
|
||||
VECTOR KEY `v` (`v`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_uca1400_ai_ci
|
||||
insert t1 (v) values
|
||||
(x'106d263fdf68ba3eb08d533f97d46e3fd1e1ec3edc4c123f984c563f621a233f'),
|
||||
(x'd55bee3c56eb9e3e84e3093f838dce3eb7cd653fe32d7d3f12de133c5715d23e'),
|
||||
(x'fcd5553f3822443f5dae413f2593493f7777363f5f7f113ebf12373d4d145a3f'),
|
||||
(x'7493093fd9a27d3e9b13783f8c66653f0bd7d23e50db983d251b013f1dba133f'),
|
||||
(x'2e30373fae331a3eba94153ee32bce3e3311b33d5bc75d3f6c25653eb769113f'),
|
||||
(x'381d5f3f2781de3e4f011f3f9353483f9bb37e3edd622d3eabecb63ec246953e'),
|
||||
(x'4ee5dc3e214b103f0e7e583f5f36473e79d7823ea872ec3e3ab2913d1b84433f'),
|
||||
(x'8826243f7d20f03e5135593f83ba653e44572d3fa87e8e3e943e0e3f649a293f'),
|
||||
(x'3859ac3e7d21823ed3f5753fc79c143e61d39c3cee39ba3eb0b0133e815c173f'),
|
||||
(x'cff0d93c32941e3f64b22a3f1e4f083f4ea2563fbff4a63e12a4703f6c824b3f');
|
||||
start transaction;
|
||||
insert t1 values
|
||||
(30, x'f8e2413ed4ff773fef8b893eba487b3febee3f3f9e6f693f5961fd3ee479303d');
|
||||
savepoint foo;
|
||||
insert t1 values
|
||||
(31, x'6129683f90fe1f3e1437bc3ed8c8f63dd141033f21e3a93e54346c3f8c4e043f'),
|
||||
(32, x'1ec8b83d398c4d3f2efb463f23947a3fa1a5093fdde6303e5580413f51569b3e');
|
||||
rollback to savepoint foo;
|
||||
insert t1 values
|
||||
(33, x'86d1003d4262033f8086713ffc4a633e317e933c4dce013d9c4d573fca83b93e');
|
||||
commit;
|
||||
start transaction;
|
||||
insert t1 values
|
||||
(40, x'71046a3e85329b3e05240e3f45c9283f1847363f98d47d3f4224b73d487b613f'),
|
||||
(41, x'71046a3e85329b3e05240e3f45c9283f1847363f98d47d3f4224b73d487b613f');
|
||||
rollback;
|
||||
select id,vec_distance(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
|
||||
id d
|
||||
10 0.8856208347761952
|
||||
1 0.9381363209273885
|
||||
30 1.0162643974895857
|
||||
7 1.026397313888122
|
||||
5 1.0308161006949719
|
||||
select id,vec_distance(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
|
||||
id d
|
||||
33 0.9477554826856
|
||||
30 1.111405427702547
|
||||
1 1.1154613877616022
|
||||
10 1.118630286292343
|
||||
8 1.1405733350751739
|
||||
create table t2 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb;
|
||||
insert t2 (v) values
|
||||
(x'45cf153f830a313f7a0a113fb1ff533f47a1533fcf9e6e3f'),
|
||||
(x'4b311d3fdd82423f35ba7d3fa041223dfd7db03e72d5833e'),
|
||||
(x'f0d4123f6fc1833ea30a483fd9649d3cb94d733f4574a63d'),
|
||||
(x'7ff8a53bf68e4a3e66e3563f214dea3e63372f3ec24d513f'),
|
||||
(x'4709683f0d44473f8a045f3f40f3693df7f1303fdb98b73e'),
|
||||
(x'09de2b3f5db80d3fb4405f3f64aadc3ecfa6183f823c733f'),
|
||||
(x'a93a143f7f71e33d0cde5c3ff106373fd6f6233fc1f4fc3e'),
|
||||
(x'11236e3de44a0d3f8241023d44d8383f2f70733f44d65c3f'),
|
||||
(x'b5e47c3f35d3413fad8a533d5945133f66dbf33d92c6103f');
|
||||
start transaction;
|
||||
insert t1 values
|
||||
(50, x'acae183f56ddc43e5093983d280df53e6fa2093f79c01a3eb1591f3f423a0e3d'),
|
||||
(51, x'6285303f42ef6e3f355e313f3e96a53e70959b3edd720b3ec07f733e5bc8603f');
|
||||
insert t2 values
|
||||
(20, x'58dc7d3fc9feaa3e19e26b3f31820c3f93070b3fc4e36e3f'),
|
||||
(21, x'35e05d3f18e8513fb81a3d3f8acf7d3e794a1d3c72f9613f');
|
||||
commit;
|
||||
select id,vec_distance(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
|
||||
id d
|
||||
6 0.9309383181777582
|
||||
5 0.9706304662574956
|
||||
30 0.98144492002831
|
||||
50 1.079862635421575
|
||||
51 1.2403734530917931
|
||||
select id,vec_distance(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
|
||||
id d
|
||||
21 0.43559180321379337
|
||||
20 0.6435053022072372
|
||||
6 0.6942000623336242
|
||||
2 0.7971622099055623
|
||||
9 0.8298589136476077
|
||||
drop table t1, t2;
|
||||
#
|
||||
# MDEV-34989 After selecting from empty table with vector key the next insert hangs
|
||||
#
|
||||
create table t (v blob not null, vector key(v)) engine=InnoDB;
|
||||
select vec_distance(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') as e from t order by e limit 1;
|
||||
e
|
||||
insert into t values (x'B047263C9F87233fcfd27e3eae493e3f0329f43e');
|
||||
drop table t;
|
73
mysql-test/main/vector_innodb.test
Normal file
73
mysql-test/main/vector_innodb.test
Normal file
|
@ -0,0 +1,73 @@
|
|||
source include/have_innodb.inc;
|
||||
|
||||
create table t1 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb;
|
||||
show create table t1;
|
||||
# print unpack("H*",pack("f*",map{rand}1..8))
|
||||
insert t1 (v) values
|
||||
(x'106d263fdf68ba3eb08d533f97d46e3fd1e1ec3edc4c123f984c563f621a233f'),
|
||||
(x'd55bee3c56eb9e3e84e3093f838dce3eb7cd653fe32d7d3f12de133c5715d23e'),
|
||||
(x'fcd5553f3822443f5dae413f2593493f7777363f5f7f113ebf12373d4d145a3f'),
|
||||
(x'7493093fd9a27d3e9b13783f8c66653f0bd7d23e50db983d251b013f1dba133f'),
|
||||
(x'2e30373fae331a3eba94153ee32bce3e3311b33d5bc75d3f6c25653eb769113f'),
|
||||
(x'381d5f3f2781de3e4f011f3f9353483f9bb37e3edd622d3eabecb63ec246953e'),
|
||||
(x'4ee5dc3e214b103f0e7e583f5f36473e79d7823ea872ec3e3ab2913d1b84433f'),
|
||||
(x'8826243f7d20f03e5135593f83ba653e44572d3fa87e8e3e943e0e3f649a293f'),
|
||||
(x'3859ac3e7d21823ed3f5753fc79c143e61d39c3cee39ba3eb0b0133e815c173f'),
|
||||
(x'cff0d93c32941e3f64b22a3f1e4f083f4ea2563fbff4a63e12a4703f6c824b3f');
|
||||
|
||||
### savepoints and rollbacks:
|
||||
start transaction;
|
||||
insert t1 values
|
||||
(30, x'f8e2413ed4ff773fef8b893eba487b3febee3f3f9e6f693f5961fd3ee479303d');
|
||||
savepoint foo;
|
||||
insert t1 values
|
||||
(31, x'6129683f90fe1f3e1437bc3ed8c8f63dd141033f21e3a93e54346c3f8c4e043f'),
|
||||
(32, x'1ec8b83d398c4d3f2efb463f23947a3fa1a5093fdde6303e5580413f51569b3e');
|
||||
rollback to savepoint foo;
|
||||
insert t1 values
|
||||
(33, x'86d1003d4262033f8086713ffc4a633e317e933c4dce013d9c4d573fca83b93e');
|
||||
commit;
|
||||
start transaction;
|
||||
insert t1 values
|
||||
(40, x'71046a3e85329b3e05240e3f45c9283f1847363f98d47d3f4224b73d487b613f'),
|
||||
(41, x'71046a3e85329b3e05240e3f45c9283f1847363f98d47d3f4224b73d487b613f');
|
||||
rollback;
|
||||
|
||||
select id,vec_distance(v, x'c923e33dc0da313fe7c7983e526b3d3fde63963e6eaf3a3f27fa133fe27a583f') d from t1 order by d limit 5;
|
||||
select id,vec_distance(v, x'754b5f3ea2312b3fc169f43e4604883e1d20173e8dd7443f421b703fb11e0d3e') d from t1 order by d limit 5;
|
||||
|
||||
### two indexes in one transaction:
|
||||
create table t2 (id int auto_increment primary key, v blob not null, vector index (v)) engine=innodb;
|
||||
|
||||
insert t2 (v) values
|
||||
(x'45cf153f830a313f7a0a113fb1ff533f47a1533fcf9e6e3f'),
|
||||
(x'4b311d3fdd82423f35ba7d3fa041223dfd7db03e72d5833e'),
|
||||
(x'f0d4123f6fc1833ea30a483fd9649d3cb94d733f4574a63d'),
|
||||
(x'7ff8a53bf68e4a3e66e3563f214dea3e63372f3ec24d513f'),
|
||||
(x'4709683f0d44473f8a045f3f40f3693df7f1303fdb98b73e'),
|
||||
(x'09de2b3f5db80d3fb4405f3f64aadc3ecfa6183f823c733f'),
|
||||
(x'a93a143f7f71e33d0cde5c3ff106373fd6f6233fc1f4fc3e'),
|
||||
(x'11236e3de44a0d3f8241023d44d8383f2f70733f44d65c3f'),
|
||||
(x'b5e47c3f35d3413fad8a533d5945133f66dbf33d92c6103f');
|
||||
|
||||
start transaction;
|
||||
insert t1 values
|
||||
(50, x'acae183f56ddc43e5093983d280df53e6fa2093f79c01a3eb1591f3f423a0e3d'),
|
||||
(51, x'6285303f42ef6e3f355e313f3e96a53e70959b3edd720b3ec07f733e5bc8603f');
|
||||
insert t2 values
|
||||
(20, x'58dc7d3fc9feaa3e19e26b3f31820c3f93070b3fc4e36e3f'),
|
||||
(21, x'35e05d3f18e8513fb81a3d3f8acf7d3e794a1d3c72f9613f');
|
||||
commit;
|
||||
|
||||
select id,vec_distance(v, x'1f4d053f7056493f937da03dd8c97a3f220cbb3c926c1c3facca213ec0618a3e') d from t1 order by d limit 5;
|
||||
select id,vec_distance(v, x'f618663f256be73e62cd453f8bcdbf3e16ae503c3858313f') d from t2 order by d limit 5;
|
||||
|
||||
drop table t1, t2;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-34989 After selecting from empty table with vector key the next insert hangs
|
||||
--echo #
|
||||
create table t (v blob not null, vector key(v)) engine=InnoDB;
|
||||
select vec_distance(v, x'B047263C9F87233fcfd27e3eae493e3f0329f43e') as e from t order by e limit 1;
|
||||
insert into t values (x'B047263C9F87233fcfd27e3eae493e3f0329f43e');
|
||||
drop table t;
|
|
@ -602,7 +602,16 @@
|
|||
VARIABLE_COMMENT Unused
|
||||
NUMERIC_MIN_VALUE 1
|
||||
NUMERIC_MAX_VALUE 1024
|
||||
@@ -2174,7 +2174,7 @@ READ_ONLY YES
|
||||
@@ -2167,7 +2167,7 @@ VARIABLE_SCOPE GLOBAL
|
||||
VARIABLE_TYPE BIGINT UNSIGNED
|
||||
VARIABLE_COMMENT Size of the cache for the MHNSW vector index
|
||||
NUMERIC_MIN_VALUE 1048576
|
||||
-NUMERIC_MAX_VALUE 18446744073709551615
|
||||
+NUMERIC_MAX_VALUE 4294967295
|
||||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
@@ -2204,7 +2204,7 @@ READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME MIN_EXAMINED_ROW_LIMIT
|
||||
VARIABLE_SCOPE SESSION
|
||||
|
|
|
@ -2182,6 +2182,16 @@ NUMERIC_BLOCK_SIZE 1
|
|||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY YES
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME MHNSW_CACHE_SIZE
|
||||
VARIABLE_SCOPE GLOBAL
|
||||
VARIABLE_TYPE BIGINT UNSIGNED
|
||||
VARIABLE_COMMENT Size of the cache for the MHNSW vector index
|
||||
NUMERIC_MIN_VALUE 1048576
|
||||
NUMERIC_MAX_VALUE 18446744073709551615
|
||||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME MHNSW_MAX_EDGES_PER_NODE
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE INT UNSIGNED
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
|
||||
index 0906f942121..1521ce1a728 100644
|
||||
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
|
||||
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
|
||||
@@ -44,7 +44,7 @@ READ_ONLY NO
|
||||
|
@ -611,7 +613,16 @@
|
|||
VARIABLE_COMMENT Unused
|
||||
NUMERIC_MIN_VALUE 1
|
||||
NUMERIC_MAX_VALUE 1024
|
||||
@@ -2384,7 +2384,7 @@ READ_ONLY YES
|
||||
@@ -2377,7 +2377,7 @@ VARIABLE_SCOPE GLOBAL
|
||||
VARIABLE_TYPE BIGINT UNSIGNED
|
||||
VARIABLE_COMMENT Size of the cache for the MHNSW vector index
|
||||
NUMERIC_MIN_VALUE 1048576
|
||||
-NUMERIC_MAX_VALUE 18446744073709551615
|
||||
+NUMERIC_MAX_VALUE 4294967295
|
||||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
@@ -2414,7 +2414,7 @@ READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME MIN_EXAMINED_ROW_LIMIT
|
||||
VARIABLE_SCOPE SESSION
|
||||
|
|
|
@ -2392,6 +2392,16 @@ NUMERIC_BLOCK_SIZE 1
|
|||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY YES
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME MHNSW_CACHE_SIZE
|
||||
VARIABLE_SCOPE GLOBAL
|
||||
VARIABLE_TYPE BIGINT UNSIGNED
|
||||
VARIABLE_COMMENT Size of the cache for the MHNSW vector index
|
||||
NUMERIC_MIN_VALUE 1048576
|
||||
NUMERIC_MAX_VALUE 18446744073709551615
|
||||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME MHNSW_MAX_EDGES_PER_NODE
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE INT UNSIGNED
|
||||
|
|
|
@ -324,6 +324,7 @@ void *alloc_root(MEM_ROOT *mem_root, size_t length)
|
|||
size_t alloced_length;
|
||||
|
||||
/* Increase block size over time if there is a lot of mallocs */
|
||||
/* when changing this logic, update root_size() to match */
|
||||
block_size= (MY_ALIGN(mem_root->block_size, ROOT_MIN_BLOCK_SIZE) *
|
||||
(mem_root->block_num >> 2)- MALLOC_OVERHEAD);
|
||||
get_size= length + ALIGN_SIZE(sizeof(USED_MEM));
|
||||
|
|
191
sql/bloom_filters.h
Normal file
191
sql/bloom_filters.h
Normal file
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 Sasha Krassovsky
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
// https://save-buffer.github.io/bloom_filter.html
|
||||
|
||||
#pragma once
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#ifdef HAVE_IMMINTRIN_H
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
struct PatternedSimdBloomFilter
|
||||
{
|
||||
PatternedSimdBloomFilter(int n, float eps) : n(n), epsilon(eps)
|
||||
{
|
||||
m = ComputeNumBits();
|
||||
int log_num_blocks = my_bit_log2_uint32(m) + 1 - rotate_bits;
|
||||
num_blocks = (1ULL << log_num_blocks);
|
||||
bv.resize(num_blocks);
|
||||
}
|
||||
|
||||
uint32_t ComputeNumBits()
|
||||
{
|
||||
double bits_per_val = -1.44 * std::log2(epsilon);
|
||||
return std::max<uint32_t>(512, static_cast<uint32_t>(bits_per_val * n + 0.5));
|
||||
}
|
||||
|
||||
#if __GNUC__ > 7 && defined(HAVE_IMMINTRIN_H)
|
||||
__attribute__ ((target ("avx2,avx,fma")))
|
||||
__m256i CalcHash(__m256i vecData)
|
||||
{
|
||||
// (almost) xxHash parallel version, 64bit input, 64bit output, seed=0
|
||||
static constexpr __m256i rotl48={
|
||||
0x0504030201000706ULL, 0x0D0C0B0A09080F0EULL,
|
||||
0x1514131211101716ULL, 0x1D1C1B1A19181F1EULL
|
||||
};
|
||||
static constexpr __m256i rotl24={
|
||||
0x0201000706050403ULL, 0x0A09080F0E0D0C0BULL,
|
||||
0x1211101716151413ULL, 0x1A19181F1E1D1C1BULL,
|
||||
};
|
||||
static constexpr uint64_t prime_mx2= 0x9FB21C651E98DF25ULL;
|
||||
static constexpr uint64_t bitflip= 0xC73AB174C5ECD5A2ULL;
|
||||
__m256i step1= _mm256_xor_si256(vecData, _mm256_set1_epi64x(bitflip));
|
||||
__m256i step2= _mm256_shuffle_epi8(step1, rotl48);
|
||||
__m256i step3= _mm256_shuffle_epi8(step1, rotl24);
|
||||
__m256i step4= _mm256_xor_si256(step1, _mm256_xor_si256(step2, step3));
|
||||
__m256i step5= _mm256_mul_epi32(step4, _mm256_set1_epi64x(prime_mx2));
|
||||
__m256i step6= _mm256_srli_epi64(step5, 35);
|
||||
__m256i step7= _mm256_add_epi64(step6, _mm256_set1_epi64x(8));
|
||||
__m256i step8= _mm256_xor_si256(step5, step7);
|
||||
__m256i step9= _mm256_mul_epi32(step8, _mm256_set1_epi64x(prime_mx2));
|
||||
return _mm256_xor_si256(step9, _mm256_srli_epi64(step9, 28));
|
||||
}
|
||||
|
||||
__attribute__ ((target ("avx2,avx,fma")))
|
||||
__m256i GetBlockIdx(__m256i vecHash)
|
||||
{
|
||||
__m256i vecNumBlocksMask = _mm256_set1_epi64x(num_blocks - 1);
|
||||
__m256i vecBlockIdx = _mm256_srli_epi64(vecHash, mask_idx_bits + rotate_bits);
|
||||
return _mm256_and_si256(vecBlockIdx, vecNumBlocksMask);
|
||||
}
|
||||
|
||||
__attribute__ ((target ("avx2,avx,fma")))
|
||||
__m256i ConstructMask(__m256i vecHash)
|
||||
{
|
||||
__m256i vecMaskIdxMask = _mm256_set1_epi64x((1 << mask_idx_bits) - 1);
|
||||
__m256i vecMaskMask = _mm256_set1_epi64x((1ull << bits_per_mask) - 1);
|
||||
__m256i vec64 = _mm256_set1_epi64x(64);
|
||||
|
||||
__m256i vecMaskIdx = _mm256_and_si256(vecHash, vecMaskIdxMask);
|
||||
__m256i vecMaskByteIdx = _mm256_srli_epi64(vecMaskIdx, 3);
|
||||
__m256i vecMaskBitIdx = _mm256_and_si256(vecMaskIdx, _mm256_set1_epi64x(0x7));
|
||||
__m256i vecRawMasks = _mm256_i64gather_epi64((const longlong *)masks, vecMaskByteIdx, 1);
|
||||
__m256i vecUnrotated = _mm256_and_si256(_mm256_srlv_epi64(vecRawMasks, vecMaskBitIdx), vecMaskMask);
|
||||
|
||||
__m256i vecRotation = _mm256_and_si256(_mm256_srli_epi64(vecHash, mask_idx_bits), _mm256_set1_epi64x((1 << rotate_bits) - 1));
|
||||
__m256i vecShiftUp = _mm256_sllv_epi64(vecUnrotated, vecRotation);
|
||||
__m256i vecShiftDown = _mm256_srlv_epi64(vecUnrotated, _mm256_sub_epi64(vec64, vecRotation));
|
||||
return _mm256_or_si256(vecShiftDown, vecShiftUp);
|
||||
}
|
||||
|
||||
__attribute__ ((target ("avx2,avx,fma")))
|
||||
void Insert(const T **data)
|
||||
{
|
||||
__m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0));
|
||||
__m256i vecDataB = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 4));
|
||||
|
||||
__m256i vecHashA= CalcHash(vecDataA);
|
||||
__m256i vecHashB= CalcHash(vecDataB);
|
||||
|
||||
__m256i vecMaskA = ConstructMask(vecHashA);
|
||||
__m256i vecMaskB = ConstructMask(vecHashB);
|
||||
|
||||
__m256i vecBlockIdxA = GetBlockIdx(vecHashA);
|
||||
__m256i vecBlockIdxB = GetBlockIdx(vecHashB);
|
||||
|
||||
uint64_t block0 = _mm256_extract_epi64(vecBlockIdxA, 0);
|
||||
uint64_t block1 = _mm256_extract_epi64(vecBlockIdxA, 1);
|
||||
uint64_t block2 = _mm256_extract_epi64(vecBlockIdxA, 2);
|
||||
uint64_t block3 = _mm256_extract_epi64(vecBlockIdxA, 3);
|
||||
uint64_t block4 = _mm256_extract_epi64(vecBlockIdxB, 0);
|
||||
uint64_t block5 = _mm256_extract_epi64(vecBlockIdxB, 1);
|
||||
uint64_t block6 = _mm256_extract_epi64(vecBlockIdxB, 2);
|
||||
uint64_t block7 = _mm256_extract_epi64(vecBlockIdxB, 3);
|
||||
|
||||
bv[block0] |= _mm256_extract_epi64(vecMaskA, 0);
|
||||
bv[block1] |= _mm256_extract_epi64(vecMaskA, 1);
|
||||
bv[block2] |= _mm256_extract_epi64(vecMaskA, 2);
|
||||
bv[block3] |= _mm256_extract_epi64(vecMaskA, 3);
|
||||
bv[block4] |= _mm256_extract_epi64(vecMaskB, 0);
|
||||
bv[block5] |= _mm256_extract_epi64(vecMaskB, 1);
|
||||
bv[block6] |= _mm256_extract_epi64(vecMaskB, 2);
|
||||
bv[block7] |= _mm256_extract_epi64(vecMaskB, 3);
|
||||
}
|
||||
|
||||
__attribute__ ((target ("avx2,avx,fma")))
|
||||
uint8_t Query(T **data)
|
||||
{
|
||||
__m256i vecDataA = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 0));
|
||||
__m256i vecDataB = _mm256_loadu_si256(reinterpret_cast<__m256i *>(data + 4));
|
||||
|
||||
__m256i vecHashA= CalcHash(vecDataA);
|
||||
__m256i vecHashB= CalcHash(vecDataB);
|
||||
|
||||
__m256i vecMaskA = ConstructMask(vecHashA);
|
||||
__m256i vecMaskB = ConstructMask(vecHashB);
|
||||
|
||||
__m256i vecBlockIdxA = GetBlockIdx(vecHashA);
|
||||
__m256i vecBlockIdxB = GetBlockIdx(vecHashB);
|
||||
|
||||
__m256i vecBloomA = _mm256_i64gather_epi64(bv.data(), vecBlockIdxA, sizeof(longlong));
|
||||
__m256i vecBloomB = _mm256_i64gather_epi64(bv.data(), vecBlockIdxB, sizeof(longlong));
|
||||
__m256i vecCmpA = _mm256_cmpeq_epi64(_mm256_and_si256(vecMaskA, vecBloomA), vecMaskA);
|
||||
__m256i vecCmpB = _mm256_cmpeq_epi64(_mm256_and_si256(vecMaskB, vecBloomB), vecMaskB);
|
||||
uint32_t res_a = static_cast<uint32_t>(_mm256_movemask_epi8(vecCmpA));
|
||||
uint32_t res_b = static_cast<uint32_t>(_mm256_movemask_epi8(vecCmpB));
|
||||
uint64_t res_bytes = res_a | (static_cast<uint64_t>(res_b) << 32);
|
||||
uint8_t res_bits = static_cast<uint8_t>(_mm256_movemask_epi8(_mm256_set1_epi64x(res_bytes)) & 0xff);
|
||||
return res_bits;
|
||||
}
|
||||
#endif
|
||||
|
||||
int n;
|
||||
float epsilon;
|
||||
|
||||
uint64_t num_blocks;
|
||||
uint32_t m;
|
||||
// calculated from the upstream MaskTable and hard-coded
|
||||
static constexpr int log_num_masks = 10;
|
||||
static constexpr int bits_per_mask = 57;
|
||||
const uint8_t masks[136]= {0x00, 0x04, 0x01, 0x04, 0x00, 0x20, 0x01, 0x00,
|
||||
0x00, 0x02, 0x08, 0x00, 0x02, 0x42, 0x00, 0x00, 0x04, 0x00, 0x00, 0x84,
|
||||
0x80, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x21, 0x00, 0x08, 0x00, 0x14,
|
||||
0x00, 0x00, 0x40, 0x00, 0x10, 0x00, 0xa8, 0x00, 0x00, 0x00, 0x00, 0x10,
|
||||
0x04, 0x40, 0x01, 0x00, 0x40, 0x00, 0x00, 0x08, 0x01, 0x02, 0x80, 0x00,
|
||||
0x00, 0x01, 0x00, 0x06, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x0c, 0x10,
|
||||
0x00, 0x10, 0x00, 0x00, 0x10, 0x08, 0x01, 0x10, 0x00, 0x00, 0x10, 0x20,
|
||||
0x00, 0x01, 0x20, 0x00, 0x02, 0x40, 0x00, 0x00, 0x02, 0x40, 0x01, 0x00,
|
||||
0x40, 0x00, 0x00, 0x0a, 0x00, 0x02, 0x01, 0x80, 0x00, 0x00, 0x10, 0x08,
|
||||
0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x50, 0x00, 0x08, 0x10, 0x20, 0x00,
|
||||
0x00, 0x80, 0x00, 0x10, 0x10, 0x04, 0x04, 0x00, 0x00, 0x00, 0x20, 0x20,
|
||||
0x08, 0x08, 0x02, 0x00, 0x00, 0x00, 0x40, 0x00};
|
||||
std::vector<longlong> bv;
|
||||
|
||||
static constexpr int mask_idx_bits = log_num_masks;
|
||||
static constexpr int rotate_bits = 6;
|
||||
};
|
|
@ -559,6 +559,7 @@ enum legacy_db_type
|
|||
{
|
||||
/* note these numerical values are fixed and can *not* be changed */
|
||||
DB_TYPE_UNKNOWN=0,
|
||||
DB_TYPE_HLINDEX_HELPER=6,
|
||||
DB_TYPE_HEAP=6,
|
||||
DB_TYPE_MYISAM=9,
|
||||
DB_TYPE_MRG_MYISAM=10,
|
||||
|
|
|
@ -2401,6 +2401,10 @@ retry_share:
|
|||
my_error(ER_NOT_SEQUENCE, MYF(0), table_list->db.str, table_list->alias.str);
|
||||
DBUG_RETURN(true);
|
||||
}
|
||||
/* hlindexes don't support concurrent insert */
|
||||
if (table->s->hlindexes() &&
|
||||
table_list->lock_type == TL_WRITE_CONCURRENT_INSERT)
|
||||
table_list->lock_type= TL_WRITE_DEFAULT;
|
||||
|
||||
DBUG_ASSERT(thd->locked_tables_mode || table->file->row_logging == 0);
|
||||
DBUG_RETURN(false);
|
||||
|
|
|
@ -55,6 +55,7 @@
|
|||
#include "opt_trace_context.h"
|
||||
#include "log_event.h"
|
||||
#include "optimizer_defaults.h"
|
||||
#include "vector_mhnsw.h"
|
||||
|
||||
#ifdef WITH_PERFSCHEMA_STORAGE_ENGINE
|
||||
#include "../storage/perfschema/pfs_server.h"
|
||||
|
@ -7463,3 +7464,7 @@ static Sys_var_uint Sys_mhnsw_max_edges_per_node(
|
|||
"memory consumption, but better search results",
|
||||
SESSION_VAR(mhnsw_max_edges_per_node), CMD_LINE(REQUIRED_ARG),
|
||||
VALID_RANGE(3, 200), DEFAULT(6), BLOCK_SIZE(1));
|
||||
static Sys_var_ulonglong Sys_mhnsw_cache_size(
|
||||
"mhnsw_cache_size", "Size of the cache for the MHNSW vector index",
|
||||
GLOBAL_VAR(mhnsw_cache_size), CMD_LINE(REQUIRED_ARG),
|
||||
VALID_RANGE(1024*1024, SIZE_T_MAX), DEFAULT(16*1024*1024), BLOCK_SIZE(1));
|
||||
|
|
|
@ -50,6 +50,7 @@
|
|||
#include "sql_delete.h" // class Sql_cmd_delete
|
||||
#include "rpl_rli.h" // class rpl_group_info
|
||||
#include "rpl_mi.h" // class Master_info
|
||||
#include "vector_mhnsw.h"
|
||||
|
||||
#ifdef WITH_WSREP
|
||||
#include "wsrep_schema.h"
|
||||
|
@ -505,7 +506,10 @@ void TABLE_SHARE::destroy()
|
|||
delete sequence;
|
||||
|
||||
if (hlindex)
|
||||
{
|
||||
mhnsw_free(this);
|
||||
hlindex->destroy();
|
||||
}
|
||||
|
||||
/* The mutexes are initialized only for shares that are part of the TDC */
|
||||
if (tmp_table == NO_TMP_TABLE)
|
||||
|
@ -4795,6 +4799,7 @@ int closefrm(TABLE *table)
|
|||
|
||||
if (table->hlindex)
|
||||
closefrm(table->hlindex);
|
||||
|
||||
if (table->db_stat)
|
||||
error=table->file->ha_close();
|
||||
table->alias.free();
|
||||
|
|
|
@ -743,7 +743,11 @@ struct TABLE_SHARE
|
|||
Virtual_column_info **check_constraints;
|
||||
uint *blob_field; /* Index to blobs in Field arrray*/
|
||||
LEX_CUSTRING vcol_defs; /* definitions of generated columns */
|
||||
TABLE_SHARE *hlindex;
|
||||
|
||||
union {
|
||||
void *hlindex_data; /* for hlindex tables */
|
||||
TABLE_SHARE *hlindex; /* for normal tables */
|
||||
};
|
||||
|
||||
/*
|
||||
EITS statistics data from the last time the table was opened or ANALYZE
|
||||
|
|
1070
sql/vector_mhnsw.cc
1070
sql/vector_mhnsw.cc
File diff suppressed because it is too large
Load diff
|
@ -25,3 +25,6 @@ const LEX_CSTRING mhnsw_hlindex_table_def(THD *thd, uint ref_length);
|
|||
int mhnsw_insert(TABLE *table, KEY *keyinfo);
|
||||
int mhnsw_read_first(TABLE *table, KEY *keyinfo, Item *dist, ulonglong limit);
|
||||
int mhnsw_read_next(TABLE *table);
|
||||
void mhnsw_free(TABLE_SHARE *share);
|
||||
|
||||
extern ulonglong mhnsw_cache_size;
|
||||
|
|
Loading…
Reference in a new issue