mirror of
https://github.com/MariaDB/server.git
synced 2026-05-15 11:27:39 +02:00
misc changes
* sysvars should be REQUIRED_ARG
* fix a mix of US and UK spelling (use US)
* use consistent naming
* work if VEC_DISTANCE arguments are in the swapped order (const, col)
* work if VEC_DISTANCE argument is NULL/invalid or wrong length
* abort INSERT if the value is invalid or wrong length
* store the "number of neighbors" in a blob in endianness-independent way
* use field->store(longlong, bool) not field->store(double)
* a lot more error checking everywhere
* cleanup after errors
* simplify calling conventions, remove reinterpret_cast's
* todo/XXX comments
* whitespaces
* use float consistently

memory management is still totally PoC quality
This commit is contained in:
parent
88839e71a3
commit
3ff7f04fd4
8 changed files with 373 additions and 397 deletions
|
|
@ -412,10 +412,10 @@ The following specify which files/extra groups are read (specified before remain
|
|||
height-balanced, DOUBLE_PREC_HB - double precision
|
||||
height-balanced, JSON_HB - height-balanced, stored as
|
||||
JSON
|
||||
--hnsw-ef-constructor
|
||||
--hnsw-ef-constructor=#
|
||||
hnsw_ef_constructor
|
||||
--hnsw-ef-search hnsw_ef_search
|
||||
--hnsw-max-connection-per-layer
|
||||
--hnsw-ef-search=# hnsw_ef_search
|
||||
--hnsw-max-connection-per-layer=#
|
||||
hnsw_max_connection_per_layer
|
||||
--host-cache-size=# How many host names should be cached to avoid resolving
|
||||
(Automatically configured unless set explicitly)
|
||||
|
|
|
|||
|
|
@ -80,6 +80,21 @@ id d
|
|||
9 0.4719976290006591
|
||||
10 0.5069011044450041
|
||||
3 0.5865673124650332
|
||||
select id,vec_distance(x'b047263c9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
|
||||
id d
|
||||
9 0.4719976290006591
|
||||
10 0.5069011044450041
|
||||
3 0.5865673124650332
|
||||
select id>0,vec_distance(v, NULL) d from t1 order by d limit 3;
|
||||
id>0 d
|
||||
1 NULL
|
||||
1 NULL
|
||||
1 NULL
|
||||
select id>0,vec_distance(v, x'123456') d from t1 order by d limit 3;
|
||||
id>0 d
|
||||
1 NULL
|
||||
1 NULL
|
||||
1 NULL
|
||||
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
|
||||
id1 id2 vec_distance(t1.v, t2.v)
|
||||
1 1 0
|
||||
|
|
@ -182,5 +197,11 @@ id1 id2 vec_distance(t1.v, t2.v)
|
|||
9 8 1.2575258643523053
|
||||
7 8 1.288239696195716
|
||||
8 7 1.288239696195716
|
||||
insert t1 (v) values ('');
|
||||
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
|
||||
insert t1 (v) values (x'1234');
|
||||
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
|
||||
insert t1 (v) values (x'12345678');
|
||||
ERROR 22007: Incorrect vector value: '...' for column `test`.`t1`.`v` at row 1
|
||||
drop table t1;
|
||||
db.opt
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ create table t1 (id int auto_increment primary key, v blob not null, vector inde
|
|||
show create table t1;
|
||||
show keys from t1;
|
||||
query_vertical select * from information_schema.statistics where table_name='t1';
|
||||
# print unpack(H40,pack(f5,map{rand}1..5))
|
||||
# print unpack("H*",pack("f*",map{rand}1..5))
|
||||
insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
|
||||
(x'f511303f72224a3fdd05fe3eb22a133ffae86a3f'),
|
||||
(x'f09baa3ea172763f123def3e0c7fe53e288bf33e'),
|
||||
|
|
@ -24,8 +24,23 @@ insert t1 (v) values (x'e360d63ebe554f3fcdbc523f4522193f5236083d'),
|
|||
|
||||
select id, hex(v) from t1;
|
||||
flush tables;
|
||||
# test with a valid query vector
|
||||
select id,vec_distance(v, x'b047263c9f87233fcfd27e3eae493e3f0329f43e') d from t1 order by d limit 3;
|
||||
# swapped arguments
|
||||
select id,vec_distance(x'b047263c9f87233fcfd27e3eae493e3f0329f43e', v) d from t1 order by d limit 3;
|
||||
# test with NULL (id is unpredictable)
|
||||
select id>0,vec_distance(v, NULL) d from t1 order by d limit 3;
|
||||
# test with invalid query vector (id is unpredictable)
|
||||
select id>0,vec_distance(v, x'123456') d from t1 order by d limit 3;
|
||||
select t1.id as id1, t2.id as id2, vec_distance(t1.v, t2.v) from t1, t1 as t2 order by 3,1,2;
|
||||
|
||||
--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
|
||||
insert t1 (v) values ('');
|
||||
--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
|
||||
insert t1 (v) values (x'1234');
|
||||
--error ER_TRUNCATED_WRONG_VALUE_FOR_FIELD
|
||||
insert t1 (v) values (x'12345678');
|
||||
|
||||
drop table t1;
|
||||
let $datadir=`select @@datadir`;
|
||||
list_files $datadir/test;
|
||||
|
|
|
|||
|
|
@ -1441,7 +1441,7 @@ NUMERIC_MAX_VALUE 4294967295
|
|||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT NONE
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME HNSW_EF_SEARCH
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE INT UNSIGNED
|
||||
|
|
@ -1451,7 +1451,7 @@ NUMERIC_MAX_VALUE 4294967295
|
|||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT NONE
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME HNSW_MAX_CONNECTION_PER_LAYER
|
||||
VARIABLE_SCOPE SESSION
|
||||
VARIABLE_TYPE INT UNSIGNED
|
||||
|
|
@ -1461,7 +1461,7 @@ NUMERIC_MAX_VALUE 4294967295
|
|||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT NONE
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME HOSTNAME
|
||||
VARIABLE_SCOPE GLOBAL
|
||||
VARIABLE_TYPE VARCHAR
|
||||
|
|
|
|||
|
|
@ -965,6 +965,10 @@ public:
|
|||
{
|
||||
return store(to, length, &my_charset_bin);
|
||||
}
|
||||
/*
  Convenience overload for callers that hold the data as unsigned bytes.
  Casts `to` to const char* and forwards, together with `length`, to the
  store_binary() overload that accepts a const char* buffer; the result of
  that call is returned unchanged.
*/
int store_binary(const uchar *to, size_t length)
|
||||
{
|
||||
return store_binary((const char*)(to), length);
|
||||
}
|
||||
virtual int store_hex_hybrid(const char *str, size_t length);
|
||||
virtual int store(double nr)=0;
|
||||
virtual int store(longlong nr, bool unsigned_val)=0;
|
||||
|
|
|
|||
|
|
@ -51,10 +51,17 @@ public:
|
|||
static LEX_CSTRING name= {STRING_WITH_LEN("vec_distance") };
|
||||
return name;
|
||||
}
|
||||
/*
  Pick the constant argument of a two-argument call where the other
  argument is a field item, accepting either argument order.

  @return args[1] if args[0] is an Item::FIELD_ITEM and args[1] is constant,
          args[0] if args[1] is an Item::FIELD_ITEM and args[0] is constant,
          NULL if neither combination holds.
*/
Item *get_const_arg() const
|
||||
{
|
||||
if (args[0]->type() == Item::FIELD_ITEM && args[1]->const_item())
|
||||
return args[1];
|
||||
/* swapped order: (const, field) */
if (args[1]->type() == Item::FIELD_ITEM && args[0]->const_item())
|
||||
return args[0];
|
||||
return NULL;
|
||||
}
|
||||
key_map part_of_sortkey() const override;
|
||||
/*
  Produce a copy of this item by delegating to
  get_item_copy<Item_func_vec_distance>(thd, this) and returning its result.
*/
Item *do_get_copy(THD *thd) const override
|
||||
{ return get_item_copy<Item_func_vec_distance>(thd, this); }
|
||||
/* Virtual destructor with an empty body. */
virtual ~Item_func_vec_distance() {};
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -7452,18 +7452,18 @@ static Sys_var_ulonglong Sys_binlog_large_commit_threshold(
|
|||
static Sys_var_uint Sys_hnsw_ef_search(
|
||||
"hnsw_ef_search",
|
||||
"hnsw_ef_search",
|
||||
SESSION_VAR(hnsw_ef_search), CMD_LINE(NO_ARG),
|
||||
SESSION_VAR(hnsw_ef_search), CMD_LINE(REQUIRED_ARG),
|
||||
VALID_RANGE(0, UINT_MAX), DEFAULT(10),
|
||||
BLOCK_SIZE(1));
|
||||
static Sys_var_uint Sys_hnsw_ef_constructor(
|
||||
"hnsw_ef_constructor",
|
||||
"hnsw_ef_constructor",
|
||||
SESSION_VAR(hnsw_ef_constructor), CMD_LINE(NO_ARG),
|
||||
SESSION_VAR(hnsw_ef_constructor), CMD_LINE(REQUIRED_ARG),
|
||||
VALID_RANGE(0, UINT_MAX), DEFAULT(10),
|
||||
BLOCK_SIZE(1));
|
||||
static Sys_var_uint Sys_hnsw_max_connection_per_layer(
|
||||
"hnsw_max_connection_per_layer",
|
||||
"hnsw_max_connection_per_layer",
|
||||
SESSION_VAR(hnsw_max_connection_per_layer), CMD_LINE(NO_ARG),
|
||||
SESSION_VAR(hnsw_max_connection_per_layer), CMD_LINE(REQUIRED_ARG),
|
||||
VALID_RANGE(0, UINT_MAX), DEFAULT(50),
|
||||
BLOCK_SIZE(1));
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue