MDEV-35213 Server crash or assertion failure upon query with high value of mhnsw_min_limit

mhnsw_min_limit must not be larger than the candidates queue size, so its maximum is lowered from 65535 to the queue size (max_ef, 10000).
This commit is contained in:
Sergei Golubchik 2024-10-20 12:53:27 +02:00
parent dd9a5dd5b5
commit 597e34d000
2 changed files with 5 additions and 4 deletions

View file

@@ -2427,7 +2427,7 @@ VARIABLE_SCOPE SESSION
 VARIABLE_TYPE INT UNSIGNED
 VARIABLE_COMMENT Defines the minimal number of result candidates to look for in the vector index for ORDER BY ... LIMIT N queries. The search will never search for less rows than that, even if LIMIT is smaller. This notably improves the search quality at low LIMIT values, at the expense of search time
 NUMERIC_MIN_VALUE 1
-NUMERIC_MAX_VALUE 65535
+NUMERIC_MAX_VALUE 10000
 NUMERIC_BLOCK_SIZE 1
 ENUM_VALUE_LIST NULL
 READ_ONLY NO

View file

@@ -31,6 +31,7 @@ static constexpr float NEAREST = -1.0f;
 // Algorithm parameters
 static constexpr float alpha = 1.1f;
 static constexpr uint ef_construction= 10;
+static constexpr uint max_ef= 10000;
 static ulonglong mhnsw_cache_size;
 static MYSQL_SYSVAR_ULONGLONG(cache_size, mhnsw_cache_size,
@@ -41,7 +42,7 @@ static MYSQL_THDVAR_UINT(min_limit, PLUGIN_VAR_RQCMDARG,
 "vector index for ORDER BY ... LIMIT N queries. The search will never "
 "search for less rows than that, even if LIMIT is smaller. "
 "This notably improves the search quality at low LIMIT values, "
-"at the expense of search time", nullptr, nullptr, 20, 1, 65535, 1);
+"at the expense of search time", nullptr, nullptr, 20, 1, max_ef, 1);
 static MYSQL_THDVAR_UINT(max_edges_per_node, PLUGIN_VAR_RQCMDARG,
 "Larger values means slower INSERT, larger index size and higher "
 "memory consumption, but better search results",
@@ -877,7 +878,7 @@ static int select_neighbors(MHNSW_Share *ctx, TABLE *graph, size_t layer,
 {
 Queue<Visited> pq; // working queue
-if (pq.init(10000, false, Visited::cmp))
+if (pq.init(max_ef, false, Visited::cmp))
 return my_errno= HA_ERR_OUT_OF_MEM;
 MEM_ROOT * const root= graph->in_use->mem_root;
@@ -1039,7 +1040,7 @@ static int search_layer(MHNSW_Share *ctx, TABLE *graph, const FVector *target,
 const uint est_size= static_cast<uint>(est_heuristic * std::pow(ef, ctx->ef_power));
 VisitedSet visited(root, target, est_size);
-candidates.init(10000, false, Visited::cmp);
+candidates.init(max_ef, false, Visited::cmp);
 best.init(ef, true, Visited::cmp);
 DBUG_ASSERT(inout->num <= result_size);