mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
fix test failures on x86, gcc -O1
x86 builds don't use SIMD; fast math and inlining cause distances to be quite unstable, and 1) comparison with the threshold no longer works, because the distance calculated twice between the same two vectors comes out differently; 2) a bunch of identical vectors get a non-zero distance between them, and HNSW cross-links them with no outbound links (if there are more than 2M identical vectors). Let's strengthen the select_neighbors heuristic to skip neighbors that are too close to each other. MDEV-35418 suggests a better solution for this.
This commit is contained in:
parent
38ffaeadab
commit
74743b0d88
1 changed files with 3 additions and 3 deletions
|
@ -908,7 +908,7 @@ static int select_neighbors(MHNSW_Share *ctx, TABLE *graph, size_t layer,
|
|||
{
|
||||
Visited *vec= pq.pop();
|
||||
FVectorNode * const node= vec->node;
|
||||
const float target_dista= vec->distance_to_target / alpha;
|
||||
const float target_dista= std::max(32*FLT_EPSILON, vec->distance_to_target / alpha);
|
||||
bool discard= false;
|
||||
for (size_t i=0; i < neighbors.num; i++)
|
||||
if ((discard= node->distance_to(neighbors.links[i]->vec) <= target_dista))
|
||||
|
@ -1348,7 +1348,7 @@ int mhnsw_read_next(TABLE *table)
|
|||
}
|
||||
ctx->release(false, table->s); // release shared ctx
|
||||
result->ctx= trx; // replace it with trx
|
||||
result->ctx_version= trx->version;
|
||||
result->ctx_version= trx->version;
|
||||
std::swap(trx, ctx); // free shared ctx in this scope, keep trx
|
||||
}
|
||||
|
||||
|
@ -1358,7 +1358,7 @@ int mhnsw_read_next(TABLE *table)
|
|||
static_cast<uint>(result->pos), 0, &result->found, false))
|
||||
return err;
|
||||
result->pos= 0;
|
||||
result->threshold= new_threshold;
|
||||
result->threshold= new_threshold + FLT_EPSILON;
|
||||
return mhnsw_read_next(table);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue