fix test failures on x86, gcc -O1

x86 builds don't use SIMD, and fast math and inlining cause
distances to be quite unstable, so that:

1) comparison with the threshold no longer works: the distance calculated
   twice between the same two vectors comes out differently

2) a bunch of identical vectors get a non-zero distance between
   them, and HNSW cross-links them with no outbound links (if there are
   more than 2M identical vectors). Let's strengthen the select_neighbors
   heuristic to skip neighbors that are too close to each other

MDEV-35418 suggests a better solution for this.
This commit is contained in:
Sergei Golubchik 2024-11-11 19:53:41 +01:00
parent 38ffaeadab
commit 74743b0d88

View file

@ -908,7 +908,7 @@ static int select_neighbors(MHNSW_Share *ctx, TABLE *graph, size_t layer,
{ {
Visited *vec= pq.pop(); Visited *vec= pq.pop();
FVectorNode * const node= vec->node; FVectorNode * const node= vec->node;
const float target_dista= vec->distance_to_target / alpha; const float target_dista= std::max(32*FLT_EPSILON, vec->distance_to_target / alpha);
bool discard= false; bool discard= false;
for (size_t i=0; i < neighbors.num; i++) for (size_t i=0; i < neighbors.num; i++)
if ((discard= node->distance_to(neighbors.links[i]->vec) <= target_dista)) if ((discard= node->distance_to(neighbors.links[i]->vec) <= target_dista))
@ -1348,7 +1348,7 @@ int mhnsw_read_next(TABLE *table)
} }
ctx->release(false, table->s); // release shared ctx ctx->release(false, table->s); // release shared ctx
result->ctx= trx; // replace it with trx result->ctx= trx; // replace it with trx
result->ctx_version= trx->version; result->ctx_version= trx->version;
std::swap(trx, ctx); // free shared ctx in this scope, keep trx std::swap(trx, ctx); // free shared ctx in this scope, keep trx
} }
@ -1358,7 +1358,7 @@ int mhnsw_read_next(TABLE *table)
static_cast<uint>(result->pos), 0, &result->found, false)) static_cast<uint>(result->pos), 0, &result->found, false))
return err; return err;
result->pos= 0; result->pos= 0;
result->threshold= new_threshold; result->threshold= new_threshold + FLT_EPSILON;
return mhnsw_read_next(table); return mhnsw_read_next(table);
} }