mariadb/mysys/lf_hash.cc
Yuchen Pei bd1e74aa0b
MDEV-9826 More hash algorithms for PARTITION BY [LINEAR] KEY
PARTITION BY [LINEAR] KEY ALGORITHM={MYSQL51|MYSQL55|BASE31|CRC32C|XXH32|XXH3}

- The MYSQL5X algorithms are the existing algorithms, with MYSQL55
  being the default
- The BASE31 algorithm uses a base-31 representation of the bytes, see
  Modular hashing in https://algs4.cs.princeton.edu/34hash/. It serves
  as a simple baseline that distributes better than the old mysql5x
  algorithms
- CRC32C uses my_crc32c.
- XXH32 and XXH3 are xxhash algorithms - xxhash.h copied from latest
  release (0.8.3) of https://github.com/Cyan4973/xxHash

For performance (esp. xxh) we use one-shot hash functions in binary
hash_sort, and streaming hash function otherwise for byte-by-byte
hashing. XXH is the only stateful hash function. The other hash
algorithms are stateless and homomorphic, so streaming and one-shot
functions are identical.

Also updated the columnstore hash due to the change of MY_HASH_ADD
signature.

The following testing commands were run at an earlier version of the
patch with the following patch that changes the default algorithm from
MYSQL55 to CRC32C, and XXH32 (changing the patch accordingly)

mtr --suite main --do-test=.*partition
mtr --suite parts

modified   sql/ha_partition.cc
@@ -10336,6 +10336,8 @@ uint32 ha_partition::calculate_key_hash_value(Field **field_array)
   switch ((*field_array)->table->part_info->key_algorithm)
   {
   case partition_info::KEY_ALGORITHM_NONE:
+    hasher.set_algorithm(HASH_ALGORITHM_CRC32C);
+    break;
   case partition_info::KEY_ALGORITHM_55:
     /* Hasher default to mysql55 */
     break;
modified   sql/partition_info.cc
@@ -2328,7 +2328,7 @@ bool partition_info::fix_parser_data(THD *thd)
       if ((thd_sql_command(thd) == SQLCOM_CREATE_TABLE ||
            thd_sql_command(thd) == SQLCOM_ALTER_TABLE) &&
           key_algorithm == KEY_ALGORITHM_NONE)
-        key_algorithm= KEY_ALGORITHM_55;
+        key_algorithm= PARTITION_INFO_DEFAULT_ALGORITHM;
     }
     DBUG_RETURN(FALSE);
   }
@@ -2344,7 +2344,7 @@ bool partition_info::fix_parser_data(THD *thd)
     if ((thd_sql_command(thd) == SQLCOM_CREATE_TABLE ||
          thd_sql_command(thd) == SQLCOM_ALTER_TABLE) &&
         key_algorithm == KEY_ALGORITHM_NONE)
-      key_algorithm= KEY_ALGORITHM_55;
+      key_algorithm= PARTITION_INFO_DEFAULT_ALGORITHM;
   }
   defined_max_value= FALSE; // in case it already set (CREATE TABLE LIKE)
   do
modified   sql/partition_info.h
@@ -446,6 +446,8 @@ class partition_info : public DDL_LOG_STATE, public Sql_alloc
   int gen_part_type(THD *thd, String *str) const;
 };

+#define PARTITION_INFO_DEFAULT_ALGORITHM partition_info::KEY_ALGORITHM_CRC32C
+
 void part_type_error(THD *thd, partition_info *work_part_info,
                      const char *part_type, partition_info *tab_part_info);

modified   sql/sql_partition.cc
@@ -2471,7 +2471,7 @@ static int add_key_with_algorithm(String *str, const partition_info *part_info)
   err+= str->append(STRING_WITH_LEN("KEY "));

   if (part_info->key_algorithm != partition_info::KEY_ALGORITHM_NONE &&
-      part_info->key_algorithm != partition_info::KEY_ALGORITHM_55)
+      part_info->key_algorithm != PARTITION_INFO_DEFAULT_ALGORITHM)
   {
     err+= str->append(STRING_WITH_LEN("ALGORITHM = "));
     switch (part_info->key_algorithm)
@@ -2479,6 +2479,9 @@ static int add_key_with_algorithm(String *str, const partition_info *part_info)
       case partition_info::KEY_ALGORITHM_51:
         err+= str->append(STRING_WITH_LEN("MYSQL51"));
         break;
+      case partition_info::KEY_ALGORITHM_55:
+        err+= str->append(STRING_WITH_LEN("MYSQL55"));
+        break;
       case partition_info::KEY_ALGORITHM_BASE31:
         err+= str->append(STRING_WITH_LEN("BASE31"));
         break;
2026-02-05 11:55:12 +11:00

591 lines
18 KiB
C++

/* Copyright (c) 2006, 2018, Oracle and/or its affiliates.
Copyright (c) 2009, 2020, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
/*
extensible hash
TODO
try to get rid of dummy nodes ?
for non-unique hash, count only _distinct_ values
(but how to do it in lf_hash_delete ?)
*/
#include "mysys_priv.h"
#include <m_string.h>
#include <mysys_err.h>
#include <my_bit.h>
#include <lf.h>
#include "my_cpu.h"
#include "assume_aligned.h"
/* An element of the list */
typedef struct {
intptr link; /* a pointer to the next element in a list and a flag */
const uchar *key;
size_t keylen;
uint32 hashnr; /* reversed hash number, for sorting */
/*
data is stored here, directly after the keylen.
thus the pointer to data is (void*)(slist_element_ptr+1)
*/
} LF_SLIST;
const int LF_HASH_OVERHEAD= sizeof(LF_SLIST);
/*
a structure to pass the context (pointers two the three successive elements
in a list) from l_find to l_insert/l_delete
*/
typedef struct {
intptr *prev;
LF_SLIST *curr, *next;
} CURSOR;
/*
the last bit in LF_SLIST::link is a "deleted" flag.
the helper macros below convert it to a pure pointer or a pure flag
*/
#define PTR(V) (LF_SLIST *)((V) & (~(intptr)1))
#define DELETED(V) ((V) & 1)
/** walk the list, searching for an element or invoking a callback
Search for hashnr/key/keylen in the list starting from 'head' and
position the cursor. The list is ORDER BY hashnr, key
@param head start walking the list from this node
@param cs charset for comparing keys, NULL if callback is used
@param hashnr hash number to search for
@param key key to search for OR data for the callback
@param keylen length of the key to compare, 0 if callback is used
@param cursor for returning the found element
@param pins see lf_alloc-pin.c
@param callback callback action, invoked for every element
@note
cursor is positioned in either case
pins[0..2] are used, they are NOT removed on return
callback might see some elements twice (because of retries)
@return
if find: 0 - not found
1 - found
if callback:
0 - ok
1 - error (callbck returned 1)
*/
static int l_find(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
const uchar *key, size_t keylen, CURSOR *cursor, LF_PINS *pins,
my_hash_walk_action callback)
{
uint32 cur_hashnr;
const uchar *cur_key;
size_t cur_keylen;
intptr link;
DBUG_ASSERT(!cs || !callback); /* should not be set both */
DBUG_ASSERT(!keylen || !callback); /* should not be set both */
retry:
cursor->prev= (intptr *) my_assume_aligned<sizeof(intptr)>(head);
do { /* PTR() isn't necessary below, head is a dummy node */
cursor->curr= my_assume_aligned<sizeof(LF_SLIST *)>((LF_SLIST *)(*cursor->prev));
lf_pin(pins, 1, cursor->curr);
} while (my_atomic_loadptr(
(void **)my_assume_aligned<sizeof(LF_SLIST *)>(cursor->prev))
!= cursor->curr && LF_BACKOFF());
for (;;)
{
if (unlikely(!cursor->curr))
return 0; /* end of the list */
cur_hashnr= cursor->curr->hashnr;
cur_keylen= cursor->curr->keylen;
/* The key element needs to be aligned, not necessary what it points to */
my_assume_aligned<sizeof(const uchar *)>(&cursor->curr->key);
cur_key= (const uchar *) my_atomic_loadptr_explicit((void **) &cursor->curr->key,
MY_MEMORY_ORDER_ACQUIRE);
do {
/* attempting to my_assume_aligned on link below broke the implementation */
link= (intptr) my_atomic_loadptr_explicit((void **) &cursor->curr->link,
MY_MEMORY_ORDER_RELAXED);
cursor->next= my_assume_aligned<sizeof(LF_SLIST *)>(PTR(link));
lf_pin(pins, 0, cursor->next);
} while (link != (intptr) my_atomic_loadptr((void *volatile *) &cursor->curr->link)
&& LF_BACKOFF());
if (!DELETED(link))
{
if (unlikely(callback))
{
if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
return 1;
}
else if (cur_hashnr >= hashnr)
{
int r= 1;
if (cur_hashnr > hashnr ||
(r= my_strnncoll(cs, cur_key, cur_keylen, key, keylen)) >= 0)
return !r;
}
cursor->prev= &(cursor->curr->link);
if (!(cur_hashnr & 1)) /* dummy node */
head= (LF_SLIST **)cursor->prev;
lf_pin(pins, 2, cursor->curr);
}
else
{
/*
we found a deleted node - be nice, help the other thread
and remove this deleted node
*/
if (my_atomic_casptr((void **) cursor->prev,
(void **) &cursor->curr, cursor->next) && LF_BACKOFF())
lf_alloc_free(pins, cursor->curr);
else
goto retry;
}
cursor->curr= cursor->next;
lf_pin(pins, 1, cursor->curr);
}
}
/* static l_find is the only user my_assume_aligned, keep the rest as c scoped */
C_MODE_START
/*
DESCRIPTION
insert a 'node' in the list that starts from 'head' in the correct
position (as found by l_find)
RETURN
0 - inserted
not 0 - a pointer to a duplicate (not pinned and thus unusable)
NOTE
it uses pins[0..2], on return all pins are removed.
if there're nodes with the same key value, a new node is added before them.
*/
static LF_SLIST *l_insert(LF_SLIST **head, CHARSET_INFO *cs,
LF_SLIST *node, LF_PINS *pins, uint flags)
{
CURSOR cursor;
int res;
for (;;)
{
if (l_find(head, cs, node->hashnr, node->key, node->keylen,
&cursor, pins, 0) &&
(flags & LF_HASH_UNIQUE))
{
res= 0; /* duplicate found */
break;
}
else
{
node->link= (intptr)cursor.curr;
DBUG_ASSERT(node->link != (intptr)node); /* no circular references */
DBUG_ASSERT(cursor.prev != &node->link); /* no circular references */
if (my_atomic_casptr((void **) cursor.prev,
(void **)(char*) &cursor.curr, node))
{
res= 1; /* inserted ok */
break;
}
}
}
lf_unpin(pins, 0);
lf_unpin(pins, 1);
lf_unpin(pins, 2);
/*
Note that cursor.curr is not pinned here and the pointer is unreliable,
the object may disappear anytime. But if it points to a dummy node, the
pointer is safe, because dummy nodes are never freed - initialize_bucket()
uses this fact.
*/
return res ? 0 : cursor.curr;
}
/*
DESCRIPTION
deletes a node as identified by hashnr/keey/keylen from the list
that starts from 'head'
RETURN
0 - ok
1 - not found
NOTE
it uses pins[0..2], on return all pins are removed.
*/
static int l_delete(LF_SLIST **head, CHARSET_INFO *cs, uint32 hashnr,
const uchar *key, uint keylen, LF_PINS *pins)
{
CURSOR cursor;
int res;
for (;;)
{
if (!l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0))
{
res= 1; /* not found */
break;
}
else
{
/* mark the node deleted */
if (my_atomic_casptr((void **) (char*) &(cursor.curr->link),
(void **) (char*) &cursor.next,
(void *)(((intptr)cursor.next) | 1)))
{
/* and remove it from the list */
if (my_atomic_casptr((void **)cursor.prev,
(void **)(char*)&cursor.curr, cursor.next))
lf_alloc_free(pins, cursor.curr);
else
{
/*
somebody already "helped" us and removed the node ?
Let's check if we need to help that someone too!
(to ensure the number of "set DELETED flag" actions
is equal to the number of "remove from the list" actions)
*/
l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
}
res= 0;
break;
}
}
}
lf_unpin(pins, 0);
lf_unpin(pins, 1);
lf_unpin(pins, 2);
return res;
}
/*
DESCRIPTION
searches for a node as identified by hashnr/keey/keylen in the list
that starts from 'head'
RETURN
0 - not found
node - found
NOTE
it uses pins[0..2], on return the pin[2] keeps the node found
all other pins are removed.
*/
static LF_SLIST *l_search(LF_SLIST **head, CHARSET_INFO *cs,
uint32 hashnr, const uchar *key, uint keylen,
LF_PINS *pins)
{
CURSOR cursor;
int res= l_find(head, cs, hashnr, key, keylen, &cursor, pins, 0);
if (res)
lf_pin(pins, 2, cursor.curr);
else
lf_unpin(pins, 2);
lf_unpin(pins, 1);
lf_unpin(pins, 0);
return res ? cursor.curr : 0;
}
static inline const uchar* hash_key(const LF_HASH *hash,
const uchar *record, size_t *length)
{
if (hash->get_key)
return (*hash->get_key)(record, length, 0);
*length= hash->key_length;
return record + hash->key_offset;
}
/*
Compute the hash key value from the raw key.
@note, that the hash value is limited to 2^31, because we need one
bit to distinguish between normal and dummy nodes.
*/
static inline my_hash_value_type calc_hash(CHARSET_INFO *cs,
const uchar *key,
size_t keylen)
{
my_hasher_st hasher= my_hasher_mysql5x();
my_ci_hash_sort(&hasher, cs, (uchar*) key, keylen);
return hasher.m_nr1;
}
#define MAX_LOAD 1.0 /* average number of elements in a bucket */
static int initialize_bucket(LF_HASH *, LF_SLIST **, uint, LF_PINS *);
static void default_initializer(LF_HASH *hash, void *dst, const void *src)
{
memcpy(dst, src, hash->element_size);
}
/*
Initializes lf_hash, the arguments are compatible with hash_init
@note element_size sets both the size of allocated memory block for
lf_alloc and a size of memcpy'ed block size in lf_hash_insert. Typically
they are the same, indeed. But LF_HASH::element_size can be decreased
after lf_hash_init, and then lf_alloc will allocate larger block that
lf_hash_insert will copy over. It is desirable if part of the element
is expensive to initialize - for example if there is a mutex or
DYNAMIC_ARRAY. In this case they should be initialize in the
LF_ALLOCATOR::constructor, and lf_hash_insert should not overwrite them.
The above works well with PODS. For more complex cases (e.g. C++ classes
with private members) use initializer function.
*/
void lf_hash_init(LF_HASH *hash, uint element_size, uint flags,
uint key_offset, uint key_length, my_hash_get_key get_key,
CHARSET_INFO *charset)
{
lf_alloc_init(&hash->alloc, sizeof(LF_SLIST)+element_size,
offsetof(LF_SLIST, key));
lf_dynarray_init(&hash->array, sizeof(LF_SLIST *));
hash->size= 1;
hash->count= 0;
hash->element_size= element_size;
hash->flags= flags;
hash->charset= charset ? charset : &my_charset_bin;
hash->key_offset= key_offset;
hash->key_length= key_length;
hash->get_key= get_key;
hash->initializer= default_initializer;
hash->hash_function= calc_hash;
DBUG_ASSERT(get_key ? !key_offset && !key_length : key_length);
}
void lf_hash_destroy(LF_HASH *hash)
{
LF_SLIST *el, **head= (LF_SLIST **)lf_dynarray_value(&hash->array, 0);
if (head)
{
el= *head;
while (el)
{
intptr next= el->link;
if (el->hashnr & 1)
lf_alloc_direct_free(&hash->alloc, el); /* normal node */
else
my_free(el); /* dummy node */
el= (LF_SLIST *)next;
}
}
lf_alloc_destroy(&hash->alloc);
lf_dynarray_destroy(&hash->array);
}
/*
DESCRIPTION
inserts a new element to a hash. it will have a _copy_ of
data, not a pointer to it.
RETURN
0 - inserted
1 - didn't (unique key conflict)
-1 - out of memory
NOTE
see l_insert() for pin usage notes
*/
int lf_hash_insert(LF_HASH *hash, LF_PINS *pins, const void *data)
{
int csize, bucket, hashnr;
LF_SLIST *node, **el;
node= (LF_SLIST *)lf_alloc_new(pins);
if (unlikely(!node))
return -1;
hash->initializer(hash, node + 1, data);
node->key= hash_key(hash, (uchar *)(node+1), &node->keylen);
hashnr= hash->hash_function(hash->charset, node->key, node->keylen) & INT_MAX32;
bucket= hashnr % hash->size;
el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
if (unlikely(!el))
return -1;
if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
return -1;
node->hashnr= my_reverse_bits(hashnr) | 1; /* normal node */
if (l_insert(el, hash->charset, node, pins, hash->flags))
{
lf_alloc_free(pins, node);
return 1;
}
csize= hash->size;
if ((my_atomic_add32(&hash->count, 1)+1.0) / csize > MAX_LOAD)
my_atomic_cas32(&hash->size, &csize, csize*2);
return 0;
}
/*
DESCRIPTION
deletes an element with the given key from the hash (if a hash is
not unique and there're many elements with this key - the "first"
matching element is deleted)
RETURN
0 - deleted
1 - didn't (not found)
NOTE
see l_delete() for pin usage notes
*/
int lf_hash_delete(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{
LF_SLIST **el;
uint bucket, hashnr;
hashnr= hash->hash_function(hash->charset, (uchar *)key, keylen) & INT_MAX32;
/* hide OOM errors - if we cannot initialize a bucket, try the previous one */
for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
{
el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
break;
if (unlikely(bucket == 0))
return 1; /* if there's no bucket==0, the hash is empty */
}
if (l_delete(el, hash->charset, my_reverse_bits(hashnr) | 1,
(uchar *)key, keylen, pins))
{
return 1;
}
my_atomic_add32(&hash->count, -1);
return 0;
}
/*
RETURN
a pointer to an element with the given key (if a hash is not unique and
there're many elements with this key - the "first" matching element)
NULL if nothing is found
NOTE
see l_search() for pin usage notes
*/
void *lf_hash_search_using_hash_value(LF_HASH *hash, LF_PINS *pins,
my_hash_value_type hashnr,
const void *key, uint keylen)
{
LF_SLIST **el, *found;
uint bucket;
/* hide OOM errors - if we cannot initialize a bucket, try the previous one */
for (bucket= hashnr % hash->size; ;bucket= my_clear_highest_bit(bucket))
{
el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
if (el && (*el || initialize_bucket(hash, el, bucket, pins) == 0))
break;
if (unlikely(bucket == 0))
return 0; /* if there's no bucket==0, the hash is empty */
}
found= l_search(el, hash->charset, my_reverse_bits(hashnr) | 1,
(uchar *)key, keylen, pins);
return found ? found+1 : 0;
}
/**
Iterate over all elements in hash and call function with the element
@note
If one of 'action' invocations returns 1 the iteration aborts.
'action' might see some elements twice!
@retval 0 ok
@retval 1 error (action returned 1)
*/
int lf_hash_iterate(LF_HASH *hash, LF_PINS *pins,
my_hash_walk_action action, void *argument)
{
CURSOR cursor;
uint bucket= 0;
int res;
LF_SLIST **el;
el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, bucket);
if (unlikely(!el))
return 0; /* if there's no bucket==0, the hash is empty */
if (*el == NULL && unlikely(initialize_bucket(hash, el, bucket, pins)))
return 0; /* if there's no bucket==0, the hash is empty */
res= l_find(el, 0, 0, (uchar*)argument, 0, &cursor, pins, action);
lf_unpin(pins, 2);
lf_unpin(pins, 1);
lf_unpin(pins, 0);
return res;
}
void *lf_hash_search(LF_HASH *hash, LF_PINS *pins, const void *key, uint keylen)
{
return lf_hash_search_using_hash_value(hash, pins,
hash->hash_function(hash->charset,
(uchar*) key,
keylen) & INT_MAX32,
key, keylen);
}
static const uchar *dummy_key= (uchar*)"";
/*
RETURN
0 - ok
-1 - out of memory
*/
static int initialize_bucket(LF_HASH *hash, LF_SLIST **node,
uint bucket, LF_PINS *pins)
{
uint parent= my_clear_highest_bit(bucket);
LF_SLIST *dummy= (LF_SLIST *)my_malloc(key_memory_lf_slist,
sizeof(LF_SLIST), MYF(MY_WME));
LF_SLIST **tmp= 0, *cur;
LF_SLIST **el= (LF_SLIST **)lf_dynarray_lvalue(&hash->array, parent);
if (unlikely(!el || !dummy))
return -1;
if (*el == NULL && bucket &&
unlikely(initialize_bucket(hash, el, parent, pins)))
{
my_free(dummy);
return -1;
}
dummy->hashnr= my_reverse_bits(bucket) | 0; /* dummy node */
dummy->key= dummy_key;
dummy->keylen= 0;
if ((cur= l_insert(el, hash->charset, dummy, pins, LF_HASH_UNIQUE)))
{
my_free(dummy);
dummy= cur;
}
my_atomic_casptr((void **)node, (void **)(char*) &tmp, dummy);
/*
note that if the CAS above failed (after l_insert() succeeded),
it would mean that some other thread has executed l_insert() for
the same dummy node, its l_insert() failed, it picked up our
dummy node (in "dummy= cur") and executed the same CAS as above.
Which means that even if CAS above failed we don't need to retry,
and we should not free(dummy) - there's no memory leak here
*/
return 0;
}
C_MODE_END