2016-03-22 21:51:59 +02:00
|
|
|
/* Copyright (c) 2016 MariaDB corporation
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
|
|
|
|
#ifndef UNIQUE_INCLUDED
|
|
|
|
#define UNIQUE_INCLUDED
|
|
|
|
|
|
|
|
#include "filesort.h"
|
|
|
|
|
|
|
|
/*
  Unique -- class for eliminating duplicate values.
  Puts all values into the TREE. If the tree becomes too big,
  it is dumped to the file. The user can request sorted values, or
  just iterate through them. In the latter case tree merging is performed in
  memory simultaneously with iteration, so it should be ~2-3x faster.
*/
|
|
|
|
|
|
|
|
class Unique :public Sql_alloc
|
|
|
|
{
|
|
|
|
DYNAMIC_ARRAY file_ptrs;
|
2020-01-20 16:15:05 +01:00
|
|
|
ulong max_elements; /* Total number of elements that will be stored in-memory */
|
2017-09-28 12:21:16 +00:00
|
|
|
size_t max_in_memory_size;
|
2016-03-22 21:51:59 +02:00
|
|
|
IO_CACHE file;
|
|
|
|
TREE tree;
|
2020-01-20 16:15:05 +01:00
|
|
|
/* Number of elements filtered out due to min_dupl_count when storing results
|
|
|
|
to table. See Unique::get */
|
2016-03-22 21:51:59 +02:00
|
|
|
ulong filtered_out_elems;
|
|
|
|
uint size;
|
2020-01-20 16:15:05 +01:00
|
|
|
|
|
|
|
uint full_size; /* Size of element + space needed to store the number of
|
|
|
|
duplicates found for the element. */
|
|
|
|
uint min_dupl_count; /* Minimum number of occurences of element required for
|
|
|
|
it to be written to record_pointers.
|
|
|
|
always 0 for unions, > 0 for intersections */
|
2016-03-22 21:51:59 +02:00
|
|
|
bool with_counters;
|
|
|
|
|
2020-01-21 01:37:47 +05:30
|
|
|
bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
|
2016-03-22 21:51:59 +02:00
|
|
|
bool flush();
|
|
|
|
|
|
|
|
public:
|
|
|
|
ulong elements;
|
|
|
|
SORT_INFO sort;
|
|
|
|
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
|
2017-09-28 12:21:16 +00:00
|
|
|
uint size_arg, size_t max_in_memory_size_arg,
|
2016-03-22 21:51:59 +02:00
|
|
|
uint min_dupl_count_arg= 0);
|
|
|
|
~Unique();
|
|
|
|
ulong elements_in_tree() { return tree.elements_in_tree; }
|
|
|
|
inline bool unique_add(void *ptr)
|
|
|
|
{
|
|
|
|
DBUG_ENTER("unique_add");
|
|
|
|
DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
|
|
|
|
if (!(tree.flag & TREE_ONLY_DUPS) &&
|
|
|
|
tree.elements_in_tree >= max_elements && flush())
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
|
|
|
|
}
|
|
|
|
|
|
|
|
bool is_in_memory() { return (my_b_tell(&file) == 0); }
|
|
|
|
void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
|
|
|
|
|
|
|
|
bool get(TABLE *table);
|
|
|
|
|
|
|
|
/* Cost of searching for an element in the tree */
|
2020-02-28 12:59:30 +02:00
|
|
|
inline static double get_search_cost(ulonglong tree_elems,
|
|
|
|
double compare_factor)
|
2016-03-22 21:51:59 +02:00
|
|
|
{
|
|
|
|
return log((double) tree_elems) / (compare_factor * M_LN2);
|
|
|
|
}
|
|
|
|
|
|
|
|
static double get_use_cost(uint *buffer, size_t nkeys, uint key_size,
|
2020-02-28 12:59:30 +02:00
|
|
|
size_t max_in_memory_size, double compare_factor,
|
2016-03-22 21:51:59 +02:00
|
|
|
bool intersect_fl, bool *in_memory);
|
|
|
|
inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
|
2017-09-28 12:21:16 +00:00
|
|
|
size_t max_in_memory_size)
|
2016-03-22 21:51:59 +02:00
|
|
|
{
|
2018-04-24 12:14:35 +03:00
|
|
|
size_t max_elems_in_tree=
|
2016-03-22 21:51:59 +02:00
|
|
|
max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
|
2020-06-12 10:45:54 +03:00
|
|
|
|
|
|
|
if (max_elems_in_tree == 0)
|
|
|
|
max_elems_in_tree= 1;
|
2016-03-22 21:51:59 +02:00
|
|
|
return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree));
|
|
|
|
}
|
|
|
|
|
|
|
|
void reset();
|
|
|
|
bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
|
|
|
|
|
|
|
|
uint get_size() const { return size; }
|
2017-09-28 12:21:16 +00:00
|
|
|
size_t get_max_in_memory_size() const { return max_in_memory_size; }
|
2016-03-22 21:51:59 +02:00
|
|
|
|
|
|
|
friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
|
|
|
|
friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);
|
|
|
|
|
|
|
|
friend int unique_write_to_file_with_count(uchar* key, element_count count,
|
|
|
|
Unique *unique);
|
|
|
|
friend int unique_intersect_write_to_ptrs(uchar* key, element_count count,
|
|
|
|
Unique *unique);
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif /* UNIQUE_INCLUDED */
|