mirror of
				https://github.com/MariaDB/server.git
				synced 2025-11-03 20:36:16 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			110 lines
		
	
	
	
		
			4.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			110 lines
		
	
	
	
		
			4.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/* Copyright (c) 2016 MariaDB corporation
 | 
						|
 | 
						|
   This program is free software; you can redistribute it and/or modify
 | 
						|
   it under the terms of the GNU General Public License as published by
 | 
						|
   the Free Software Foundation; version 2 of the License.
 | 
						|
 | 
						|
   This program is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
   GNU General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU General Public License
 | 
						|
   along with this program; if not, write to the Free Software
 | 
						|
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
 | 
						|
 | 
						|
#ifndef UNIQUE_INCLUDED
 | 
						|
#define UNIQUE_INCLUDED
 | 
						|
 | 
						|
#include "filesort.h"
 | 
						|
 | 
						|
/*
 | 
						|
   Unique -- class for unique (removing of duplicates).
 | 
						|
   Puts all values to the TREE. If the tree becomes too big,
 | 
						|
   it's dumped to the file. User can request sorted values, or
 | 
						|
   just iterate through them. In the last case tree merging is performed in
 | 
						|
   memory simultaneously with iteration, so it should be ~2-3x faster.
 | 
						|
 */
 | 
						|
 | 
						|
class Unique :public Sql_alloc
 | 
						|
{
 | 
						|
  DYNAMIC_ARRAY file_ptrs;
 | 
						|
  ulong max_elements;   /* Total number of elements that will be stored in-memory */
 | 
						|
  size_t max_in_memory_size;
 | 
						|
  IO_CACHE file;
 | 
						|
  TREE tree;
 | 
						|
 /* Number of elements filtered out due to min_dupl_count when storing results
 | 
						|
    to table. See Unique::get */
 | 
						|
  ulong filtered_out_elems;
 | 
						|
  uint size;
 | 
						|
 | 
						|
  uint full_size;   /* Size of element + space needed to store the number of
 | 
						|
                       duplicates found for the element. */
 | 
						|
  uint min_dupl_count;   /* Minimum number of occurences of element required for
 | 
						|
                            it to be written to record_pointers.
 | 
						|
                            always 0 for unions, > 0 for intersections */
 | 
						|
  bool with_counters;
 | 
						|
 | 
						|
  bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
 | 
						|
  bool flush();
 | 
						|
 | 
						|
public:
 | 
						|
  ulong elements;
 | 
						|
  SORT_INFO sort;
 | 
						|
  Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
 | 
						|
	 uint size_arg, size_t max_in_memory_size_arg,
 | 
						|
         uint min_dupl_count_arg= 0);
 | 
						|
  ~Unique();
 | 
						|
  ulong elements_in_tree() { return tree.elements_in_tree; }
 | 
						|
  inline bool unique_add(void *ptr)
 | 
						|
  {
 | 
						|
    DBUG_ENTER("unique_add");
 | 
						|
    DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
 | 
						|
    if (!(tree.flag & TREE_ONLY_DUPS) && 
 | 
						|
        tree.elements_in_tree >= max_elements && flush())
 | 
						|
      DBUG_RETURN(1);
 | 
						|
    DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
 | 
						|
  }
 | 
						|
 | 
						|
  bool is_in_memory() { return (my_b_tell(&file) == 0); }
 | 
						|
  void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
 | 
						|
 | 
						|
  bool get(TABLE *table);
 | 
						|
  
 | 
						|
  /* Cost of searching for an element in the tree */
 | 
						|
  inline static double get_search_cost(ulonglong tree_elems,
 | 
						|
                                       double compare_factor)
 | 
						|
  {
 | 
						|
    return log((double) tree_elems) * compare_factor / M_LN2;
 | 
						|
  }  
 | 
						|
 | 
						|
  static double get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
 | 
						|
                             size_t max_in_memory_size, double compare_factor,
 | 
						|
                             bool intersect_fl, bool *in_memory);
 | 
						|
  inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
 | 
						|
                                            size_t max_in_memory_size)
 | 
						|
  {
 | 
						|
    size_t max_elems_in_tree=
 | 
						|
      max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
 | 
						|
 | 
						|
    if (max_elems_in_tree == 0)
 | 
						|
      max_elems_in_tree= 1;
 | 
						|
    return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree));
 | 
						|
  }
 | 
						|
 | 
						|
  void reset();
 | 
						|
  bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
 | 
						|
 | 
						|
  uint get_size() const { return size; }
 | 
						|
  size_t get_max_in_memory_size() const { return max_in_memory_size; }
 | 
						|
 | 
						|
  friend int unique_write_to_file(void* key, element_count count, void *unique);
 | 
						|
  friend int unique_write_to_ptrs(void* key, element_count count, void *unique);
 | 
						|
 | 
						|
  friend int unique_write_to_file_with_count(void *key, element_count count,
 | 
						|
                                             void *unique);
 | 
						|
  friend int unique_intersect_write_to_ptrs(void *key, element_count count,
 | 
						|
                                            void *unique);
 | 
						|
};
 | 
						|
 | 
						|
#endif /* UNIQUE_INCLUDED */
 |