mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 10:56:12 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			110 lines
		
	
	
	
		
			4.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			110 lines
		
	
	
	
		
			4.1 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (c) 2016 MariaDB corporation
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
 | |
| 
 | |
| #ifndef UNIQUE_INCLUDED
 | |
| #define UNIQUE_INCLUDED
 | |
| 
 | |
| #include "filesort.h"
 | |
| 
 | |
| /*
 | |
|    Unique -- class for unique (removing of duplicates).
 | |
|    Puts all values to the TREE. If the tree becomes too big,
 | |
|    it's dumped to the file. User can request sorted values, or
 | |
|    just iterate through them. In the last case tree merging is performed in
 | |
|    memory simultaneously with iteration, so it should be ~2-3x faster.
 | |
|  */
 | |
| 
 | |
| class Unique :public Sql_alloc
 | |
| {
 | |
|   DYNAMIC_ARRAY file_ptrs;
 | |
|   ulong max_elements;   /* Total number of elements that will be stored in-memory */
 | |
|   size_t max_in_memory_size;
 | |
|   IO_CACHE file;
 | |
|   TREE tree;
 | |
|  /* Number of elements filtered out due to min_dupl_count when storing results
 | |
|     to table. See Unique::get */
 | |
|   ulong filtered_out_elems;
 | |
|   uint size;
 | |
| 
 | |
|   uint full_size;   /* Size of element + space needed to store the number of
 | |
|                        duplicates found for the element. */
 | |
|   uint min_dupl_count;   /* Minimum number of occurences of element required for
 | |
|                             it to be written to record_pointers.
 | |
|                             always 0 for unions, > 0 for intersections */
 | |
|   bool with_counters;
 | |
| 
 | |
|   bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
 | |
|   bool flush();
 | |
| 
 | |
| public:
 | |
|   ulong elements;
 | |
|   SORT_INFO sort;
 | |
|   Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
 | |
| 	 uint size_arg, size_t max_in_memory_size_arg,
 | |
|          uint min_dupl_count_arg= 0);
 | |
|   ~Unique();
 | |
|   ulong elements_in_tree() { return tree.elements_in_tree; }
 | |
|   inline bool unique_add(void *ptr)
 | |
|   {
 | |
|     DBUG_ENTER("unique_add");
 | |
|     DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements));
 | |
|     if (!(tree.flag & TREE_ONLY_DUPS) && 
 | |
|         tree.elements_in_tree >= max_elements && flush())
 | |
|       DBUG_RETURN(1);
 | |
|     DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
 | |
|   }
 | |
| 
 | |
|   bool is_in_memory() { return (my_b_tell(&file) == 0); }
 | |
|   void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
 | |
| 
 | |
|   bool get(TABLE *table);
 | |
|   
 | |
|   /* Cost of searching for an element in the tree */
 | |
|   inline static double get_search_cost(ulonglong tree_elems,
 | |
|                                        double compare_factor)
 | |
|   {
 | |
|     return log((double) tree_elems) * compare_factor / M_LN2;
 | |
|   }  
 | |
| 
 | |
|   static double get_use_cost(THD *thd, uint *buffer, size_t nkeys, uint key_size,
 | |
|                              size_t max_in_memory_size, double compare_factor,
 | |
|                              bool intersect_fl, bool *in_memory);
 | |
|   inline static int get_cost_calc_buff_size(size_t nkeys, uint key_size,
 | |
|                                             size_t max_in_memory_size)
 | |
|   {
 | |
|     size_t max_elems_in_tree=
 | |
|       max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
 | |
| 
 | |
|     if (max_elems_in_tree == 0)
 | |
|       max_elems_in_tree= 1;
 | |
|     return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree));
 | |
|   }
 | |
| 
 | |
|   void reset();
 | |
|   bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg);
 | |
| 
 | |
|   uint get_size() const { return size; }
 | |
|   size_t get_max_in_memory_size() const { return max_in_memory_size; }
 | |
| 
 | |
|   friend int unique_write_to_file(void* key, element_count count, void *unique);
 | |
|   friend int unique_write_to_ptrs(void* key, element_count count, void *unique);
 | |
| 
 | |
|   friend int unique_write_to_file_with_count(void *key, element_count count,
 | |
|                                              void *unique);
 | |
|   friend int unique_intersect_write_to_ptrs(void *key, element_count count,
 | |
|                                             void *unique);
 | |
| };
 | |
| 
 | |
| #endif /* UNIQUE_INCLUDED */
 | 
