mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 10:56:12 +01:00 
			
		
		
		
	 d5e15424d8
			
		
	
	
	d5e15424d8
	
	
	
		
			
			The MDEV-29693 conflict resolution is from Monty, as is
a bug fix where ANALYZE TABLE wrongly built histograms for
a single-column PRIMARY KEY.
Also includes a fix for safe_malloc error reporting.
Other things:
- Copied main.log_slow from 10.4 to avoid mtr issue
Disabled test:
- spider/bugfix.mdev_27239 because we started to get
  +Error	1429 Unable to connect to foreign data source: localhost
  -Error	1158 Got an error reading communication packets
- main.delayed
  - Bug#54332 Deadlock with two connections doing LOCK TABLE+INSERT DELAYED
    This part is disabled for now as it fails randomly with different
    warnings/errors (no corruption).
		
	
			
		
			
				
	
	
		
			147 lines
		
	
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			147 lines
		
	
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|    Copyright (c) 2021, MariaDB Corporation.
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
 | |
| 
 | |
| #include "sql_statistics.h"
 | |
| 
 | |
| /*
 | |
|   An equi-height histogram which stores real values for bucket bounds.
 | |
| 
 | |
|   Handles @@histogram_type=JSON_HB
 | |
| 
 | |
|   Histogram format in JSON:
 | |
| 
 | |
|   {
 | |
|     // The next three are saved but not currently analyzed:
 | |
|     "target_histogram_size": nnn,
 | |
|     "collected_at": "(date and time)",
 | |
|     "collected_by": "(server version)",
 | |
| 
 | |
|     "histogram_hb": [
 | |
|       { "start": "value", "size":nnn.nn, "ndv": nnn },
 | |
|       ...
 | |
| 
 | |
|       // Optionally, start and/or end can be replaced with _hex variant
 | |
|       { "start_hex": "value", "size":nnn.nn, "ndv":nnn},
 | |
| 
 | |
|       ...
 | |
|       { "start": "value", "size":nnn.nn, "ndv": nnn, "end": "value"},
 | |
|     ]
 | |
|   }
 | |
| 
 | |
|   Histogram is a JSON object. It has some global properties and "histogram_hb"
 | |
|   member whose value is a JSON array of histogram buckets.
 | |
| 
 | |
|   Each bucket is an object with these members:
 | |
|     "start" - the first value in the bucket.
 | |
|     "size"  - fraction of table rows that is contained in the bucket.
 | |
|     "ndv"   - Number of Distinct Values in the bucket.
 | |
|     "end"   - Optionally, the last value in the bucket.
 | |
| 
 | |
|   A bucket is a single-point bucket if it has ndv=1.
 | |
| 
 | |
|   Most buckets have no "end" member: the bucket is assumed to contain all
 | |
|   values up to the "start" of the next bucket.
 | |
| 
 | |
|   The exception is single-point buckets where last value is the same as the
 | |
|   first value.
 | |
| 
 | |
|   start/end can be replaced with start_hex/end_hex. In _hex variant, the
 | |
|   constant is encoded in hex. This encoding is used to handle so called 
 | |
|   "unassigned characters": some non-UTF8 charsets have byte combinations that
 | |
|   are not mapped to any UTF8 character.
 | |
| */
 | |
| 
 | |
| class Histogram_json_hb final : public Histogram_base
 | |
| {
 | |
|   size_t size; /* Number of elements in the histogram */
 | |
| 
 | |
|   /* Collection-time only: collected histogram in the JSON form. */
 | |
|   std::string json_text;
 | |
| 
 | |
|   struct Bucket
 | |
|   {
 | |
|     // The left endpoint in KeyTupleFormat. The endpoint is inclusive, this
 | |
|     // value is in this bucket.
 | |
|     std::string start_value;
 | |
| 
 | |
|     // Cumulative fraction: The fraction of table rows that fall into this
 | |
|     //  and preceding buckets.
 | |
|     double cum_fract;
 | |
| 
 | |
|     // Number of distinct values in the bucket.
 | |
|     longlong ndv;
 | |
|   };
 | |
| 
 | |
|   std::vector<Bucket> buckets;
 | |
| 
 | |
|   std::string last_bucket_end_endp;
 | |
| 
 | |
| public:
 | |
|   static constexpr const char* JSON_NAME="histogram_hb";
 | |
| 
 | |
|   bool parse(MEM_ROOT *mem_root, const char *db_name, const char *table_name,
 | |
|              Field *field, const char *hist_data,
 | |
|              size_t hist_data_len) override;
 | |
| 
 | |
|   void serialize(Field *field) override;
 | |
| 
 | |
|   Histogram_builder *create_builder(Field *col, uint col_len,
 | |
|                                     ha_rows rows) override;
 | |
| 
 | |
|   // returns number of buckets in the histogram
 | |
|   uint get_width() override
 | |
|   {
 | |
|     return (uint)size;
 | |
|   }
 | |
| 
 | |
|   Histogram_type get_type() override
 | |
|   {
 | |
|     return JSON_HB;
 | |
|   }
 | |
| 
 | |
|   /*
 | |
|     @brief
 | |
|       This used to be the size of the histogram on disk, which was redundant
 | |
|       (one can check the size directly). Return the number of buckets instead.
 | |
|   */
 | |
|   uint get_size() override
 | |
|   {
 | |
|     return (uint)size;
 | |
|   }
 | |
|   void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
 | |
|                            ulonglong size) override;
 | |
| 
 | |
|   double point_selectivity(Field *field, key_range *endpoint,
 | |
|                            double avg_sel) override;
 | |
|   double range_selectivity(Field *field, key_range *min_endp,
 | |
|                            key_range *max_endp, double avg_sel) override;
 | |
| 
 | |
|   void set_json_text(ulonglong sz, const char *json_text_arg,
 | |
|                      size_t json_text_len)
 | |
|   {
 | |
|     size= (size_t) sz;
 | |
|     json_text.assign(json_text_arg, json_text_len);
 | |
|   }
 | |
| 
 | |
| private:
 | |
|   int parse_bucket(json_engine_t *je, Field *field, double *cumulative_size,
 | |
|                    bool *assigned_last_end, const char **err);
 | |
| 
 | |
|   double get_left_fract(int idx);
 | |
|   std::string& get_end_value(int idx);
 | |
|   int find_bucket(const Field *field, const uchar *lookup_val, int *cmp);
 | |
| };
 | |
| 
 |