mariadb/sql/item_sum.cc

3393 lines
82 KiB
C++
Raw Normal View History

2003-02-10 17:03:27 +02:00
/* Copyright (C) 2000-2003 MySQL AB
2000-07-31 21:29:14 +02:00
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
2000-07-31 21:29:14 +02:00
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
2000-07-31 21:29:14 +02:00
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* Sum functions (COUNT, MIN...) */
#ifdef USE_PRAGMA_IMPLEMENTATION
2000-07-31 21:29:14 +02:00
#pragma implementation // gcc: Class implementation
#endif
#include "mysql_priv.h"
2005-03-01 22:19:19 +02:00
#include "sql_select.h"
/*
Prepare an aggregate function item for checking context conditions
SYNOPSIS
init_sum_func_check()
thd reference to the thread context info
DESCRIPTION
The function initializes the members of the Item_sum object created
for a set function that are used to check validity of the set function
occurrence.
If the set function is not allowed in any subquery where it occurs
an error is reported immediately.
NOTES
This function is to be called for any item created for a set function
object when the traversal of trees built for expressions used in the query
is performed at the phase of context analysis. This function is to
be invoked at the descent of this traversal.
RETURN
TRUE if an error is reported
FALSE otherwise
*/
bool Item_sum::init_sum_func_check(THD *thd)
{
  if (thd->lex->allow_sum_func)
  {
    /*
      Reset the members used later by check_sum_func(); -1 marks a level
      that has not been determined yet.
    */
    max_sum_func_level= -1;
    max_arg_level= -1;
    aggr_level= -1;
    ref_by= 0;
    nest_level= thd->lex->current_select->nest_level;
    /*
      Remember the set function we are nested in (if any), then make this
      object the current one so that set functions found in our arguments
      can refer to it.
    */
    in_sum_func= thd->lex->in_sum_func;
    thd->lex->in_sum_func= this;
    return FALSE;
  }
  /* No bit is set in allow_sum_func: a set function is not allowed here */
  my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE),
             MYF(0));
  return TRUE;
}
/*
Check constraints imposed on a usage of a set function
SYNOPSIS
check_sum_func()
thd reference to the thread context info
ref location of the pointer to this item in the embedding expression
DESCRIPTION
The method verifies whether context conditions imposed on a usage
of any set function are met for this occurrence.
It checks whether the set function occurs in the position where it
can be aggregated and, when it happens to occur in argument of another
set function, the method checks that these two functions are aggregated in
different subqueries.
If the context conditions are not met the method reports an error.
If the set function is aggregated in some outer subquery the method
adds it to the chain of items for such set functions that is attached
to the st_select_lex structure for this subquery.
NOTES
This function is to be called for any item created for a set function
object when the traversal of trees built for expressions used in the query
is performed at the phase of context analysis. This function is to
be invoked at the ascent of this traversal.
IMPLEMENTATION
A number of designated members of the object are used to check the
conditions. They are specified in the comment before the Item_sum
class declaration.
Additionally a bitmap variable called allow_sum_func is employed.
It is included into the thd->lex structure.
The bitmap contains 1 at n-th position if the set function happens
to occur under a construct of the n-th level subquery where usage
of set functions is allowed (i.e. either in the SELECT list or
in the HAVING clause of the corresponding subquery)
Consider the query:
SELECT SUM(t1.b) FROM t1 GROUP BY t1.a
HAVING t1.a IN (SELECT t2.c FROM t2 WHERE AVG(t1.b) > 20) AND
t1.a > (SELECT MIN(t2.d) FROM t2);
allow_sum_func will contain:
for SUM(t1.b) - 1 at the first position
for AVG(t1.b) - 1 at the first position, 0 at the second position
for MIN(t2.d) - 1 at the first position, 1 at the second position.
RETURN
TRUE if an error is reported
FALSE otherwise
*/
bool Item_sum::check_sum_func(THD *thd, Item **ref)
{
  bool invalid= FALSE;
  nesting_map allow_sum_func= thd->lex->allow_sum_func;
  /*
    The value of max_arg_level is updated if an argument of the set function
    contains a column reference resolved against a subquery whose level is
    greater than the current value of max_arg_level.
    max_arg_level cannot be greater than nest level.
    nest level is always >= 0

    Note: the bit masks below are built from (nesting_map) 1 so that the
    shift is performed in the width of nesting_map and not in plain int.
  */
  if (nest_level == max_arg_level)
  {
    /*
      The function must be aggregated in the current subquery.
      If it is there under a construct where it is not allowed
      we report an error.
    */
    invalid= !(allow_sum_func & ((nesting_map) 1 << max_arg_level));
  }
  else if (max_arg_level >= 0 ||
           !(allow_sum_func & ((nesting_map) 1 << nest_level)))
  {
    /*
      The set function can be aggregated only in outer subqueries.
      Try to find a subquery where it can be aggregated;
      if we fail to find such a subquery report an error.
    */
    if (register_sum_func(thd, ref))
      return TRUE;
    invalid= aggr_level < 0 &&
             !(allow_sum_func & ((nesting_map) 1 << nest_level));
  }
  /* Nothing was found in outer subqueries: aggregate in the current one */
  if (!invalid && aggr_level < 0)
    aggr_level= nest_level;
  /*
    By this moment we either found a subquery where the set function is
    to be aggregated and assigned a value that is >= 0 to aggr_level,
    or set the value of 'invalid' to TRUE to report later an error.
  */
  /*
    Additionally we have to check whether possible nested set functions
    are acceptable here. max_sum_func_level holds the largest aggregation
    level recorded by set functions nested in this one (see the
    set_if_bigger() call below); the nesting is rejected unless that level
    is strictly below aggr_level.
    A failure detected above must be kept, so the check is performed only
    when 'invalid' has not been set yet.
  */
  if (!invalid)
    invalid= aggr_level <= max_sum_func_level;
  if (invalid)
  {
    my_message(ER_INVALID_GROUP_FUNC_USE, ER(ER_INVALID_GROUP_FUNC_USE),
               MYF(0));
    return TRUE;
  }
  if (in_sum_func && in_sum_func->nest_level == nest_level)
  {
    /*
      If the set function is nested adjust the value of
      max_sum_func_level for the nesting set function.
    */
    set_if_bigger(in_sum_func->max_sum_func_level, aggr_level);
  }
  /* Leaving this set function: restore the enclosing one as current */
  thd->lex->in_sum_func= in_sum_func;
  return FALSE;
}
/*
Attach a set function to the subquery where it must be aggregated
SYNOPSIS
register_sum_func()
thd reference to the thread context info
ref location of the pointer to this item in the embedding expression
DESCRIPTION
The function looks for an outer subquery where the set function must be
aggregated. If it finds such a subquery then aggr_level is set to
the nest level of this subquery and the item for the set function
is added to the list of set functions used in nested subqueries
inner_sum_func_list defined for each subquery. When the item is placed
there the field 'ref_by' is set to ref.
NOTES.
Now we 'register' only set functions that are aggregated in outer
subqueries. Actually it makes sense to link all set function for
a subquery in one chain. It would simplify the process of 'splitting'
for set functions.
RETURN
FALSE if the function executes without failures (currently always)
TRUE otherwise
*/
bool Item_sum::register_sum_func(THD *thd, Item **ref)
{
  SELECT_LEX *sl;
  SELECT_LEX *aggr_sl= NULL;
  nesting_map allow_sum_func= thd->lex->allow_sum_func;
  /*
    Walk outwards through the enclosing selects whose level is still above
    max_arg_level. The bit masks are built from (nesting_map) 1 so that the
    shift is performed in the width of nesting_map and not in plain int.
  */
  for (sl= thd->lex->current_select->master_unit()->outer_select() ;
       sl && sl->nest_level > max_arg_level;
       sl= sl->master_unit()->outer_select() )
  {
    if (aggr_level < 0 &&
        (allow_sum_func & ((nesting_map) 1 << sl->nest_level)))
    {
      /* Found the most nested subquery where the function can be aggregated */
      aggr_level= sl->nest_level;
      aggr_sl= sl;
    }
  }
  if (sl && (allow_sum_func & ((nesting_map) 1 << sl->nest_level)))
  {
    /*
      We reached the subquery of level max_arg_level and checked
      that the function can be aggregated here.
      The set function will be aggregated in this subquery.
    */
    aggr_level= sl->nest_level;
    aggr_sl= sl;
  }
  if (aggr_level >= 0)
  {
    ref_by= ref;
    /*
      Add the object to the list of registered objects assigned to aggr_sl.
      The list is circular: inner_sum_func_list points to the element added
      last and that element's 'next' leads to the rest of the ring.
    */
    if (!aggr_sl->inner_sum_func_list)
      next= this;
    else
    {
      next= aggr_sl->inner_sum_func_list->next;
      aggr_sl->inner_sum_func_list->next= this;
    }
    aggr_sl->inner_sum_func_list= this;
    aggr_sl->with_sum_func= 1;
    /*
      Mark Item_subselect(s) as containing aggregate function all the way up
      to aggregate function's calculation context.
      Note that we must not mark the Item of calculation context itself
      because with_sum_func on the calculation context st_select_lex is
      already set above.
      with_sum_func being set for an Item means that this Item refers
      (somewhere in it, e.g. one of its arguments if it's a function) directly
      or through intermediate items to an aggregate function that is calculated
      in a context "outside" of the Item (e.g. in the current or outer select).
      with_sum_func being set for an st_select_lex means that this st_select_lex
      has aggregate functions directly referenced (i.e. not through a sub-select).
    */
    for (sl= thd->lex->current_select;
         sl && sl != aggr_sl && sl->master_unit()->item;
         sl= sl->master_unit()->outer_select() )
      sl->master_unit()->item->with_sum_func= 1;
  }
  return FALSE;
}
2000-07-31 21:29:14 +02:00
/*
  Construct a set function item from a list of argument items.
  The pointers to the arguments are copied into the 'args' array and
  the list passed by the caller is emptied afterwards.
*/
Item_sum::Item_sum(List<Item> &list)
  :arg_count(list.elements)
{
  args= (Item**) sql_alloc(sizeof(Item*)*arg_count);
  if (args)
  {
    /* Copy the list elements, in list order, into the array */
    List_iterator_fast<Item> it(list);
    Item **slot= args;
    for (Item *cur= it++; cur; cur= it++)
      *slot++= cur;
  }
  mark_as_sum_func();
  list.empty();                                 // Fields are used
}
/*
Constructor used in processing select with temporary tables
*/
Item_sum::Item_sum(THD *thd, Item_sum *item):
  Item_result_field(thd, item), arg_count(item->arg_count),
  quick_group(item->quick_group)
{
  if (arg_count > 2)
  {
    /* Too many arguments for the embedded tmp_args array: allocate one */
    args= (Item**) thd->alloc(sizeof(Item*)*arg_count);
    if (!args)
      return;                                   // Allocation failed
  }
  else
    args= tmp_args;
  /* Only the pointers are copied; the argument items remain shared */
  memcpy(args, item->args, sizeof(Item*)*arg_count);
}
void Item_sum::mark_as_sum_func()
{
SELECT_LEX *cur_select= current_thd->lex->current_select;
cur_select->n_sum_items++;
cur_select->with_sum_func= 1;
with_sum_func= 1;
}
2000-07-31 21:29:14 +02:00
2000-07-31 21:29:14 +02:00
/*
  Fill in the result set metadata for this set function.
  When the first argument is a plain field and keep_field_type() holds,
  the description is taken from that field and then adjusted;
  otherwise it is built from field_type().
*/
void Item_sum::make_field(Send_field *tmp_field)
{
  if (args[0]->type() != Item::FIELD_ITEM || !keep_field_type())
  {
    init_make_field(tmp_field, field_type());
    return;
  }

  ((Item_field*) args[0])->field->make_field(tmp_field);
  /* For expressions only col_name should be non-empty string. */
  char *empty_string= (char*)"";
  tmp_field->org_col_name= empty_string;
  tmp_field->org_table_name= empty_string;
  tmp_field->table_name= empty_string;
  tmp_field->db_name= empty_string;
  tmp_field->col_name= name;
  /* The set function may return NULL even if the field itself cannot */
  if (maybe_null)
    tmp_field->flags&= ~NOT_NULL_FLAG;
}
2000-07-31 21:29:14 +02:00
/*
  Append the textual form of the set function to str: the value of
  func_name(), the arguments separated by commas and a closing ')'.
  NOTE(review): no '(' is appended here, so func_name() presumably
  already ends with one - confirm against the func_name() implementations.
*/
void Item_sum::print(String *str)
{
  str->append(func_name());
  if (arg_count)
  {
    args[0]->print(str);
    for (uint idx= 1 ; idx < arg_count ; idx++)
    {
      str->append(',');
      args[idx]->print(str);
    }
  }
  str->append(')');
}
void Item_sum::fix_num_length_and_dec()
{
decimals=0;
for (uint i=0 ; i < arg_count ; i++)
set_if_bigger(decimals,args[i]->decimals);
max_length=float_length(decimals);
}
/*
  Return the item obtained from copy_or_same() with every non-constant
  argument re-pointed to a Field taken, in argument order, from the
  sequence that starts at the item's result_field.
  An argument that is an Item_field gets its 'field' member replaced;
  any other argument is replaced by a new Item_field.
  NOTE(review): this relies on the Field objects following result_field
  being laid out consecutively - confirm against the code that sets
  result_field.
*/
Item *Item_sum::get_tmp_table_item(THD *thd)
{
  Item_sum *copy= (Item_sum *) copy_or_same(thd);
  if (!copy || !copy->result_field)             // Const sum func or no copy
    return copy;

  Field *next_field= copy->result_field;
  for (uint idx= 0 ; idx < copy->arg_count ; idx++)
  {
    Item *cur= copy->args[idx];
    if (cur->const_item())
      continue;                                 // Constants need no field
    if (cur->type() != Item::FIELD_ITEM)
      copy->args[idx]= new Item_field(next_field++);
    else
      ((Item_field*) cur)->field= next_field++;
  }
  return copy;
}
2000-07-31 21:29:14 +02:00
2005-02-09 02:50:45 +04:00
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. 
- handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. 
Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. 
(This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. 
- The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. 
(The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. 
The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. 
This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. 
- Remove calls to free_io_cache() as this is now done automatically in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparison with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this thoroughly). Lars has promised to do this.
2006-06-04 18:52:22 +03:00
bool Item_sum::walk (Item_processor processor, bool walk_subquery,
byte *argument)
{
if (arg_count)
{
Item **arg,**arg_end;
for (arg= args, arg_end= args+arg_count; arg != arg_end; arg++)
{
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. 
- handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. 
Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. 
(This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. 
- The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. 
(The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. 
The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. 
This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. 
- Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 18:52:22 +03:00
if ((*arg)->walk(processor, walk_subquery, argument))
return 1;
}
}
return (this->*processor)(argument);
}
2005-02-09 02:50:45 +04:00
/*
  Create a field object able to hold the value of this set function.

  SYNOPSIS
    create_tmp_field()
    group                not used by this implementation
    table                table the new field is initialized for
    convert_blob_length  length to use for a Field_varstring; when it is 0
                         string results always go through
                         make_string_field()

  DESCRIPTION
    The field class is selected from result_type(). For string results
    the length in characters (max_length divided by the collation's
    mbmaxlen) decides between make_string_field() and a Field_varstring
    of length convert_blob_length.

  RETURN
    The new field, initialized with 'table', or the result of
    make_string_field(); 0 if the allocation failed or the result type
    is not one of the handled ones.
*/

Field *Item_sum::create_tmp_field(bool group, TABLE *table,
                                  uint convert_blob_length)
{
  Field *new_field;

  switch (result_type()) {
  case REAL_RESULT:
    new_field= new Field_double(max_length, maybe_null, name, decimals);
    break;

  case INT_RESULT:
    new_field= new Field_longlong(max_length, maybe_null, name,
                                  unsigned_flag);
    break;

  case STRING_RESULT:
  {
    const uint char_length= max_length / collation.collation->mbmaxlen;
    const bool use_varstring= (char_length > 255 &&
                               char_length < UINT_MAX16 &&
                               convert_blob_length);
    if (!use_varstring)
      return make_string_field(table);
    new_field= new Field_varstring(convert_blob_length, maybe_null,
                                   name, table->s, collation.collation);
    break;
  }

  case DECIMAL_RESULT:
    new_field= new Field_new_decimal(max_length, maybe_null, name,
                                     decimals, unsigned_flag);
    break;

  case ROW_RESULT:
  default:
    /* A set function is never expected to have any other result type */
    DBUG_ASSERT(0);
    return 0;
  }

  if (new_field)
    new_field->init(table);
  return new_field;
}
2000-07-31 21:29:14 +02:00
/*
  Return the value of the set function as a string.

  The conversion is delegated to val_string_from_real(), which receives
  the caller's buffer 'str'.
*/

String *Item_sum_num::val_str(String *str)
{
  String *result= val_string_from_real(str);
  return result;
}
2005-02-09 02:50:45 +04:00
/*
  Return the value of the set function as a decimal.

  The conversion is delegated to val_decimal_from_real(), which receives
  the caller's buffer 'decimal_value'.
*/

my_decimal *Item_sum_num::val_decimal(my_decimal *decimal_value)
{
  my_decimal *result= val_decimal_from_real(decimal_value);
  return result;
}
2000-07-31 21:29:14 +02:00
/*
  Return the value of the set function as a string.

  The conversion is delegated to val_string_from_int(), which receives
  the caller's buffer 'str'.
*/

String *Item_sum_int::val_str(String *str)
{
  String *result= val_string_from_int(str);
  return result;
}
/*
  Return the value of the set function as a decimal.

  The conversion is delegated to val_decimal_from_int(), which receives
  the caller's buffer 'decimal_value'.
*/

my_decimal *Item_sum_int::val_decimal(my_decimal *decimal_value)
{
  my_decimal *result= val_decimal_from_int(decimal_value);
  return result;
}
/*
  Resolve the arguments of the set function and set up its attributes.

  SYNOPSIS
    fix_fields()
    thd  thread context
    ref  passed on to check_sum_func()

  DESCRIPTION
    After init_sum_func_check() succeeds every argument is fixed and
    checked to consist of exactly one column. 'decimals' becomes the
    largest 'decimals' among the arguments and 'maybe_null' is set if any
    argument may be NULL. max_length is then derived from 'decimals'
    before fix_length_and_dec() and check_sum_func() are called.

  RETURN
    TRUE   on error
    FALSE  on success; the item is then marked as fixed
*/

bool Item_sum_num::fix_fields(THD *thd, Item **ref)
{
  DBUG_ASSERT(fixed == 0);

  if (init_sum_func_check(thd))
    return TRUE;

  decimals= 0;
  maybe_null= 0;
  for (uint idx= 0; idx < arg_count; idx++)
  {
    if (args[idx]->fix_fields(thd, args + idx))
      return TRUE;
    /*
      args[idx] is read again on purpose: fix_fields() was given the
      address of the slot and so may have put another item there.
    */
    Item *fixed_arg= args[idx];
    if (fixed_arg->check_cols(1))
      return TRUE;
    set_if_bigger(decimals, fixed_arg->decimals);
    maybe_null|= fixed_arg->maybe_null;
  }

  result_field= 0;
  max_length= float_length(decimals);
  null_value= 1;
  fix_length_and_dec();

  if (check_sum_func(thd, ref))
    return TRUE;

  fixed= 1;
  return FALSE;
}
2005-02-09 02:50:45 +04:00
/*
  Construct a copy of an existing Item_sum_hybrid.

  Besides the members copied in the initializer list, the accumulator
  that matches hybrid_type (sum_int, sum_dec or sum) is copied from
  'item', and the collation is taken over from 'item' as well.
*/

Item_sum_hybrid::Item_sum_hybrid(THD *thd, Item_sum_hybrid *item)
  :Item_sum(thd, item), value(item->value), hybrid_type(item->hybrid_type),
   hybrid_field_type(item->hybrid_field_type), cmp_sign(item->cmp_sign),
   used_table_cache(item->used_table_cache), was_values(item->was_values)
{
  /* Take over the intermediate result accumulated so far */
  switch (hybrid_type) {
  case REAL_RESULT:
    sum= item->sum;
    break;

  case INT_RESULT:
    sum_int= item->sum_int;
    break;

  case DECIMAL_RESULT:
    my_decimal2decimal(&item->sum_dec, &sum_dec);
    break;

  case STRING_RESULT:
    /*
      Nothing to do here: this case can occur with ROLLUP, and the
      value has already been copied at function call.
    */
    break;

  case ROW_RESULT:
  default:
    DBUG_ASSERT(0);
  }

  collation.set(item->collation);
}
2000-07-31 21:29:14 +02:00
/*
  Resolve the argument of the set function and set up its attributes.

  SYNOPSIS
    fix_fields()
    thd  thread context
    ref  passed on to check_sum_func()

  DESCRIPTION
    The single argument is fixed (unless it already is) and checked to
    consist of exactly one column. The result type, length, decimals,
    signedness and collation are then taken from the argument and the
    matching accumulator is reset. hybrid_field_type is the type of the
    underlying field when the argument is a field item, otherwise
    Item::field_type().

  RETURN
    TRUE   on error
    FALSE  on success; the item is then marked as fixed
*/

bool Item_sum_hybrid::fix_fields(THD *thd, Item **ref)
{
  DBUG_ASSERT(fixed == 0);

  Item *item= args[0];

  if (init_sum_func_check(thd))
    return TRUE;

  if (!item->fixed && item->fix_fields(thd, args))
    return TRUE;

  /* fix_fields() can change args[0], so pick the argument up again */
  item= args[0];
  if (item->check_cols(1))
    return TRUE;

  decimals= item->decimals;
  hybrid_type= item->result_type();

  switch (hybrid_type) {
  case STRING_RESULT:
    max_length= item->max_length;
    break;

  case REAL_RESULT:
    max_length= float_length(decimals);
    sum= 0.0;
    break;

  case DECIMAL_RESULT:
    max_length= item->max_length;
    my_decimal_set_zero(&sum_dec);
    break;

  case INT_RESULT:
    max_length= 20;
    sum_int= 0;
    break;

  case ROW_RESULT:
  default:
    DBUG_ASSERT(0);
  }

  /* MIN/MAX can return NULL for an empty set independent of the column */
  maybe_null= 1;
  unsigned_flag= item->unsigned_flag;
  collation.set(item->collation);
  result_field= 0;
  null_value= 1;
  fix_length_and_dec();

  if (item->type() == Item::FIELD_ITEM)
    hybrid_field_type= ((Item_field*) item)->field->type();
  else
    hybrid_field_type= Item::field_type();

  if (check_sum_func(thd, ref))
    return TRUE;

  fixed= 1;
  return FALSE;
}
2005-03-01 22:19:19 +02:00
/*
  Create a field object able to hold the value of this set function.

  SYNOPSIS
    create_tmp_field()
    group                passed on to Item_sum::create_tmp_field()
    table                table the new field is created for
    convert_blob_length  passed on to the field creation functions

  DESCRIPTION
    If the argument is a field item, the new field is created from that
    field and its NOT_NULL_FLAG is cleared. Otherwise DATE, TIME,
    TIMESTAMP and DATETIME arguments get a dedicated field class and
    everything else is left to Item_sum::create_tmp_field().

  RETURN
    The new field, or 0 if it could not be created
*/

Field *Item_sum_hybrid::create_tmp_field(bool group, TABLE *table,
                                         uint convert_blob_length)
{
  if (args[0]->type() == Item::FIELD_ITEM)
  {
    Field *source_field= ((Item_field*) args[0])->field;
    Field *copied_field= create_tmp_field_from_field(current_thd,
                                                     source_field, name,
                                                     table, NULL,
                                                     convert_blob_length);
    if (copied_field)
      copied_field->flags&= ~NOT_NULL_FLAG;
    return copied_field;
  }

  /*
    DATE/TIME fields have STRING_RESULT result types, so they have to be
    created separately here in order to preserve the field type.
  */
  Field *temporal_field;

  switch (args[0]->field_type()) {
  case MYSQL_TYPE_TIMESTAMP:
  case MYSQL_TYPE_DATETIME:
    temporal_field= new Field_datetime(maybe_null, name,
                                       collation.collation);
    break;

  case MYSQL_TYPE_TIME:
    temporal_field= new Field_time(maybe_null, name, collation.collation);
    break;

  case MYSQL_TYPE_DATE:
    temporal_field= new Field_date(maybe_null, name, collation.collation);
    break;

  default:
    return Item_sum::create_tmp_field(group, table, convert_blob_length);
  }

  if (temporal_field)
    temporal_field->init(table);
  return temporal_field;
}
2000-07-31 21:29:14 +02:00
/***********************************************************************
** reset and add of sum_func
***********************************************************************/
2005-02-09 02:50:45 +04:00
/*
  Construct a copy of an existing Item_sum_sum.

  Depending on hybrid_type either both decimal buffers or the double
  accumulator 'sum' are copied from 'item'.
*/

Item_sum_sum::Item_sum_sum(THD *thd, Item_sum_sum *item)
  :Item_sum_num(thd, item), hybrid_type(item->hybrid_type),
   curr_dec_buff(item->curr_dec_buff)
{
  /* TODO: check if the following assignments are really needed */
  if (hybrid_type != DECIMAL_RESULT)
  {
    sum= item->sum;
    return;
  }

  for (int buff= 0; buff < 2; buff++)
    my_decimal2decimal(item->dec_buffs + buff, dec_buffs + buff);
}
/*
  Return a new Item_sum_sum copy-constructed from this item.

  The copy is allocated on thd->mem_root.
*/

Item *Item_sum_sum::copy_or_same(THD* thd)
{
  Item_sum_sum *copy= new (thd->mem_root) Item_sum_sum(thd, this);
  return copy;
}
void Item_sum_sum::clear()
{
2005-02-09 02:50:45 +04:00
DBUG_ENTER("Item_sum_sum::clear");
null_value=1;
if (hybrid_type == DECIMAL_RESULT)
{
curr_dec_buff= 0;
my_decimal_set_zero(dec_buffs);
}
else
sum= 0.0;
DBUG_VOID_RETURN;
}
void Item_sum_sum::fix_length_and_dec()
{
DBUG_ENTER("Item_sum_sum::fix_length_and_dec");
maybe_null=null_value=1;
decimals= args[0]->decimals;
switch (args[0]->result_type()) {
2005-02-09 02:50:45 +04:00
case REAL_RESULT:
case STRING_RESULT:
hybrid_type= REAL_RESULT;
sum= 0.0;
break;
case INT_RESULT:
case DECIMAL_RESULT:
{
2005-02-09 02:50:45 +04:00
/* SUM result can't be longer than length(arg) + length(MAX_ROWS) */
int precision= args[0]->decimal_precision() + DECIMAL_LONGLONG_DIGITS;
max_length= my_decimal_precision_to_length(precision, decimals,
unsigned_flag);
2005-02-09 02:50:45 +04:00
curr_dec_buff= 0;
hybrid_type= DECIMAL_RESULT;
my_decimal_set_zero(dec_buffs);
break;
}
2005-02-09 02:50:45 +04:00
case ROW_RESULT:
default:
DBUG_ASSERT(0);
}
DBUG_PRINT("info", ("Type: %s (%d, %d)",
(hybrid_type == REAL_RESULT ? "REAL_RESULT" :
hybrid_type == DECIMAL_RESULT ? "DECIMAL_RESULT" :
hybrid_type == INT_RESULT ? "INT_RESULT" :
"--ILLEGAL!!!--"),
max_length,
(int)decimals));
DBUG_VOID_RETURN;
}
2000-07-31 21:29:14 +02:00
bool Item_sum_sum::add()
{
2005-02-09 02:50:45 +04:00
DBUG_ENTER("Item_sum_sum::add");
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *val= args[0]->val_decimal(&value);
if (!args[0]->null_value)
{
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs + (curr_dec_buff^1),
val, dec_buffs + curr_dec_buff);
curr_dec_buff^= 1;
null_value= 0;
}
}
else
{
sum+= args[0]->val_real();
if (!args[0]->null_value)
null_value= 0;
}
DBUG_RETURN(0);
}
/*
  Return the sum as an integer.

  A DECIMAL sum is converted from the current decimal buffer with
  my_decimal2int(); any other sum is the result of val_real() passed
  through rint().
*/

longlong Item_sum_sum::val_int()
{
  DBUG_ASSERT(fixed == 1);

  if (hybrid_type != DECIMAL_RESULT)
    return (longlong) rint(val_real());

  longlong int_value;
  my_decimal2int(E_DEC_FATAL_ERROR, dec_buffs + curr_dec_buff, unsigned_flag,
                 &int_value);
  return int_value;
}
2004-11-11 21:39:35 +03:00
double Item_sum_sum::val_real()
2000-07-31 21:29:14 +02:00
{
DBUG_ASSERT(fixed == 1);
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
my_decimal2double(E_DEC_FATAL_ERROR, dec_buffs + curr_dec_buff, &sum);
2000-07-31 21:29:14 +02:00
return sum;
}
/*
  Return the sum as a string.

  The conversion goes through val_string_from_decimal() for DECIMAL sums
  and through val_string_from_real() otherwise.
*/

String *Item_sum_sum::val_str(String *str)
{
  return (hybrid_type == DECIMAL_RESULT ? val_string_from_decimal(str) :
                                          val_string_from_real(str));
}
/*
  Return the sum as a decimal.

  For DECIMAL sums a pointer to the current internal decimal buffer is
  returned and 'val' is not used; otherwise the value is produced by
  val_decimal_from_real(), which receives 'val'.
*/

my_decimal *Item_sum_sum::val_decimal(my_decimal *val)
{
  if (hybrid_type != DECIMAL_RESULT)
    return val_decimal_from_real(val);

  my_decimal *current= dec_buffs + curr_dec_buff;
  return current;
}
/***************************************************************************/
C_MODE_START
/* Declarations for auxiliary C-callbacks */
/*
  Compare two keys as raw bytes.

  SYNOPSIS
    simple_raw_key_cmp()
    arg   pointer to a uint holding the number of bytes to compare
    key1  first key
    key2  second key

  RETURN
    The result of memcmp() over that many bytes of key1 and key2
*/

static int simple_raw_key_cmp(void* arg, const void* key1, const void* key2)
{
  const uint key_length= *static_cast<uint *>(arg);
  return memcmp(key1, key2, key_length);
}
static int item_sum_distinct_walk(void *element, element_count num_of_dups,
void *item)
{
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. 
- handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. 
Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. 
(This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. 
- The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. 
(The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. 
The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. 
This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. 
- Remove calls to free_io_cache() as this is now done automaticly in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparision with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this throughly). Lars has promosed to do this.
2006-06-04 18:52:22 +03:00
return ((Item_sum_distinct*) (item))->unique_walk_function(element);
}
C_MODE_END
/* Item_sum_distinct */
/*
  Construct a SUM/AVG(DISTINCT) item over item_arg.  No tree exists until
  setup() is called.
*/
Item_sum_distinct::Item_sum_distinct(Item *item_arg)
  :Item_sum_num(item_arg), tree(0)
{
  /*
    quick_group is an optimizer hint, which means that GROUP BY can be
    handled with help of index on grouped columns.
    By setting quick_group to zero we force creation of temporary table
    to perform GROUP BY.
  */
  quick_group= 0;
  /*
    calculate_val_and_count() tests is_evaluated even when setup() was never
    called (no tree), so the flag must have a defined value from the start.
  */
  is_evaluated= FALSE;
}
/*
  Copy constructor used with a THD.  The accumulated value and the tmp table
  field type are taken from the original; the tree is not shared and starts
  out empty (0).
*/
Item_sum_distinct::Item_sum_distinct(THD *thd, Item_sum_distinct *original)
  :Item_sum_num(thd, original),
   val(original->val),
   tree(0),
   table_field_type(original->table_field_type)
{
  quick_group= 0;
  /*
    The copy has no tree yet; give is_evaluated a defined value so that
    calculate_val_and_count() never reads an indeterminate flag.
  */
  is_evaluated= FALSE;
}
/*
Behaves like an Integer except to fix_length_and_dec().
Additionally div() converts val with this traits to a val with true
decimal traits along with conversion of integer value to decimal value.
This is to speedup SUM/AVG(DISTINCT) evaluation for 8-32 bit integer
values.
*/
struct Hybrid_type_traits_fast_decimal: public
Hybrid_type_traits_integer
2005-02-09 02:50:45 +04:00
{
virtual Item_result type() const { return DECIMAL_RESULT; }
virtual void fix_length_and_dec(Item *item, Item *arg) const
{ Hybrid_type_traits_decimal::instance()->fix_length_and_dec(item, arg); }
virtual void div(Hybrid_type *val, ulonglong u) const
2005-02-09 02:50:45 +04:00
{
int2my_decimal(E_DEC_FATAL_ERROR, val->integer, 0, val->dec_buf);
val->used_dec_buf_no= 0;
val->traits= Hybrid_type_traits_decimal::instance();
val->traits->div(val, u);
2005-02-09 02:50:45 +04:00
}
static const Hybrid_type_traits_fast_decimal *instance();
Hybrid_type_traits_fast_decimal() {};
};
2005-02-09 02:50:45 +04:00
/* The one file-local, constant traits object handed out by instance() */
static const Hybrid_type_traits_fast_decimal fast_decimal_traits_instance;

/* Return a pointer to the file-local fast-decimal traits object. */
const Hybrid_type_traits_fast_decimal *
Hybrid_type_traits_fast_decimal::instance()
{
  const Hybrid_type_traits_fast_decimal *shared= &fast_decimal_traits_instance;
  return shared;
}
2005-02-09 02:50:45 +04:00
/*
  Choose the aggregation traits and the type of the tmp table field from
  the result type and field type of the argument, then let the chosen
  traits fix length and decimals of this item.
*/
void Item_sum_distinct::fix_length_and_dec()
{
  DBUG_ASSERT(args[0]->fixed);

  table_field_type= args[0]->field_type();

  /* Adjust tmp table type according to the chosen aggregation type */
  switch (args[0]->result_type()) {
  case INT_RESULT:
    /*
      Preserving int8, int16, int32 field types gives ~10% performance boost
      as the size of result tree becomes significantly smaller.
      Another speed up we gain by using longlong for intermediate
      calculations. The range of int64 is enough to hold sum 2^32 distinct
      integers each <= 2^32.
    */
    if (table_field_type == MYSQL_TYPE_INT24 ||
        (table_field_type >= MYSQL_TYPE_TINY &&
         table_field_type <= MYSQL_TYPE_LONG))
    {
      val.traits= Hybrid_type_traits_fast_decimal::instance();
    }
    else
    {
      /* Wider integers: decimal arithmetic over a LONGLONG column */
      table_field_type= MYSQL_TYPE_LONGLONG;
      val.traits= Hybrid_type_traits_decimal::instance();
    }
    break;
  case DECIMAL_RESULT:
    val.traits= Hybrid_type_traits_decimal::instance();
    if (table_field_type != MYSQL_TYPE_LONGLONG)
      table_field_type= MYSQL_TYPE_NEWDECIMAL;
    break;
  case STRING_RESULT:
  case REAL_RESULT:
    val.traits= Hybrid_type_traits::instance();
    if (table_field_type != MYSQL_TYPE_FLOAT)
      table_field_type= MYSQL_TYPE_DOUBLE;
    break;
  case ROW_RESULT:
  default:
    DBUG_ASSERT(0);
  }
  val.traits->fix_length_and_dec(this, args[0]);
}
/*
  Create the one-column virtual temporary table and the Unique tree used to
  collect distinct argument values.

  SYNOPSIS
    setup()
    thd      thread context; supplies max_heap_table_size and is passed to
             create_virtual_tmp_table()

  NOTES
    Every exit goes through DBUG_RETURN so that the DBUG call stack opened
    by DBUG_ENTER stays balanced on the error paths as well.

  RETURN
    TRUE   the field list, the table or the tree could not be created
    FALSE  otherwise
*/
bool Item_sum_distinct::setup(THD *thd)
{
  List<create_field> field_list;
  create_field field_def;                              /* field definition */
  DBUG_ENTER("Item_sum_distinct::setup");
  DBUG_ASSERT(tree == 0);

  /*
    Virtual table and the tree are created anew on each re-execution of
    PS/SP. Hence all further allocations are performed in the runtime
    mem_root.
  */
  if (field_list.push_back(&field_def))
    DBUG_RETURN(TRUE);

  null_value= maybe_null= 1;
  quick_group= 0;

  DBUG_ASSERT(args[0]->fixed);

  field_def.init_for_tmp_table(table_field_type, args[0]->max_length,
                               args[0]->decimals, args[0]->maybe_null,
                               args[0]->unsigned_flag);

  if (! (table= create_virtual_tmp_table(thd, field_list)))
    DBUG_RETURN(TRUE);

  /* XXX: check that the case of CHAR(0) works OK */
  tree_key_length= table->s->reclength - table->s->null_bytes;

  /*
    Unique handles all unique elements in a tree until they can't fit
    in. Then the tree is dumped to the temporary file. We can use
    simple_raw_key_cmp because the table contains numbers only; decimals
    are converted to binary representation as well.
  */
  tree= new Unique(simple_raw_key_cmp, &tree_key_length, tree_key_length,
                   thd->variables.max_heap_table_size);

  is_evaluated= FALSE;
  DBUG_RETURN(tree == 0);
}
/*
  Store the current argument value in the tmp table field and, unless it is
  NULL, add it to the tree of distinct values.

  RETURN
    0 for a NULL value, otherwise the result of Unique::unique_add()
*/
bool Item_sum_distinct::add()
{
  args[0]->save_in_field(table->field[0], FALSE);
  is_evaluated= FALSE;

  if (table->field[0]->is_null())
    return 0;

  DBUG_ASSERT(tree);
  null_value= 0;
  /*
    '0' values are also stored in the tree. This doesn't matter
    for SUM(DISTINCT), but is important for AVG(DISTINCT)
  */
  return tree->unique_add(table->field[0]->ptr);
}
bool Item_sum_distinct::unique_walk_function(void *element)
2005-02-09 02:50:45 +04:00
{
memcpy(table->field[0]->ptr, element, tree_key_length);
++count;
val.traits->add(&val, table->field[0]);
return 0;
2005-02-09 02:50:45 +04:00
}
void Item_sum_distinct::clear()
2005-02-09 02:50:45 +04:00
{
DBUG_ENTER("Item_sum_distinct::clear");
DBUG_ASSERT(tree != 0); /* we always have a tree */
null_value= 1;
tree->reset();
is_evaluated= FALSE;
DBUG_VOID_RETURN;
}
/*
  Run the base class cleanup, delete the tree and forget the table, leaving
  the item in the "no tree, not evaluated" state.
*/
void Item_sum_distinct::cleanup()
{
  Item_sum_num::cleanup();

  delete tree;
  tree= 0;

  is_evaluated= FALSE;
  table= 0;
}
/* Destructor: only the tree is deleted here. */
Item_sum_distinct::~Item_sum_distinct()
{
  /* no need to free the table */
  delete tree;
}
void Item_sum_distinct::calculate_val_and_count()
{
if (!is_evaluated)
2005-02-09 02:50:45 +04:00
{
count= 0;
val.traits->set_zero(&val);
/*
We don't have a tree only if 'setup()' hasn't been called;
this is the case of sql_select.cc:return_zero_rows.
*/
if (tree)
{
table->field[0]->set_notnull();
tree->walk(item_sum_distinct_walk, (void*) this);
}
is_evaluated= TRUE;
2005-02-09 02:50:45 +04:00
}
}
2005-02-09 02:50:45 +04:00
double Item_sum_distinct::val_real()
2005-02-09 02:50:45 +04:00
{
calculate_val_and_count();
return val.traits->val_real(&val);
2005-02-09 02:50:45 +04:00
}
/*
  Evaluate the aggregate if needed and return it as a decimal.

  RETURN
    0 if the result is NULL, otherwise what the traits return for 'to'
*/
my_decimal *Item_sum_distinct::val_decimal(my_decimal *to)
{
  calculate_val_and_count();
  if (!null_value)
    return val.traits->val_decimal(&val, to);
  return 0;
}
/* Evaluate the aggregate if needed and return it as an integer. */
longlong Item_sum_distinct::val_int()
{
  calculate_val_and_count();
  const longlong int_value= val.traits->val_int(&val, unsigned_flag);
  return int_value;
}
/*
  Evaluate the aggregate if needed and return it as a string.

  RETURN
    0 if the result is NULL, otherwise what the traits return for 'str'
*/
String *Item_sum_distinct::val_str(String *str)
{
  calculate_val_and_count();
  if (!null_value)
    return val.traits->val_str(&val, str, decimals);
  return 0;
}
/* end of Item_sum_distinct */
/* Item_sum_avg_distinct */
/*
  Fix length and decimals as for SUM(DISTINCT), then widen the scale by the
  session's div_precincrement, capped at NOT_FIXED_DEC.
*/
void Item_sum_avg_distinct::fix_length_and_dec()
{
  Item_sum_distinct::fix_length_and_dec();

  /*
    AVG() will divide val by count. We need to reserve digits
    after decimal point as the result can be fractional.
  */
  prec_increment= current_thd->variables.div_precincrement;
  decimals= min(decimals + prec_increment, NOT_FIXED_DEC);
}
/*
  Compute the sum and count of distinct values and, when at least one value
  was counted, divide the sum by the count.  Does nothing if the result has
  already been evaluated.
*/
void Item_sum_avg_distinct::calculate_val_and_count()
{
  if (is_evaluated)
    return;

  Item_sum_distinct::calculate_val_and_count();
  if (count)
    val.traits->div(&val, count);
  is_evaluated= TRUE;
}
2005-02-09 02:50:45 +04:00
/* Return a new Item_sum_count copied from this one, allocated on thd->mem_root. */
Item *Item_sum_count::copy_or_same(THD* thd)
{
  Item_sum_count *copy= new (thd->mem_root) Item_sum_count(thd, this);
  return copy;
}
/* Restart counting from zero. */
void Item_sum_count::clear()
{
  count= 0;
}
2000-07-31 21:29:14 +02:00
/*
  Count the current row unless the argument evaluates to NULL.

  The argument's NULL state is refreshed only when it can be NULL at all.

  RETURN
    0 always
*/
bool Item_sum_count::add()
{
  if (args[0]->maybe_null)
  {
    args[0]->update_null_value();
    if (args[0]->null_value)
      return 0;
  }
  count++;
  return 0;
}
/* Return the current counter value. */
longlong Item_sum_count::val_int()
{
  DBUG_ASSERT(fixed == 1);
  const longlong result= (longlong) count;
  return result;
}
/*
  Reset the counter, run the base class cleanup and set every bit of
  used_table_cache.
*/
void Item_sum_count::cleanup()
{
  DBUG_ENTER("Item_sum_count::cleanup");
  count= 0;
  Item_sum_int::cleanup();
  used_table_cache= ~(table_map) 0;
  DBUG_VOID_RETURN;
}
2000-07-31 21:29:14 +02:00
/*
  Average
*/
2005-02-09 02:50:45 +04:00
void Item_sum_avg::fix_length_and_dec()
{
Item_sum_sum::fix_length_and_dec();
maybe_null=null_value=1;
prec_increment= current_thd->variables.div_precincrement;
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
{
int precision= args[0]->decimal_precision() + prec_increment;
decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length(precision, decimals,
unsigned_flag);
f_precision= min(precision+DECIMAL_LONGLONG_DIGITS, DECIMAL_MAX_PRECISION);
f_scale= args[0]->decimals;
2005-02-09 02:50:45 +04:00
dec_bin_size= my_decimal_get_binary_size(f_precision, f_scale);
}
else
decimals= min(args[0]->decimals + prec_increment, NOT_FIXED_DEC);
2005-02-09 02:50:45 +04:00
}
2000-07-31 21:29:14 +02:00
/* Return a new Item_sum_avg copied from this one, allocated on thd->mem_root. */
Item *Item_sum_avg::copy_or_same(THD* thd)
{
  Item_sum_avg *copy= new (thd->mem_root) Item_sum_avg(thd, this);
  return copy;
}
2005-02-09 02:50:45 +04:00
/*
  Create the temporary table field for this AVG().

  When grouping, a binary string field is created that is large enough for
  the sum (decimal binary image or double) followed by a longlong counter.
  Otherwise a decimal or double field matching the result type is created.
  convert_blob_len is not used.

  RETURN
    the new field, initialised for 'table', or 0 if allocation failed
*/
Field *Item_sum_avg::create_tmp_field(bool group, TABLE *table,
                                      uint convert_blob_len)
{
  Field *field;
  if (!group)
  {
    if (hybrid_type == DECIMAL_RESULT)
      field= new Field_new_decimal(max_length, maybe_null, name,
                                   decimals, unsigned_flag);
    else
      field= new Field_double(max_length, maybe_null, name, decimals);
  }
  else
  {
    /*
      We must store both value and counter in the temporary table in one field.
      The easiest way to do this is to store both values in a string
      and unpack on access.
    */
    field= new Field_string(((hybrid_type == DECIMAL_RESULT) ?
                             dec_bin_size : sizeof(double)) + sizeof(longlong),
                            0, name, &my_charset_bin);
  }
  if (field)
    field->init(table);
  return field;
}
void Item_sum_avg::clear()
{
2005-02-09 02:50:45 +04:00
Item_sum_sum::clear();
count=0;
}
2000-07-31 21:29:14 +02:00
bool Item_sum_avg::add()
{
2005-02-09 02:50:45 +04:00
if (Item_sum_sum::add())
return TRUE;
2000-07-31 21:29:14 +02:00
if (!args[0]->null_value)
count++;
2005-02-09 02:50:45 +04:00
return FALSE;
2000-07-31 21:29:14 +02:00
}
2004-11-11 21:39:35 +03:00
double Item_sum_avg::val_real()
2000-07-31 21:29:14 +02:00
{
DBUG_ASSERT(fixed == 1);
2000-07-31 21:29:14 +02:00
if (!count)
{
null_value=1;
return 0.0;
}
2005-02-09 02:50:45 +04:00
return Item_sum_sum::val_real() / ulonglong2double(count);
}
/*
  Return the average as a decimal, computed as sum / count with
  prec_increment extra digits of scale.

  RETURN
    NULL (with null_value set) when nothing has been counted, else 'val'
*/
my_decimal *Item_sum_avg::val_decimal(my_decimal *val)
{
  DBUG_ASSERT(fixed == 1);
  if (!count)
  {
    null_value= 1;
    return NULL;
  }

  my_decimal sum_buf, count_dec;
  const my_decimal *sum_dec= Item_sum_sum::val_decimal(&sum_buf);
  int2my_decimal(E_DEC_FATAL_ERROR, count, 0, &count_dec);
  my_decimal_div(E_DEC_FATAL_ERROR, val, sum_dec, &count_dec, prec_increment);
  return val;
}
/*
  Return the average as a string, formatted from the decimal value for
  DECIMAL results and from the real value otherwise.
*/
String *Item_sum_avg::val_str(String *str)
{
  if (hybrid_type != DECIMAL_RESULT)
    return val_string_from_real(str);
  return val_string_from_decimal(str);
}
/*
Standard deviation
2000-07-31 21:29:14 +02:00
*/
2004-11-11 21:39:35 +03:00
double Item_sum_std::val_real()
2000-07-31 21:29:14 +02:00
{
DBUG_ASSERT(fixed == 1);
double nr= Item_sum_variance::val_real();
DBUG_ASSERT(nr >= 0.0);
return sqrt(nr);
2000-07-31 21:29:14 +02:00
}
/* Return a new Item_sum_std copied from this one, allocated on thd->mem_root. */
Item *Item_sum_std::copy_or_same(THD* thd)
{
  Item_sum_std *copy= new (thd->mem_root) Item_sum_std(thd, this);
  return copy;
}
2002-12-14 03:36:59 +04:00
/*
Variance
2002-12-14 03:36:59 +04:00
*/
2005-02-09 02:50:45 +04:00
/*
  variance_fp_recurrence_next() and variance_fp_recurrence_result() are used
  by the Item_sum_variance and the Item_variance_field classes, which are
  unrelated, and each need to calculate variance. The difference between the
  two classes is that the first is used for a mundane SELECT, while the
  latter is used in a GROUPing SELECT.
*/

/**
  Variance recurrence step for floating-point values, without catastrophic
  cancellation, from Knuth's _TAoCP_, 3rd ed, volume 2, pg232.

  Increments *count and folds the new value nr into the recurrence
  variables *m and *s.  The first value simply initialises them to nr and 0.
*/
static void variance_fp_recurrence_next(double *m, double *s, ulonglong *count, double nr)
{
  *count += 1;

  if (*count != 1)
  {
    double previous_m= *m;
    *m= previous_m + (nr - previous_m) / (double) *count;
    *s= *s + (nr - previous_m) * (nr - *m);
    return;
  }

  /* First value seen */
  *m= nr;
  *s= 0;
}
static double variance_fp_recurrence_result(double s, ulonglong count, bool is_sample_variance)
{
if (count == 1)
return 0.0;
if (is_sample_variance)
return s / (count - 1);
/* else, is a population variance */
return s / count;
}
/*
  Copy constructor used with a THD: copies the result type, counter, sample
  flag, precision increment and both recurrence variables from 'item'.
*/
Item_sum_variance::Item_sum_variance(THD *thd, Item_sum_variance *item)
  :Item_sum_num(thd, item),
   hybrid_type(item->hybrid_type),
   count(item->count),
   sample(item->sample),
   prec_increment(item->prec_increment)
{
  recurrence_m= item->recurrence_m;
  recurrence_s= item->recurrence_s;
}
2005-02-09 02:50:45 +04:00
void Item_sum_variance::fix_length_and_dec()
{
DBUG_ENTER("Item_sum_variance::fix_length_and_dec");
maybe_null= null_value= 1;
prec_increment= current_thd->variables.div_precincrement;
/*
According to the SQL2003 standard (Part 2, Foundations; sec 10.9,
aggregate function; paragraph 7h of Syntax Rules), "the declared
type of the result is an implementation-defined aproximate numeric
type.
*/
hybrid_type= REAL_RESULT;
switch (args[0]->result_type()) {
2005-02-09 02:50:45 +04:00
case REAL_RESULT:
case STRING_RESULT:
decimals= min(args[0]->decimals + 4, NOT_FIXED_DEC);
2005-02-09 02:50:45 +04:00
break;
case INT_RESULT:
case DECIMAL_RESULT:
{
int precision= args[0]->decimal_precision()*2 + prec_increment;
decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length(precision, decimals,
unsigned_flag);
2005-02-09 02:50:45 +04:00
break;
}
2005-02-09 02:50:45 +04:00
case ROW_RESULT:
default:
DBUG_ASSERT(0);
}
DBUG_PRINT("info", ("Type: REAL_RESULT (%d, %d)", max_length, (int)decimals));
2005-02-09 02:50:45 +04:00
DBUG_VOID_RETURN;
}
/* Return a new Item_sum_variance copied from this one, allocated on thd->mem_root. */
Item *Item_sum_variance::copy_or_same(THD* thd)
{
  Item_sum_variance *copy= new (thd->mem_root) Item_sum_variance(thd, this);
  return copy;
}
/**
Create a new field to match the type of value we're expected to yield.
If we're grouping, then we need some space to serialize variables into, to
pass around.
*/
2005-02-09 02:50:45 +04:00
Field *Item_sum_variance::create_tmp_field(bool group, TABLE *table,
uint convert_blob_len)
{
Field *field;
if (group)
2005-02-09 02:50:45 +04:00
{
/*
We must store both value and counter in the temporary table in one field.
The easiest way is to do this is to store both value in a string
and unpack on access.
*/
field= new Field_string(sizeof(double)*2 + sizeof(longlong), 0, name, &my_charset_bin);
2005-02-09 02:50:45 +04:00
}
else
field= new Field_double(max_length, maybe_null, name, decimals);
if (field != NULL)
field->init(table);
return field;
2005-02-09 02:50:45 +04:00
}
void Item_sum_variance::clear()
2002-12-14 03:36:59 +04:00
{
count= 0;
2002-12-14 03:36:59 +04:00
}
bool Item_sum_variance::add()
2000-07-31 21:29:14 +02:00
{
/*
Why use a temporary variable? We don't know if it is null until we
evaluate it, which has the side-effect of setting null_value .
*/
double nr= args[0]->val_real();
if (!args[0]->null_value)
variance_fp_recurrence_next(&recurrence_m, &recurrence_s, &count, nr);
2000-07-31 21:29:14 +02:00
return 0;
}
2004-11-11 21:39:35 +03:00
double Item_sum_variance::val_real()
2000-07-31 21:29:14 +02:00
{
DBUG_ASSERT(fixed == 1);
/*
'sample' is a 1/0 boolean value. If it is 1/true, id est this is a sample
variance call, then we should set nullness when the count of the items
is one or zero. If it's zero, i.e. a population variance, then we only
set nullness when the count is zero.
Another way to read it is that 'sample' is the numerical threshhold, at and
below which a 'count' number of items is called NULL.
*/
DBUG_ASSERT((sample == 0) || (sample == 1));
if (count <= sample)
2000-07-31 21:29:14 +02:00
{
null_value=1;
return 0.0;
}
2005-02-09 02:50:45 +04:00
null_value=0;
return variance_fp_recurrence_result(recurrence_s, count, sample);
2005-02-09 02:50:45 +04:00
}
/* Return the variance as a decimal converted from the real value. */
my_decimal *Item_sum_variance::val_decimal(my_decimal *dec_buf)
{
  DBUG_ASSERT(fixed == 1);
  my_decimal *converted= val_decimal_from_real(dec_buf);
  return converted;
}
2005-02-09 02:50:45 +04:00
void Item_sum_variance::reset_field()
{
double nr;
char *res= result_field->ptr;
nr= args[0]->val_real(); /* sets null_value as side-effect */
2000-07-31 21:29:14 +02:00
if (args[0]->null_value)
bzero(res,sizeof(double)*2+sizeof(longlong));
else
{
/* Serialize format is (double)m, (double)s, (longlong)count */
ulonglong tmp_count;
double tmp_s;
float8store(res, nr); /* recurrence variable m */
tmp_s= 0.0;
float8store(res + sizeof(double), tmp_s);
tmp_count= 1;
int8store(res + sizeof(double)*2, tmp_count);
2000-07-31 21:29:14 +02:00
}
}
2003-08-29 13:44:35 +03:00
void Item_sum_variance::update_field()
2000-07-31 21:29:14 +02:00
{
ulonglong field_count;
2000-07-31 21:29:14 +02:00
char *res=result_field->ptr;
double nr= args[0]->val_real(); /* sets null_value as side-effect */
if (args[0]->null_value)
2005-02-09 02:50:45 +04:00
return;
2000-07-31 21:29:14 +02:00
/* Serialize format is (double)m, (double)s, (longlong)count */
double field_recurrence_m, field_recurrence_s;
float8get(field_recurrence_m, res);
float8get(field_recurrence_s, res + sizeof(double));
field_count=sint8korr(res+sizeof(double)*2);
2000-07-31 21:29:14 +02:00
variance_fp_recurrence_next(&field_recurrence_m, &field_recurrence_s, &field_count, nr);
float8store(res, field_recurrence_m);
float8store(res + sizeof(double), field_recurrence_s);
res+= sizeof(double)*2;
int8store(res,field_count);
2000-07-31 21:29:14 +02:00
}
2000-07-31 21:29:14 +02:00
/* min & max */
void Item_sum_hybrid::clear()
{
switch (hybrid_type) {
2005-02-09 02:50:45 +04:00
case INT_RESULT:
sum_int= 0;
break;
case DECIMAL_RESULT:
my_decimal_set_zero(&sum_dec);
break;
case REAL_RESULT:
sum= 0.0;
break;
default:
value.length(0);
}
null_value= 1;
}
2004-11-11 21:39:35 +03:00
double Item_sum_hybrid::val_real()
2000-07-31 21:29:14 +02:00
{
DBUG_ASSERT(fixed == 1);
2000-07-31 21:29:14 +02:00
if (null_value)
return 0.0;
switch (hybrid_type) {
case STRING_RESULT:
{
char *end_not_used;
int err_not_used;
2000-07-31 21:29:14 +02:00
String *res; res=val_str(&str_value);
return (res ? my_strntod(res->charset(), (char*) res->ptr(), res->length(),
&end_not_used, &err_not_used) : 0.0);
}
case INT_RESULT:
if (unsigned_flag)
return ulonglong2double(sum_int);
return (double) sum_int;
2005-02-09 02:50:45 +04:00
case DECIMAL_RESULT:
my_decimal2double(E_DEC_FATAL_ERROR, &sum_dec, &sum);
return sum;
case REAL_RESULT:
return sum;
case ROW_RESULT:
2003-01-31 14:07:07 +04:00
default:
2002-11-15 20:32:09 +02:00
// This case should never be choosen
DBUG_ASSERT(0);
return 0;
2000-07-31 21:29:14 +02:00
}
}
/*
  Return the current result as an integer.

  For INT_RESULT the stored integer is returned directly.  For
  DECIMAL_RESULT the decimal accumulator is converted with my_decimal2int()
  and the converted value is returned.  All other types go through
  val_real() and are rounded with rint().

  RETURN
    the value, or 0 if the result is NULL
*/

longlong Item_sum_hybrid::val_int()
{
  DBUG_ASSERT(fixed == 1);
  if (null_value)
    return 0;
  switch (hybrid_type) {
  case INT_RESULT:
    return sum_int;
  case DECIMAL_RESULT:
  {
    longlong result;
    my_decimal2int(E_DEC_FATAL_ERROR, &sum_dec, unsigned_flag, &result);
    /*
      Return the converted decimal.  sum_int is not maintained for
      DECIMAL_RESULT, so returning it here would give a wrong answer.
    */
    return result;
  }
  default:
    return (longlong) rint(Item_sum_hybrid::val_real());
  }
}
/*
  Return the current result as a decimal.

  SYNOPSIS
    val_decimal()
    val   buffer used when a conversion is needed

  RETURN
    0 if the result is NULL, a pointer to the internal decimal accumulator
    for DECIMAL_RESULT, otherwise 'val' filled with the converted value
*/

my_decimal *Item_sum_hybrid::val_decimal(my_decimal *val)
{
  DBUG_ASSERT(fixed == 1);
  if (null_value)
    return 0;

  switch (hybrid_type) {
  case DECIMAL_RESULT:
    return &sum_dec;
  case INT_RESULT:
    int2my_decimal(E_DEC_FATAL_ERROR, sum_int, unsigned_flag, val);
    return val;
  case REAL_RESULT:
    double2my_decimal(E_DEC_FATAL_ERROR, sum, val);
    return val;
  case STRING_RESULT:
    string2my_decimal(E_DEC_FATAL_ERROR, &value, val);
    return val;
  case ROW_RESULT:
  default:
    // This case should never be chosen
    DBUG_ASSERT(0);
    break;
  }
  return val;					// Keep compiler happy
}
/*
  Return the current result as a string.

  SYNOPSIS
    val_str()
    str   buffer used when a conversion is needed

  RETURN
    0 if the result is NULL, a pointer to the internal string for
    STRING_RESULT, otherwise 'str' filled with the formatted value
*/

String *
Item_sum_hybrid::val_str(String *str)
{
  DBUG_ASSERT(fixed == 1);
  if (null_value)
    return 0;

  switch (hybrid_type) {
  case STRING_RESULT:
    return &value;
  case DECIMAL_RESULT:
    my_decimal2string(E_DEC_FATAL_ERROR, &sum_dec, 0, 0, 0, str);
    return str;
  case INT_RESULT:
    str->set_int(sum_int, unsigned_flag, &my_charset_bin);
    return str;
  case REAL_RESULT:
    str->set_real(sum, decimals, &my_charset_bin);
    return str;
  case ROW_RESULT:
  default:
    // This case should never be chosen
    DBUG_ASSERT(0);
    break;
  }
  return str;					// Keep compiler happy
}
void Item_sum_hybrid::cleanup()
{
DBUG_ENTER("Item_sum_hybrid::cleanup");
Item_sum::cleanup();
used_table_cache= ~(table_map) 0;
2004-12-07 21:18:15 +02:00
/*
2004-12-07 21:18:15 +02:00
by default it is TRUE to avoid TRUE reporting by
Item_func_not_all/Item_func_nop_all if this item was never called.
no_rows_in_result() set it to FALSE if was not results found.
2004-12-07 21:18:15 +02:00
If some results found it will be left unchanged.
*/
was_values= TRUE;
DBUG_VOID_RETURN;
}
/*
  Record that no rows were found and reset the accumulated value.
*/

void Item_sum_hybrid::no_rows_in_result()
{
  was_values= FALSE;
  clear();
}
/*
  Return a copy of this item allocated on thd->mem_root.
*/

Item *Item_sum_min::copy_or_same(THD* thd)
{
  Item_sum_min *copy= new (thd->mem_root) Item_sum_min(thd, this);
  return copy;
}
2000-07-31 21:29:14 +02:00
bool Item_sum_min::add()
{
switch (hybrid_type) {
case STRING_RESULT:
2000-07-31 21:29:14 +02:00
{
String *result=args[0]->val_str(&tmp_value);
if (!args[0]->null_value &&
(null_value || sortcmp(&value,result,collation.collation) > 0))
2000-07-31 21:29:14 +02:00
{
value.copy(*result);
null_value=0;
}
}
break;
case INT_RESULT:
{
longlong nr=args[0]->val_int();
if (!args[0]->null_value && (null_value ||
(unsigned_flag &&
(ulonglong) nr < (ulonglong) sum_int) ||
(!unsigned_flag && nr < sum_int)))
{
sum_int=nr;
null_value=0;
}
}
break;
2005-02-09 02:50:45 +04:00
case DECIMAL_RESULT:
{
my_decimal value, *val= args[0]->val_decimal(&value);
if (!args[0]->null_value &&
(null_value || (my_decimal_cmp(&sum_dec, val) > 0)))
{
my_decimal2decimal(val, &sum_dec);
null_value= 0;
}
}
break;
case REAL_RESULT:
2000-07-31 21:29:14 +02:00
{
2004-11-11 21:39:35 +03:00
double nr= args[0]->val_real();
if (!args[0]->null_value && (null_value || nr < sum))
2000-07-31 21:29:14 +02:00
{
sum=nr;
null_value=0;
}
}
break;
case ROW_RESULT:
2003-01-31 14:07:07 +04:00
default:
2002-11-15 20:32:09 +02:00
// This case should never be choosen
DBUG_ASSERT(0);
break;
}
return 0;
}
/*
  Return a copy of this item allocated on thd->mem_root.
*/

Item *Item_sum_max::copy_or_same(THD* thd)
{
  Item_sum_max *copy= new (thd->mem_root) Item_sum_max(thd, this);
  return copy;
}
bool Item_sum_max::add()
{
switch (hybrid_type) {
case STRING_RESULT:
2000-07-31 21:29:14 +02:00
{
String *result=args[0]->val_str(&tmp_value);
if (!args[0]->null_value &&
(null_value || sortcmp(&value,result,collation.collation) < 0))
2000-07-31 21:29:14 +02:00
{
value.copy(*result);
null_value=0;
}
}
break;
case INT_RESULT:
{
longlong nr=args[0]->val_int();
if (!args[0]->null_value && (null_value ||
(unsigned_flag &&
(ulonglong) nr > (ulonglong) sum_int) ||
(!unsigned_flag && nr > sum_int)))
{
sum_int=nr;
null_value=0;
}
}
break;
2005-02-09 02:50:45 +04:00
case DECIMAL_RESULT:
{
my_decimal value, *val= args[0]->val_decimal(&value);
if (!args[0]->null_value &&
(null_value || (my_decimal_cmp(val, &sum_dec) > 0)))
{
my_decimal2decimal(val, &sum_dec);
null_value= 0;
}
}
break;
case REAL_RESULT:
{
2004-11-11 21:39:35 +03:00
double nr= args[0]->val_real();
if (!args[0]->null_value && (null_value || nr > sum))
{
sum=nr;
null_value=0;
}
}
break;
case ROW_RESULT:
2003-01-31 14:07:07 +04:00
default:
2002-11-15 20:32:09 +02:00
// This case should never be choosen
DBUG_ASSERT(0);
break;
}
2000-07-31 21:29:14 +02:00
return 0;
}
/* bit_or and bit_and */
/*
  Return the accumulated bit pattern as a signed integer.
*/

longlong Item_sum_bit::val_int()
{
  DBUG_ASSERT(fixed == 1);
  const longlong as_signed= (longlong) bits;
  return as_signed;
}
/*
  Restart accumulation from the initial value kept in reset_bits.
*/

void Item_sum_bit::clear()
{
  bits= reset_bits;
}
/*
  Return a copy of this item allocated on thd->mem_root.
*/

Item *Item_sum_or::copy_or_same(THD* thd)
{
  Item_sum_or *copy= new (thd->mem_root) Item_sum_or(thd, this);
  return copy;
}
2000-07-31 21:29:14 +02:00
bool Item_sum_or::add()
{
ulonglong value= (ulonglong) args[0]->val_int();
if (!args[0]->null_value)
bits|=value;
return 0;
}
/*
  Return a copy of this item allocated on thd->mem_root.
*/

Item *Item_sum_xor::copy_or_same(THD* thd)
{
  Item_sum_xor *copy= new (thd->mem_root) Item_sum_xor(thd, this);
  return copy;
}
bool Item_sum_xor::add()
{
ulonglong value= (ulonglong) args[0]->val_int();
if (!args[0]->null_value)
bits^=value;
return 0;
}
/*
  Return a copy of this item allocated on thd->mem_root.
*/

Item *Item_sum_and::copy_or_same(THD* thd)
{
  Item_sum_and *copy= new (thd->mem_root) Item_sum_and(thd, this);
  return copy;
}
2000-07-31 21:29:14 +02:00
bool Item_sum_and::add()
{
ulonglong value= (ulonglong) args[0]->val_int();
if (!args[0]->null_value)
bits&=value;
return 0;
}
/************************************************************************
** reset result of an Item_sum which is saved in a tmp_table
*************************************************************************/
/*
  Store the current argument as the initial value in result_field.

  When the item may be NULL, the field's NULL flag follows the argument and
  0.0 is stored for a NULL argument.
*/

void Item_sum_num::reset_field()
{
  double first_value= args[0]->val_real();
  char *buf= result_field->ptr;

  if (maybe_null)
  {
    if (!args[0]->null_value)
      result_field->set_notnull();
    else
    {
      first_value= 0.0;
      result_field->set_null();
    }
  }
  float8store(buf, first_value);
}
void Item_sum_hybrid::reset_field()
{
switch(hybrid_type) {
2005-02-09 02:50:45 +04:00
case STRING_RESULT:
2000-07-31 21:29:14 +02:00
{
char buff[MAX_FIELD_WIDTH];
String tmp(buff,sizeof(buff),result_field->charset()),*res;
2000-07-31 21:29:14 +02:00
res=args[0]->val_str(&tmp);
if (args[0]->null_value)
{
result_field->set_null();
result_field->reset();
}
else
{
result_field->set_notnull();
result_field->store(res->ptr(),res->length(),tmp.charset());
2000-07-31 21:29:14 +02:00
}
2005-02-09 02:50:45 +04:00
break;
2000-07-31 21:29:14 +02:00
}
2005-02-09 02:50:45 +04:00
case INT_RESULT:
2000-07-31 21:29:14 +02:00
{
longlong nr=args[0]->val_int();
if (maybe_null)
{
if (args[0]->null_value)
{
nr=0;
result_field->set_null();
}
else
result_field->set_notnull();
}
result_field->store(nr, unsigned_flag);
2005-02-09 02:50:45 +04:00
break;
2000-07-31 21:29:14 +02:00
}
2005-02-09 02:50:45 +04:00
case REAL_RESULT:
2000-07-31 21:29:14 +02:00
{
2004-11-11 21:39:35 +03:00
double nr= args[0]->val_real();
2000-07-31 21:29:14 +02:00
if (maybe_null)
{
if (args[0]->null_value)
{
nr=0.0;
result_field->set_null();
}
else
result_field->set_notnull();
}
result_field->store(nr);
2005-02-09 02:50:45 +04:00
break;
}
case DECIMAL_RESULT:
{
my_decimal value, *arg_dec= args[0]->val_decimal(&value);
if (maybe_null)
{
if (args[0]->null_value)
result_field->set_null();
else
result_field->set_notnull();
}
/*
We must store zero in the field as we will use the field value in
add()
*/
if (!arg_dec) // Null
arg_dec= &decimal_zero;
result_field->store_decimal(arg_dec);
2005-02-09 02:50:45 +04:00
break;
}
case ROW_RESULT:
default:
DBUG_ASSERT(0);
2000-07-31 21:29:14 +02:00
}
}
void Item_sum_sum::reset_field()
{
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
if (!arg_val) // Null
arg_val= &decimal_zero;
result_field->store_decimal(arg_val);
2005-02-09 02:50:45 +04:00
}
else
{
DBUG_ASSERT(hybrid_type == REAL_RESULT);
double nr= args[0]->val_real(); // Nulls also return 0
float8store(result_field->ptr, nr);
}
if (args[0]->null_value)
result_field->set_null();
else
result_field->set_notnull();
2000-07-31 21:29:14 +02:00
}
void Item_sum_count::reset_field()
{
char *res=result_field->ptr;
longlong nr=0;
if (!args[0]->maybe_null)
nr=1;
else
{
args[0]->update_null_value();
2000-07-31 21:29:14 +02:00
if (!args[0]->null_value)
nr=1;
}
int8store(res,nr);
}
void Item_sum_avg::reset_field()
{
char *res=result_field->ptr;
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
{
longlong tmp;
2005-02-09 02:50:45 +04:00
my_decimal value, *arg_dec= args[0]->val_decimal(&value);
if (args[0]->null_value)
{
arg_dec= &decimal_zero;
tmp= 0;
2005-02-09 02:50:45 +04:00
}
else
tmp= 1;
my_decimal2binary(E_DEC_FATAL_ERROR, arg_dec, res, f_precision, f_scale);
res+= dec_bin_size;
int8store(res, tmp);
2005-02-09 02:50:45 +04:00
}
2000-07-31 21:29:14 +02:00
else
{
2005-02-09 02:50:45 +04:00
double nr= args[0]->val_real();
if (args[0]->null_value)
bzero(res,sizeof(double)+sizeof(longlong));
else
{
longlong tmp= 1;
2005-02-09 02:50:45 +04:00
float8store(res,nr);
res+=sizeof(double);
int8store(res,tmp);
}
2000-07-31 21:29:14 +02:00
}
}
2000-07-31 21:29:14 +02:00
/*
  Call reset() and store the resulting bits in result_field.
*/

void Item_sum_bit::reset_field()
{
  reset();
  char *buf= result_field->ptr;
  int8store(buf, bits);
}
void Item_sum_bit::update_field()
2000-07-31 21:29:14 +02:00
{
char *res=result_field->ptr;
bits= uint8korr(res);
add();
int8store(res, bits);
2000-07-31 21:29:14 +02:00
}
2000-07-31 21:29:14 +02:00
/*
** calc next value and merge it with field_value
*/
void Item_sum_sum::update_field()
2000-07-31 21:29:14 +02:00
{
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
{
2005-02-09 02:50:45 +04:00
my_decimal value, *arg_val= args[0]->val_decimal(&value);
if (!args[0]->null_value)
{
if (!result_field->is_null())
{
my_decimal field_value,
*field_val= result_field->val_decimal(&field_value);
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs, arg_val, field_val);
result_field->store_decimal(dec_buffs);
}
else
{
result_field->store_decimal(arg_val);
result_field->set_notnull();
}
}
}
else
{
double old_nr,nr;
char *res=result_field->ptr;
float8get(old_nr,res);
nr= args[0]->val_real();
if (!args[0]->null_value)
{
old_nr+=nr;
result_field->set_notnull();
}
float8store(res,old_nr);
}
2000-07-31 21:29:14 +02:00
}
void Item_sum_count::update_field()
2000-07-31 21:29:14 +02:00
{
longlong nr;
char *res=result_field->ptr;
nr=sint8korr(res);
2000-07-31 21:29:14 +02:00
if (!args[0]->maybe_null)
nr++;
else
{
args[0]->update_null_value();
2000-07-31 21:29:14 +02:00
if (!args[0]->null_value)
nr++;
}
int8store(res,nr);
}
void Item_sum_avg::update_field()
2000-07-31 21:29:14 +02:00
{
longlong field_count;
char *res=result_field->ptr;
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
{
my_decimal value, *arg_val= args[0]->val_decimal(&value);
if (!args[0]->null_value)
{
binary2my_decimal(E_DEC_FATAL_ERROR, res,
dec_buffs + 1, f_precision, f_scale);
field_count= sint8korr(res + dec_bin_size);
my_decimal_add(E_DEC_FATAL_ERROR, dec_buffs, arg_val, dec_buffs + 1);
my_decimal2binary(E_DEC_FATAL_ERROR, dec_buffs,
res, f_precision, f_scale);
res+= dec_bin_size;
field_count++;
2005-02-09 02:50:45 +04:00
int8store(res, field_count);
}
}
else
{
double nr;
2000-07-31 21:29:14 +02:00
2005-02-09 02:50:45 +04:00
nr= args[0]->val_real();
if (!args[0]->null_value)
{
double old_nr;
float8get(old_nr, res);
field_count= sint8korr(res + sizeof(double));
2005-02-09 02:50:45 +04:00
old_nr+= nr;
float8store(res,old_nr);
res+= sizeof(double);
2005-02-09 02:50:45 +04:00
field_count++;
int8store(res, field_count);
2005-02-09 02:50:45 +04:00
}
2000-07-31 21:29:14 +02:00
}
}
void Item_sum_hybrid::update_field()
2000-07-31 21:29:14 +02:00
{
switch (hybrid_type) {
2005-02-09 02:50:45 +04:00
case STRING_RESULT:
min_max_update_str_field();
2005-02-09 02:50:45 +04:00
break;
case INT_RESULT:
min_max_update_int_field();
2005-02-09 02:50:45 +04:00
break;
case DECIMAL_RESULT:
min_max_update_decimal_field();
break;
default:
min_max_update_real_field();
2005-02-09 02:50:45 +04:00
}
2000-07-31 21:29:14 +02:00
}
void
Item_sum_hybrid::min_max_update_str_field()
2000-07-31 21:29:14 +02:00
{
String *res_str=args[0]->val_str(&value);
if (!args[0]->null_value)
2000-07-31 21:29:14 +02:00
{
result_field->val_str(&tmp_value);
2000-07-31 21:29:14 +02:00
if (result_field->is_null() ||
(cmp_sign * sortcmp(res_str,&tmp_value,collation.collation)) < 0)
result_field->store(res_str->ptr(),res_str->length(),res_str->charset());
2000-07-31 21:29:14 +02:00
result_field->set_notnull();
}
}
void
Item_sum_hybrid::min_max_update_real_field()
2000-07-31 21:29:14 +02:00
{
double nr,old_nr;
old_nr=result_field->val_real();
2004-11-11 21:39:35 +03:00
nr= args[0]->val_real();
2000-07-31 21:29:14 +02:00
if (!args[0]->null_value)
{
if (result_field->is_null(0) ||
2000-07-31 21:29:14 +02:00
(cmp_sign > 0 ? old_nr > nr : old_nr < nr))
old_nr=nr;
result_field->set_notnull();
}
else if (result_field->is_null(0))
2000-07-31 21:29:14 +02:00
result_field->set_null();
result_field->store(old_nr);
}
void
Item_sum_hybrid::min_max_update_int_field()
2000-07-31 21:29:14 +02:00
{
longlong nr,old_nr;
old_nr=result_field->val_int();
nr=args[0]->val_int();
if (!args[0]->null_value)
{
if (result_field->is_null(0))
2000-07-31 21:29:14 +02:00
old_nr=nr;
else
{
bool res=(unsigned_flag ?
(ulonglong) old_nr > (ulonglong) nr :
old_nr > nr);
/* (cmp_sign > 0 && res) || (!(cmp_sign > 0) && !res) */
2001-10-03 17:02:14 +03:00
if ((cmp_sign > 0) ^ (!res))
old_nr=nr;
}
2000-07-31 21:29:14 +02:00
result_field->set_notnull();
}
else if (result_field->is_null(0))
2000-07-31 21:29:14 +02:00
result_field->set_null();
result_field->store(old_nr, unsigned_flag);
2000-07-31 21:29:14 +02:00
}
2005-02-09 02:50:45 +04:00
void
Item_sum_hybrid::min_max_update_decimal_field()
{
/* TODO: optimize: do not get result_field in case of args[0] is NULL */
my_decimal old_val, nr_val;
const my_decimal *old_nr= result_field->val_decimal(&old_val);
const my_decimal *nr= args[0]->val_decimal(&nr_val);
if (!args[0]->null_value)
{
if (result_field->is_null(0))
old_nr=nr;
else
{
bool res= my_decimal_cmp(old_nr, nr) > 0;
/* (cmp_sign > 0 && res) || (!(cmp_sign > 0) && !res) */
if ((cmp_sign > 0) ^ (!res))
old_nr=nr;
}
result_field->set_notnull();
}
else if (result_field->is_null(0))
result_field->set_null();
result_field->store_decimal(old_nr);
}
/*
  Build the field-reading counterpart of an Item_sum_avg.

  SYNOPSIS
    Item_avg_field()
    res_type  result type this item reports
    item      aggregate whose result_field and metadata are taken over
*/

Item_avg_field::Item_avg_field(Item_result res_type, Item_sum_avg *item)
{
  /* Where the packed state is read from */
  field= item->result_field;

  /* Metadata taken over from the aggregate */
  name= item->name;
  max_length= item->max_length;
  decimals= item->decimals;
  unsigned_flag= item->unsigned_flag;
  prec_increment= item->prec_increment;

  maybe_null= 1;
  hybrid_type= res_type;
  if (hybrid_type != DECIMAL_RESULT)
    return;

  /* Layout of the binary decimal stored in the field */
  f_precision= item->f_precision;
  f_scale= item->f_scale;
  dec_bin_size= item->dec_bin_size;
}
2004-11-11 21:39:35 +03:00
double Item_avg_field::val_real()
2000-07-31 21:29:14 +02:00
{
// fix_fields() never calls for this Item
double nr;
longlong count;
char *res;
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
return val_real_from_decimal();
2005-02-09 02:50:45 +04:00
float8get(nr,field->ptr);
res= (field->ptr+sizeof(double));
count= sint8korr(res);
if ((null_value= !count))
return 0.0;
return nr/(double) count;
2005-02-09 02:50:45 +04:00
}
2005-02-09 02:50:45 +04:00
/*
  Integer value of the average: val_real() rounded with rint().
*/

longlong Item_avg_field::val_int()
{
  const double rounded= rint(val_real());
  return (longlong) rounded;
}
/*
  Compute the average as a decimal from the (binary decimal sum, 8-byte
  count) pair stored in the field.

  SYNOPSIS
    val_decimal()
    dec_buf   buffer that receives the result

  RETURN
    dec_buf, or 0 with null_value set when the count is zero
*/

my_decimal *Item_avg_field::val_decimal(my_decimal *dec_buf)
{
  // fix_fields() never calls for this Item
  if (hybrid_type == REAL_RESULT)
    return val_decimal_from_real(dec_buf);

  const longlong stored_count= sint8korr(field->ptr + dec_bin_size);
  null_value= !stored_count;
  if (null_value)
    return 0;

  my_decimal divisor, stored_sum;
  binary2my_decimal(E_DEC_FATAL_ERROR,
                    field->ptr, &stored_sum, f_precision, f_scale);
  int2my_decimal(E_DEC_FATAL_ERROR, stored_count, 0, &divisor);
  my_decimal_div(E_DEC_FATAL_ERROR, dec_buf,
                 &stored_sum, &divisor, prec_increment);
  return dec_buf;
}
2005-02-09 02:50:45 +04:00
2000-07-31 21:29:14 +02:00
/*
  String value of the average, formatted from the decimal or the real
  value depending on hybrid_type.
*/

String *Item_avg_field::val_str(String *str)
{
  // fix_fields() never calls for this Item
  return (hybrid_type == DECIMAL_RESULT) ? val_string_from_decimal(str) :
                                           val_string_from_real(str);
}
2000-07-31 21:29:14 +02:00
/*
  Build the field-reading counterpart of an Item_sum_std; all set-up is
  done by the Item_variance_field constructor.
*/

Item_std_field::Item_std_field(Item_sum_std *item)
  :Item_variance_field(item)
{}
2004-11-11 21:39:35 +03:00
double Item_std_field::val_real()
2002-12-14 03:36:59 +04:00
{
double nr;
// fix_fields() never calls for this Item
nr= Item_variance_field::val_real();
DBUG_ASSERT(nr >= 0.0);
return sqrt(nr);
2002-12-14 03:36:59 +04:00
}
/*
  Standard deviation as a decimal.

  SYNOPSIS
    val_decimal()
    dec_buf   buffer that receives the result

  RETURN
    dec_buf, or 0 if the variance is NULL
*/

my_decimal *Item_std_field::val_decimal(my_decimal *dec_buf)
{
  /*
    We can't call val_decimal_from_real() for DECIMAL_RESULT as
    Item_variance_field::val_real() would cause an infinite loop
  */
  if (hybrid_type == REAL_RESULT)
    return val_decimal_from_real(dec_buf);

  my_decimal *variance_dec= Item_variance_field::val_decimal(dec_buf);
  if (!variance_dec)
    return 0;

  /* Take the square root in double precision, then round to 'decimals' */
  double variance;
  my_decimal2double(E_DEC_FATAL_ERROR, variance_dec, &variance);
  DBUG_ASSERT(variance >= 0.0);
  const double deviation= sqrt(variance);

  my_decimal deviation_dec;
  double2my_decimal(E_DEC_FATAL_ERROR, deviation, &deviation_dec);
  my_decimal_round(E_DEC_FATAL_ERROR, &deviation_dec, decimals, FALSE,
                   dec_buf);
  return dec_buf;
}
2002-12-14 03:36:59 +04:00
/*
  Build the field-reading counterpart of an Item_sum_variance.

  SYNOPSIS
    Item_variance_field()
    item   aggregate whose result_field and metadata are taken over
*/

Item_variance_field::Item_variance_field(Item_sum_variance *item)
{
  /* Where the packed state is read from */
  field= item->result_field;

  /* Metadata taken over from the aggregate */
  name= item->name;
  max_length= item->max_length;
  decimals= item->decimals;
  unsigned_flag= item->unsigned_flag;
  prec_increment= item->prec_increment;
  sample= item->sample;

  maybe_null= 1;
  hybrid_type= item->hybrid_type;
  if (hybrid_type != DECIMAL_RESULT)
    return;

  /* Decimal layout parameters, copied only for DECIMAL_RESULT */
  f_precision0= item->f_precision0;
  f_scale0= item->f_scale0;
  dec_bin_size0= item->dec_bin_size0;
  f_precision1= item->f_precision1;
  f_scale1= item->f_scale1;
  dec_bin_size1= item->dec_bin_size1;
}
2004-11-11 21:39:35 +03:00
double Item_variance_field::val_real()
2000-07-31 21:29:14 +02:00
{
// fix_fields() never calls for this Item
2005-02-09 02:50:45 +04:00
if (hybrid_type == DECIMAL_RESULT)
return val_real_from_decimal();
double recurrence_s;
ulonglong count;
float8get(recurrence_s, (field->ptr + sizeof(double)));
2000-07-31 21:29:14 +02:00
count=sint8korr(field->ptr+sizeof(double)*2);
if ((null_value= (count <= sample)))
2000-07-31 21:29:14 +02:00
return 0.0;
return variance_fp_recurrence_result(recurrence_s, count, sample);
2005-02-09 02:50:45 +04:00
}
2000-07-31 21:29:14 +02:00
/****************************************************************************
** COUNT(DISTINCT ...)
****************************************************************************/
int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
{
Field *f= (Field*) arg;
2005-03-18 16:15:33 -08:00
return f->cmp((const char*)key1, (const char*)key2);
}
/*
Did not make this one static - at least gcc gets confused when
I try to declare a static function as a friend. If you can figure
out the syntax to make a static function a friend, make this one
static
*/
int composite_key_cmp(void* arg, byte* key1, byte* key2)
{
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
Field **field = item->table->field;
Field **field_end= field + item->table->s->fields;
uint32 *lengths=item->field_lengths;
for (; field < field_end; ++field)
{
Field* f = *field;
int len = *lengths++;
int res = f->cmp((char *) key1, (char *) key2);
if (res)
return res;
key1 += len;
key2 += len;
}
return 0;
}
C_MODE_START

/*
  Walk callback that increments the ulonglong counter passed in 'arg'
  once per call.  'elem' and 'count' are not used.
*/

static int count_distinct_walk(void *elem, element_count count, void *arg)
{
  ulonglong *counter= (ulonglong*) arg;
  (*counter)++;
  return 0;
}

C_MODE_END
void Item_sum_count_distinct::cleanup()
2000-07-31 21:29:14 +02:00
{
DBUG_ENTER("Item_sum_count_distinct::cleanup");
Item_sum_int::cleanup();
/* Free objects only if we own them. */
if (!original)
{
/*
We need to delete the table and the tree in cleanup() as
they were allocated in the runtime memroot. Using the runtime
memroot reduces memory footprint for PS/SP and simplifies setup().
*/
delete tree;
tree= 0;
is_evaluated= FALSE;
if (table)
{
free_tmp_table(table->in_use, table);
table= 0;
}
delete tmp_table_param;
tmp_table_param= 0;
}
always_null= FALSE;
DBUG_VOID_RETURN;
2000-07-31 21:29:14 +02:00
}
/* This is used by rollup to create a separate usable copy of the function */
void Item_sum_count_distinct::make_unique()
{
table=0;
original= 0;
Fixed bug#15560: GROUP_CONCAT wasn't ready for WITH ROLLUP queries The GROUP_CONCAT uses its own temporary table. When ROLLUP is present it creates the second copy of Item_func_group_concat. This copy receives the same list of arguments that original group_concat does. When the copy is set up the result_fields of functions from the argument list are reset to the temporary table of this copy. As a result of this action data from functions flow directly to the ROLLUP copy and the original group_concat functions shows wrong result. Since queries with COUNT(DISTINCT ...) use temporary tables to store the results the COUNT function they are also affected by this bug. The idea of the fix is to copy content of the result_field for the function under GROUP_CONCAT/COUNT from the first temporary table to the second one, rather than setting result_field to point to the second temporary table. To achieve this goal force_copy_fields flag is added to Item_func_group_concat and Item_sum_count_distinct classes. This flag is initialized to 0 and set to 1 into the make_unique() member function of both classes. To the TMP_TABLE_PARAM structure is modified to include the similar flag as well. The create_tmp_table() function passes that flag to create_tmp_field(). When the flag is set the create_tmp_field() function will set result_field as a source field and will not reset that result field to newly created field for Item_func_result_field and its descendants. Due to this there will be created copy func to copy data from old result_field to newly created field.
2006-03-29 23:30:34 +04:00
force_copy_fields= 1;
tree= 0;
is_evaluated= FALSE;
tmp_table_param= 0;
always_null= FALSE;
}
/*
  The destructor releases everything through cleanup().
*/

Item_sum_count_distinct::~Item_sum_count_distinct()
{
  cleanup();
}
2000-07-31 21:29:14 +02:00
/*
  Prepare the structures used to count distinct values.

  SYNOPSIS
    setup()
    thd   thread context

  DESCRIPTION
    Creates a temporary table over all arguments.  If that table is a HEAP
    table, a Unique tree is created as well, together with the compare
    function and argument that suit the fields of the table.

    Nothing is created when a constant argument is NULL (always_null is set
    instead) or when setup has already been done.

  RETURN
    FALSE  ok
    TRUE   out of memory or the temporary table could not be created
*/

bool Item_sum_count_distinct::setup(THD *thd)
{
  List<Item> list;
  SELECT_LEX *select_lex= thd->lex->current_select;

  /*
    Setup can be called twice for ROLLUP items. This is a bug.
    Please add DBUG_ASSERT(tree == 0) here when it's fixed.
  */
  if (tree || table || tmp_table_param)
    return FALSE;

  if (!(tmp_table_param= new TMP_TABLE_PARAM))
    return TRUE;

  /* Create a table with an unique key over all parameters */
  for (uint i=0; i < arg_count ; i++)
  {
    Item *item=args[i];
    if (list.push_back(item))
      return TRUE;                              // End of memory
    if (item->const_item())
    {
      item->update_null_value();
      if (item->null_value)
        always_null=1;
    }
  }
  if (always_null)
    return FALSE;
  count_field_types(tmp_table_param,list,0);
  tmp_table_param->force_copy_fields= force_copy_fields;
  DBUG_ASSERT(table == 0);
  if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
				0,
				(select_lex->options | thd->options),
				HA_POS_ERROR, (char*)"")))
    return TRUE;
  table->file->extra(HA_EXTRA_NO_ROWS);		// Don't update rows
  table->no_rows=1;

  if (table->s->db_type == heap_hton)
  {
    /*
      No blobs, otherwise it would have been MyISAM: set up a compare
      function and its arguments to use with Unique.
    */
    qsort_cmp2 compare_key;
    void* cmp_arg;
    Field **field= table->field;
    Field **field_end= field + table->s->fields;
    bool all_binary= TRUE;

    for (tree_key_length= 0; field < field_end; ++field)
    {
      Field *f= *field;
      enum enum_field_types type= f->type();
      tree_key_length+= f->pack_length();
      /* Parenthesised explicitly; the grouping is the original precedence */
      if ((type == MYSQL_TYPE_VARCHAR) ||
          (!f->binary() && (type == MYSQL_TYPE_STRING ||
                            type == MYSQL_TYPE_VAR_STRING)))
      {
        all_binary= FALSE;
        break;
      }
    }
    if (all_binary)
    {
      cmp_arg= (void*) &tree_key_length;
      compare_key= (qsort_cmp2) simple_raw_key_cmp;
    }
    else
    {
      if (table->s->fields == 1)
      {
        /*
          If we have only one field, which is the most common use of
          count(distinct), it is much faster to use a simpler key
          compare method that can take advantage of not having to worry
          about other fields.
        */
        compare_key= (qsort_cmp2) simple_str_key_cmp;
        cmp_arg= (void*) table->field[0];
        /* tree_key_length has been set already */
      }
      else
      {
        uint32 *length;
        compare_key= (qsort_cmp2) composite_key_cmp;
        cmp_arg= (void*) this;
        field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
        /* The loop below writes through this pointer */
        if (!field_lengths)
          return TRUE;                          // End of memory
        for (tree_key_length= 0, length= field_lengths, field= table->field;
             field < field_end; ++field, ++length)
        {
          *length= (*field)->pack_length();
          tree_key_length+= *length;
        }
      }
    }
    DBUG_ASSERT(tree == 0);
    tree= new Unique(compare_key, cmp_arg, tree_key_length,
                     thd->variables.max_heap_table_size);
    /*
      The only time tree_key_length could be 0 is if someone does
      count(distinct) on a char(0) field - stupid thing to do,
      but this has to be handled - otherwise someone can crash
      the server with a DoS attack
    */
    is_evaluated= FALSE;
    if (! tree)
      return TRUE;
  }
  return FALSE;
}
2000-07-31 21:29:14 +02:00
/*
  Return a copy of this item allocated on thd->mem_root.
*/

Item *Item_sum_count_distinct::copy_or_same(THD* thd)
{
  Item_sum_count_distinct *copy=
    new (thd->mem_root) Item_sum_count_distinct(thd, this);
  return copy;
}
void Item_sum_count_distinct::clear()
2000-07-31 21:29:14 +02:00
{
/* tree and table can be both null only if always_null */
is_evaluated= FALSE;
if (tree)
{
tree->reset();
}
2001-06-21 11:03:54 +02:00
else if (table)
{
table->file->extra(HA_EXTRA_NO_CACHE);
table->file->delete_all_rows();
table->file->extra(HA_EXTRA_WRITE_CACHE);
}
2000-07-31 21:29:14 +02:00
}
bool Item_sum_count_distinct::add()
{
int error;
if (always_null)
return 0;
2000-07-31 21:29:14 +02:00
copy_fields(tmp_table_param);
copy_funcs(tmp_table_param->items_to_copy);
2000-07-31 21:29:14 +02:00
for (Field **field=table->field ; *field ; field++)
if ((*field)->is_real_null(0))
return 0; // Don't count NULL
is_evaluated= FALSE;
if (tree)
{
/*
The first few bytes of record (at least one) are just markers
for deleted and NULLs. We want to skip them since they will
bloat the tree without providing any valuable info. Besides,
key_length used to initialize the tree didn't include space for them.
*/
return tree->unique_add(table->record[0] + table->s->null_bytes);
2000-07-31 21:29:14 +02:00
}
if ((error= table->file->ha_write_row(table->record[0])) &&
table->file->is_fatal_error(error, HA_CHECK_DUP))
return TRUE;
return FALSE;
2000-07-31 21:29:14 +02:00
}
2000-07-31 21:29:14 +02:00
longlong Item_sum_count_distinct::val_int()
{
int error;
DBUG_ASSERT(fixed == 1);
2000-07-31 21:29:14 +02:00
if (!table) // Empty query
return LL(0);
if (tree)
{
if (is_evaluated)
return count;
if (tree->elements == 0)
return (longlong) tree->elements_in_tree(); // everything fits in memory
count= 0;
tree->walk(count_distinct_walk, (void*) &count);
is_evaluated= TRUE;
return (longlong) count;
}
error= table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
if(error)
{
table->file->print_error(error, MYF(0));
}
This changeset is largely a handler cleanup changeset (WL#3281), but includes fixes and cleanups that was found necessary while testing the handler changes Changes that requires code changes in other code of other storage engines. (Note that all changes are very straightforward and one should find all issues by compiling a --debug build and fixing all compiler errors and all asserts in field.cc while running the test suite), - New optional handler function introduced: reset() This is called after every DML statement to make it easy for a handler to statement specific cleanups. (The only case it's not called is if force the file to be closed) - handler::extra(HA_EXTRA_RESET) is removed. Code that was there before should be moved to handler::reset() - table->read_set contains a bitmap over all columns that are needed in the query. read_row() and similar functions only needs to read these columns - table->write_set contains a bitmap over all columns that will be updated in the query. write_row() and update_row() only needs to update these columns. The above bitmaps should now be up to date in all context (including ALTER TABLE, filesort()). The handler is informed of any changes to the bitmap after fix_fields() by calling the virtual function handler::column_bitmaps_signal(). If the handler does caching of these bitmaps (instead of using table->read_set, table->write_set), it should redo the caching in this code. as the signal() may be sent several times, it's probably best to set a variable in the signal and redo the caching on read_row() / write_row() if the variable was set. - Removed the read_set and write_set bitmap objects from the handler class - Removed all column bit handling functions from the handler class. (Now one instead uses the normal bitmap functions in my_bitmap.c instead of handler dedicated bitmap functions) - field->query_id is removed. One should instead instead check table->read_set and table->write_set if a field is used in the query. 
- handler::extra(HA_EXTRA_RETRIVE_ALL_COLS) and handler::extra(HA_EXTRA_RETRIEVE_PRIMARY_KEY) are removed. One should now instead use table->read_set to check for which columns to retrieve. - If a handler needs to call Field->val() or Field->store() on columns that are not used in the query, one should install a temporary all-columns-used map while doing so. For this, we provide the following functions: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->read_set); field->val(); dbug_tmp_restore_column_map(table->read_set, old_map); and similar for the write map: my_bitmap_map *old_map= dbug_tmp_use_all_columns(table, table->write_set); field->val(); dbug_tmp_restore_column_map(table->write_set, old_map); If this is not done, you will sooner or later hit a DBUG_ASSERT in the field store() / val() functions. (For not DBUG binaries, the dbug_tmp_restore_column_map() and dbug_tmp_restore_column_map() are inline dummy functions and should be optimized away be the compiler). - If one needs to temporary set the column map for all binaries (and not just to avoid the DBUG_ASSERT() in the Field::store() / Field::val() methods) one should use the functions tmp_use_all_columns() and tmp_restore_column_map() instead of the above dbug_ variants. - All 'status' fields in the handler base class (like records, data_file_length etc) are now stored in a 'stats' struct. This makes it easier to know what status variables are provided by the base handler. This requires some trivial variable names in the extra() function. - New virtual function handler::records(). This is called to optimize COUNT(*) if (handler::table_flags() & HA_HAS_RECORDS()) is true. (stats.records is not supposed to be an exact value. It's only has to be 'reasonable enough' for the optimizer to be able to choose a good optimization path). - Non virtual handler::init() function added for caching of virtual constants from engine. - Removed has_transactions() virtual method. 
Now one should instead return HA_NO_TRANSACTIONS in table_flags() if the table handler DOES NOT support transactions. - The 'xxxx_create_handler()' function now has a MEM_ROOT_root argument that is to be used with 'new handler_name()' to allocate the handler in the right area. The xxxx_create_handler() function is also responsible for any initialization of the object before returning. For example, one should change: static handler *myisam_create_handler(TABLE_SHARE *table) { return new ha_myisam(table); } -> static handler *myisam_create_handler(TABLE_SHARE *table, MEM_ROOT *mem_root) { return new (mem_root) ha_myisam(table); } - New optional virtual function: use_hidden_primary_key(). This is called in case of an update/delete when (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined but we don't have a primary key. This allows the handler to take precisions in remembering any hidden primary key to able to update/delete any found row. The default handler marks all columns to be read. - handler::table_flags() now returns a ulonglong (to allow for more flags). - New/changed table_flags() - HA_HAS_RECORDS Set if ::records() is supported - HA_NO_TRANSACTIONS Set if engine doesn't support transactions - HA_PRIMARY_KEY_REQUIRED_FOR_DELETE Set if we should mark all primary key columns for read when reading rows as part of a DELETE statement. If there is no primary key, all columns are marked for read. - HA_PARTIAL_COLUMN_READ Set if engine will not read all columns in some cases (based on table->read_set) - HA_PRIMARY_KEY_ALLOW_RANDOM_ACCESS Renamed to HA_PRIMARY_KEY_REQUIRED_FOR_POSITION. - HA_DUPP_POS Renamed to HA_DUPLICATE_POS - HA_REQUIRES_KEY_COLUMNS_FOR_DELETE Set this if we should mark ALL key columns for read when when reading rows as part of a DELETE statement. In case of an update we will mark all keys for read for which key part changed value. - HA_STATS_RECORDS_IS_EXACT Set this if stats.records is exact. 
(This saves us some extra records() calls when optimizing COUNT(*)) - Removed table_flags() - HA_NOT_EXACT_COUNT Now one should instead use HA_HAS_RECORDS if handler::records() gives an exact count() and HA_STATS_RECORDS_IS_EXACT if stats.records is exact. - HA_READ_RND_SAME Removed (no one supported this one) - Removed not needed functions ha_retrieve_all_cols() and ha_retrieve_all_pk() - Renamed handler::dupp_pos to handler::dup_pos - Removed not used variable handler::sortkey Upper level handler changes: - ha_reset() now does some overall checks and calls ::reset() - ha_table_flags() added. This is a cached version of table_flags(). The cache is updated on engine creation time and updated on open. MySQL level changes (not obvious from the above): - DBUG_ASSERT() added to check that column usage matches what is set in the column usage bit maps. (This found a LOT of bugs in current column marking code). - In 5.1 before, all used columns was marked in read_set and only updated columns was marked in write_set. Now we only mark columns for which we need a value in read_set. - Column bitmaps are created in open_binary_frm() and open_table_from_share(). (Before this was in table.cc) - handler::table_flags() calls are replaced with handler::ha_table_flags() - For calling field->val() you must have the corresponding bit set in table->read_set. For calling field->store() you must have the corresponding bit set in table->write_set. (There are asserts in all store()/val() functions to catch wrong usage) - thd->set_query_id is renamed to thd->mark_used_columns and instead of setting this to an integer value, this has now the values: MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE Changed also all variables named 'set_query_id' to mark_used_columns. - In filesort() we now inform the handler of exactly which columns are needed doing the sort and choosing the rows. 
- The TABLE_SHARE object has a 'all_set' column bitmap one can use when one needs a column bitmap with all columns set. (This is used for table->use_all_columns() and other places) - The TABLE object has 3 column bitmaps: - def_read_set Default bitmap for columns to be read - def_write_set Default bitmap for columns to be written - tmp_set Can be used as a temporary bitmap when needed. The table object has also two pointer to bitmaps read_set and write_set that the handler should use to find out which columns are used in which way. - count() optimization now calls handler::records() instead of using handler->stats.records (if (table_flags() & HA_HAS_RECORDS) is true). - Added extra argument to Item::walk() to indicate if we should also traverse sub queries. - Added TABLE parameter to cp_buffer_from_ref() - Don't close tables created with CREATE ... SELECT but keep them in the table cache. (Faster usage of newly created tables). New interfaces: - table->clear_column_bitmaps() to initialize the bitmaps for tables at start of new statements. - table->column_bitmaps_set() to set up new column bitmaps and signal the handler about this. - table->column_bitmaps_set_no_signal() for some few cases where we need to setup new column bitmaps but don't signal the handler (as the handler has already been signaled about these before). Used for the momement only in opt_range.cc when doing ROR scans. - table->use_all_columns() to install a bitmap where all columns are marked as use in the read and the write set. - table->default_column_bitmaps() to install the normal read and write column bitmaps, but not signaling the handler about this. This is mainly used when creating TABLE instances. - table->mark_columns_needed_for_delete(), table->mark_columns_needed_for_delete() and table->mark_columns_needed_for_insert() to allow us to put additional columns in column usage maps if handler so requires. 
(The handler indicates what it neads in handler->table_flags()) - table->prepare_for_position() to allow us to tell handler that it needs to read primary key parts to be able to store them in future table->position() calls. (This replaces the table->file->ha_retrieve_all_pk function) - table->mark_auto_increment_column() to tell handler are going to update columns part of any auto_increment key. - table->mark_columns_used_by_index() to mark all columns that is part of an index. It will also send extra(HA_EXTRA_KEYREAD) to handler to allow it to quickly know that it only needs to read colums that are part of the key. (The handler can also use the column map for detecting this, but simpler/faster handler can just monitor the extra() call). - table->mark_columns_used_by_index_no_reset() to in addition to other columns, also mark all columns that is used by the given key. - table->restore_column_maps_after_mark_index() to restore to default column maps after a call to table->mark_columns_used_by_index(). - New item function register_field_in_read_map(), for marking used columns in table->read_map. Used by filesort() to mark all used columns - Maintain in TABLE->merge_keys set of all keys that are used in query. (Simplices some optimization loops) - Maintain Field->part_of_key_not_clustered which is like Field->part_of_key but the field in the clustered key is not assumed to be part of all index. (used in opt_range.cc for faster loops) - dbug_tmp_use_all_columns(), dbug_tmp_restore_column_map() tmp_use_all_columns() and tmp_restore_column_map() functions to temporally mark all columns as usable. The 'dbug_' version is primarily intended inside a handler when it wants to just call Field:store() & Field::val() functions, but don't need the column maps set for any other usage. (ie:: bitmap_is_set() is never called) - We can't use compare_records() to skip updates for handlers that returns a partial column set and the read_set doesn't cover all columns in the write set. 
The reason for this is that if we have a column marked only for write we can't in the MySQL level know if the value changed or not. The reason this worked before was that MySQL marked all to be written columns as also to be read. The new 'optimal' bitmaps exposed this 'hidden bug'. - open_table_from_share() does not anymore setup temporary MEM_ROOT object as a thread specific variable for the handler. Instead we send the to-be-used MEMROOT to get_new_handler(). (Simpler, faster code) Bugs fixed: - Column marking was not done correctly in a lot of cases. (ALTER TABLE, when using triggers, auto_increment fields etc) (Could potentially result in wrong values inserted in table handlers relying on that the old column maps or field->set_query_id was correct) Especially when it comes to triggers, there may be cases where the old code would cause lost/wrong values for NDB and/or InnoDB tables. - Split thd->options flag OPTION_STATUS_NO_TRANS_UPDATE to two flags: OPTION_STATUS_NO_TRANS_UPDATE and OPTION_KEEP_LOG. This allowed me to remove some wrong warnings about: "Some non-transactional changed tables couldn't be rolled back" - Fixed handling of INSERT .. SELECT and CREATE ... SELECT that wrongly reset (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) which caused us to loose some warnings about "Some non-transactional changed tables couldn't be rolled back") - Fixed use of uninitialized memory in ha_ndbcluster.cc::delete_table() which could cause delete_table to report random failures. - Fixed core dumps for some tests when running with --debug - Added missing FN_LIBCHAR in mysql_rm_tmp_tables() (This has probably caused us to not properly remove temporary files after crash) - slow_logs was not properly initialized, which could maybe cause extra/lost entries in slow log. - If we get an duplicate row on insert, change column map to read and write all columns while retrying the operation. 
This is required by the definition of REPLACE and also ensures that fields that are only part of UPDATE are properly handled. This fixed a bug in NDB and REPLACE where REPLACE wrongly copied some column values from the replaced row. - For table handler that doesn't support NULL in keys, we would give an error when creating a primary key with NULL fields, even after the fields has been automaticly converted to NOT NULL. - Creating a primary key on a SPATIAL key, would fail if field was not declared as NOT NULL. Cleanups: - Removed not used condition argument to setup_tables - Removed not needed item function reset_query_id_processor(). - Field->add_index is removed. Now this is instead maintained in (field->flags & FIELD_IN_ADD_INDEX) - Field->fieldnr is removed (use field->field_index instead) - New argument to filesort() to indicate that it should return a set of row pointers (not used columns). This allowed me to remove some references to sql_command in filesort and should also enable us to return column results in some cases where we couldn't before. - Changed column bitmap handling in opt_range.cc to be aligned with TABLE bitmap, which allowed me to use bitmap functions instead of looping over all fields to create some needed bitmaps. (Faster and smaller code) - Broke up found too long lines - Moved some variable declaration at start of function for better code readability. - Removed some not used arguments from functions. (setup_fields(), mysql_prepare_insert_check_table()) - setup_fields() now takes an enum instead of an int for marking columns usage. - For internal temporary tables, use handler::write_row(), handler::delete_row() and handler::update_row() instead of handler::ha_xxxx() for faster execution. - Changed some constants to enum's and define's. - Using separate column read and write sets allows for easier checking of timestamp field was set by statement. 
- Remove calls to free_io_cache() as this is now done automatically in ha_reset() - Don't build table->normalized_path as this is now identical to table->path (after bar's fixes to convert filenames) - Fixed some missed DBUG_PRINT(.."%lx") to use "0x%lx" to make it easier to do comparison with the 'convert-dbug-for-diff' tool. Things left to do in 5.1: - We wrongly log failed CREATE TABLE ... SELECT in some cases when using row based logging (as shown by testcase binlog_row_mix_innodb_myisam.result) Mats has promised to look into this. - Test that my fix for CREATE TABLE ... SELECT is indeed correct. (I added several test cases for this, but in this case it's better that someone else also tests this thoroughly). Lars has promised to do this.
2006-06-04 18:52:22 +03:00
return table->file->stats.records;
2000-07-31 21:29:14 +02:00
}
2000-07-31 21:29:14 +02:00
/****************************************************************************
** Functions to handle dynamic loadable aggregates
** Original source by: Alexis Mikhailov <root@medinf.chuvashia.su>
** Adapted for UDAs by: Andreas F. Bobak <bobak@relog.ch>.
** Rewritten by: Monty.
****************************************************************************/
#ifdef HAVE_DLOPEN
void Item_udf_sum::clear()
2000-07-31 21:29:14 +02:00
{
DBUG_ENTER("Item_udf_sum::clear");
udf.clear();
DBUG_VOID_RETURN;
2000-07-31 21:29:14 +02:00
}
bool Item_udf_sum::add()
{
DBUG_ENTER("Item_udf_sum::add");
2000-07-31 21:29:14 +02:00
udf.add(&null_value);
DBUG_RETURN(0);
}
/*
  Clean up the UDF handler first, then run the base class cleanup.
*/
void Item_udf_sum::cleanup()
{
  /*
    No special casing is needed for items produced by copy_or_same():
    udf_handler::cleanup() copes with both the original item and
    its copies.
  */
  udf.cleanup();

  Item_sum::cleanup();
}
/*
  Append "name(arg1,arg2,...)" to str: the function name followed by
  the comma separated printed form of every argument.
*/
void Item_udf_sum::print(String *str)
{
  str->append(func_name());
  str->append('(');
  if (arg_count)
  {
    /* The first argument is printed without a leading comma */
    args[0]->print(str);
    for (uint idx= 1; idx < arg_count; idx++)
    {
      str->append(',');
      args[idx]->print(str);
    }
  }
  str->append(')');
}
/*
  Build a new Item_sum_udf_float from this item, allocated on
  thd->mem_root, and return it.
*/
Item *Item_sum_udf_float::copy_or_same(THD* thd)
{
  Item *copy= new (thd->mem_root) Item_sum_udf_float(thd, this);
  return copy;
}
2004-11-11 21:39:35 +03:00
double Item_sum_udf_float::val_real()
2000-07-31 21:29:14 +02:00
{
DBUG_ASSERT(fixed == 1);
2000-07-31 21:29:14 +02:00
DBUG_ENTER("Item_sum_udf_float::val");
DBUG_PRINT("info",("result_type: %d arg_count: %d",
args[0]->result_type(), arg_count));
DBUG_RETURN(udf.val(&null_value));
}
2000-07-31 21:29:14 +02:00
/*
  String form of the result; delegates to val_string_from_real().
*/
String *Item_sum_udf_float::val_str(String *str)
{
  String *as_string= val_string_from_real(str);
  return as_string;
}
/*
  Decimal form of the result; delegates to val_decimal_from_real().
*/
my_decimal *Item_sum_udf_float::val_decimal(my_decimal *dec)
{
  my_decimal *as_decimal= val_decimal_from_real(dec);
  return as_decimal;
}
2005-02-09 02:50:45 +04:00
/*
  String form of the result; delegates to val_string_from_decimal().
*/
String *Item_sum_udf_decimal::val_str(String *str)
{
  String *as_string= val_string_from_decimal(str);
  return as_string;
}
/*
  Double form of the result; delegates to val_real_from_decimal().
*/
double Item_sum_udf_decimal::val_real()
{
  double as_real= val_real_from_decimal();
  return as_real;
}
/*
  Integer form of the result; delegates to val_int_from_decimal().
*/
longlong Item_sum_udf_decimal::val_int()
{
  longlong as_int= val_int_from_decimal();
  return as_int;
}
/*
  Return the aggregate value as a decimal.

  The value is obtained from udf.val_decimal(), which is handed a
  pointer to null_value and the caller supplied buffer dec_buf.
  The item must already be fixed (asserted in debug builds).
*/
my_decimal *Item_sum_udf_decimal::val_decimal(my_decimal *dec_buf)
{
  DBUG_ASSERT(fixed == 1);
  /* Trace tag names this class (was "Item_func_udf_decimal::...") */
  DBUG_ENTER("Item_sum_udf_decimal::val_decimal");
  DBUG_PRINT("info",("result_type: %d arg_count: %d",
                     args[0]->result_type(), arg_count));
  DBUG_RETURN(udf.val_decimal(&null_value, dec_buf));
}
/*
  Build a new Item_sum_udf_decimal from this item, allocated on
  thd->mem_root, and return it.
*/
Item *Item_sum_udf_decimal::copy_or_same(THD* thd)
{
  Item *copy= new (thd->mem_root) Item_sum_udf_decimal(thd, this);
  return copy;
}
/*
  Build a new Item_sum_udf_int from this item, allocated on
  thd->mem_root, and return it.
*/
Item *Item_sum_udf_int::copy_or_same(THD* thd)
{
  Item *copy= new (thd->mem_root) Item_sum_udf_int(thd, this);
  return copy;
}
2000-07-31 21:29:14 +02:00
/*
  Return the aggregate value as an integer.

  The value is obtained from udf.val_int(), which is handed a pointer
  to null_value. The item must already be fixed (asserted in debug
  builds).
*/
longlong Item_sum_udf_int::val_int()
{
  DBUG_ASSERT(fixed == 1);
  DBUG_ENTER("Item_sum_udf_int::val_int");
  DBUG_PRINT("info",("result_type: %d arg_count: %d",
                     args[0]->result_type(), arg_count));
  longlong value= udf.val_int(&null_value);
  DBUG_RETURN(value);
}
2000-07-31 21:29:14 +02:00
/*
  String form of the result; delegates to val_string_from_int().
*/
String *Item_sum_udf_int::val_str(String *str)
{
  String *as_string= val_string_from_int(str);
  return as_string;
}
/*
  Decimal form of the result; delegates to val_decimal_from_int().
*/
my_decimal *Item_sum_udf_int::val_decimal(my_decimal *dec)
{
  my_decimal *as_decimal= val_decimal_from_int(dec);
  return as_decimal;
}
2000-07-31 21:29:14 +02:00
/* Default max_length is max argument length */
void Item_sum_udf_str::fix_length_and_dec()
{
DBUG_ENTER("Item_sum_udf_str::fix_length_and_dec");
max_length=0;
for (uint i = 0; i < arg_count; i++)
set_if_bigger(max_length,args[i]->max_length);
DBUG_VOID_RETURN;
}
/*
  Build a new Item_sum_udf_str from this item, allocated on
  thd->mem_root, and return it.
*/
Item *Item_sum_udf_str::copy_or_same(THD* thd)
{
  Item *copy= new (thd->mem_root) Item_sum_udf_str(thd, this);
  return copy;
}
/*
  Decimal form of the result; delegates to val_decimal_from_string().
*/
my_decimal *Item_sum_udf_str::val_decimal(my_decimal *dec)
{
  my_decimal *as_decimal= val_decimal_from_string(dec);
  return as_decimal;
}
2000-07-31 21:29:14 +02:00
/*
  Return the aggregate value as a string.

  The value is obtained from udf.val_str(), which is given the caller's
  buffer and this item's str_value. null_value is set to TRUE exactly
  when the handler returns a NULL pointer. The item must already be
  fixed (asserted in debug builds).
*/
String *Item_sum_udf_str::val_str(String *str)
{
  DBUG_ASSERT(fixed == 1);
  /* Trace tag matches the real function name (was "...::str") */
  DBUG_ENTER("Item_sum_udf_str::val_str");
  String *res= udf.val_str(str, &str_value);
  null_value= !res;
  DBUG_RETURN(res);
}
#endif /* HAVE_DLOPEN */
/*****************************************************************************
GROUP_CONCAT function
SQL SYNTAX:
2005-03-18 00:41:03 +03:00
GROUP_CONCAT([DISTINCT] expr,... [ORDER BY col [ASC|DESC],...]
[SEPARATOR str_const])
concat of values from "group by" operation
BUGS
DISTINCT and ORDER BY only work if ORDER BY uses all fields and only fields
in expression list
Blobs don't work with DISTINCT or ORDER BY
*****************************************************************************/
/*
function of sort for syntax:
GROUP_CONCAT(DISTINCT expr,...)
*/
int group_concat_key_cmp_with_distinct(void* arg, byte* key1,
byte* key2)
{
Item_func_group_concat* grp_item= (Item_func_group_concat*)arg;
2005-03-18 00:41:03 +03:00
TABLE *table= grp_item->table;
Item **field_item, **end;
for (field_item= grp_item->args, end= field_item + grp_item->arg_count_field;
field_item < end;
field_item++)
{
/*
We have to use get_tmp_table_field() instead of
real_item()->get_tmp_table_field() because we want the field in
the temporary table, not the original field
*/
Field *field= (*field_item)->get_tmp_table_field();
/*
If field_item is a const item then either get_tmp_table_field returns 0
or it is an item over a const table.
*/
if (field && !(*field_item)->const_item())
{
int res;
uint offset= (field->offset(field->table->record[0]) -
table->s->null_bytes);
if ((res= field->cmp((char *) key1 + offset, (char *) key2 + offset)))
return res;
}
}
return 0;
}
/*
function of sort for syntax:
GROUP_CONCAT(expr,... ORDER BY col,... )
*/
int group_concat_key_cmp_with_order(void* arg, byte* key1, byte* key2)
{
Item_func_group_concat* grp_item= (Item_func_group_concat*) arg;
ORDER **order_item, **end;
2005-03-18 00:41:03 +03:00
TABLE *table= grp_item->table;
for (order_item= grp_item->order, end=order_item+ grp_item->arg_count_order;
order_item < end;
order_item++)
{
Item *item= *(*order_item)->item;
/*
We have to use get_tmp_table_field() instead of
real_item()->get_tmp_table_field() because we want the field in
the temporary table, not the original field
*/
Field *field= item->get_tmp_table_field();
/*
If item is a const item then either get_tp_table_field returns 0
or it is an item over a const table.
*/
if (field && !item->const_item())
{
int res;
uint offset= (field->offset(field->table->record[0]) -
table->s->null_bytes);
if ((res= field->cmp((char *) key1 + offset, (char *) key2 + offset)))
return (*order_item)->asc ? res : -res;
}
}
/*
We can't return 0 because in that case the tree class would remove this
item as double value. This would cause problems for case-changes and
2005-03-18 00:41:03 +03:00
if the returned values are not the same we do the sort on.
*/
return 1;
}
/*
  Key comparison callback for
    GROUP_CONCAT(DISTINCT expr,... ORDER BY col,... )

  Returns 0 when the DISTINCT comparison reports equal keys, otherwise
  the result of the ORDER BY comparison.

  BUG:
    This doesn't work in the case when the order by contains data that
    is not part of the field list because tree-insert will not notice
    the duplicated values when inserting things sorted by ORDER BY
*/

int group_concat_key_cmp_with_distinct_and_order(void* arg,byte* key1,
                                                 byte* key2)
{
  int distinct_res= group_concat_key_cmp_with_distinct(arg, key1, key2);
  if (distinct_res == 0)
    return 0;
  return group_concat_key_cmp_with_order(arg, key1, key2);
}
/*
  Append data from current leaf to item->result

  key    - row data of the leaf (the record without its null bytes)
  count  - unused
  item   - the GROUP_CONCAT item whose result is extended

  The separator is appended first, except for the first value of a group
  (item->no_appended). Then the string value of every argument is
  appended. If the result grows beyond item->max_length it is cut back,
  item->count_cut_values is incremented and item->warning_for_row is set.

  RETURN
    0  value appended in full
    1  result was cut at max_length
*/

int dump_leaf_key(byte* key, element_count count __attribute__((unused)),
                  Item_func_group_concat *item)
{
  TABLE *table= item->table;
  /* Scratch string that uses record[1] of the table as its buffer */
  String tmp((char *) table->record[1], table->s->reclength,
             default_charset_info);
  String *result= &item->result;
  /* Length before anything (including the separator) is appended */
  uint old_length= result->length();

  if (item->no_appended)
    item->no_appended= FALSE;
  else
    result->append(*item->separator);

  tmp.length(0);

  for (uint idx= 0; idx < item->arg_count_field; idx++)
  {
    Item *cur_arg= item->args[idx];
    String *res;
    if (cur_arg->const_item())
      res= cur_arg->val_str(&tmp);
    else
    {
      /*
        We have to use get_tmp_table_field() instead of
        real_item()->get_tmp_table_field() because we want the field in
        the temporary table, not the original field
        We also can't use table->field array to access the fields
        because it contains both order and arg list fields.
      */
      Field *field= cur_arg->get_tmp_table_field();
      uint offset= (field->offset(field->table->record[0]) -
                    table->s->null_bytes);
      DBUG_ASSERT(offset < table->s->reclength);
      res= field->val_str(&tmp, (char *) key + offset);
    }
    if (res)
      result->append(*res);
  }

  /* Nothing more to do while the result fits into max_length */
  if (result->length() <= item->max_length)
    return 0;

  /* Result is too long: cut it and tell the caller to stop */
  int well_formed_error;
  CHARSET_INFO *cs= item->collation.collation;
  const char *ptr= result->ptr();
  uint add_length;
  /*
    It's ok to use item->result.length() as the fourth argument
    as this is never used to limit the length of the data.
    Cut is done with the third argument.
  */
  add_length= cs->cset->well_formed_len(cs,
                                        ptr + old_length,
                                        ptr + item->max_length,
                                        result->length(),
                                        &well_formed_error);
  result->length(old_length + add_length);
  item->count_cut_values++;
  item->warning_for_row= TRUE;
  return 1;
}
/*
Constructor of Item_func_group_concat
2005-03-18 00:41:03 +03:00
distinct_arg - distinct
select_list - list of expression for show values
order_list - list of sort columns
separator_arg - string value of separator
*/
2005-03-18 00:41:03 +03:00
Item_func_group_concat::
Item_func_group_concat(Name_resolution_context *context_arg,
bool distinct_arg, List<Item> *select_list,
2005-03-18 00:41:03 +03:00
SQL_LIST *order_list, String *separator_arg)
:tmp_table_param(0), warning(0),
separator(separator_arg), tree(0), table(0),
order(0), context(context_arg),
2005-03-18 00:41:03 +03:00
arg_count_order(order_list ? order_list->elements : 0),
arg_count_field(select_list->elements),
count_cut_values(0),
distinct(distinct_arg),
warning_for_row(FALSE),
force_copy_fields(0), original(0)
{
Item *item_select;
Item **arg_ptr;
2005-03-18 00:41:03 +03:00
quick_group= FALSE;
arg_count= arg_count_field + arg_count_order;
2005-03-18 00:41:03 +03:00
/*
We need to allocate:
args - arg_count_field+arg_count_order
(for possible order items in temporare tables)
order - arg_count_order
*/
if (!(args= (Item**) sql_alloc(sizeof(Item*) * arg_count +
2005-03-18 00:41:03 +03:00
sizeof(ORDER*)*arg_count_order)))
return;
order= (ORDER**)(args + arg_count);
/* fill args items of show and sort */
2005-03-18 00:41:03 +03:00
List_iterator_fast<Item> li(*select_list);
for (arg_ptr=args ; (item_select= li++) ; arg_ptr++)
*arg_ptr= item_select;
2005-03-18 00:41:03 +03:00
if (arg_count_order)
{
ORDER **order_ptr= order;
2005-03-18 00:41:03 +03:00
for (ORDER *order_item= (ORDER*) order_list->first;
order_item != NULL;
order_item= order_item->next)
{
(*order_ptr++)= order_item;
*arg_ptr= *order_item->item;
order_item->item= arg_ptr++;
}
}
}
2005-03-18 00:41:03 +03:00
/*
  Copy constructor used by copy_or_same().

  All members are copied from 'item', so the copy shares the pointers
  (tmp_table_param, tree, table, order, ...) with its source.
  'original' is set to the source item; cleanup() frees the shared
  objects only for items whose 'original' is 0.
*/

Item_func_group_concat::Item_func_group_concat(THD *thd,
                                               Item_func_group_concat *item)
  :Item_sum(thd, item),
   tmp_table_param(item->tmp_table_param),
   warning(item->warning),
   separator(item->separator),
   tree(item->tree),
   table(item->table),
   order(item->order),
   context(item->context),
   arg_count_order(item->arg_count_order),
   arg_count_field(item->arg_count_field),
   count_cut_values(item->count_cut_values),
   distinct(item->distinct),
   warning_for_row(item->warning_for_row),
   always_null(item->always_null),
   force_copy_fields(item->force_copy_fields),
   original(item)
{
  quick_group= item->quick_group;
}
void Item_func_group_concat::cleanup()
{
THD *thd= current_thd;
DBUG_ENTER("Item_func_group_concat::cleanup");
Item_sum::cleanup();
/* Adjust warning message to include total number of cut values */
if (warning)
{
char warn_buff[MYSQL_ERRMSG_SIZE];
sprintf(warn_buff, ER(ER_CUT_VALUE_GROUP_CONCAT), count_cut_values);
warning->set_msg(thd, warn_buff);
warning= 0;
}
/*
Free table and tree if they belong to this item (if item have not pointer
to original item from which was made copy => it own its objects )
*/
if (!original)
{
2005-03-18 00:41:03 +03:00
delete tmp_table_param;
tmp_table_param= 0;
if (table)
{
2005-03-18 00:41:03 +03:00
THD *thd= table->in_use;
free_tmp_table(thd, table);
table= 0;
2005-03-18 00:41:03 +03:00
if (tree)
{
delete_tree(tree);
tree= 0;
}
if (warning)
{
char warn_buff[MYSQL_ERRMSG_SIZE];
sprintf(warn_buff, ER(ER_CUT_VALUE_GROUP_CONCAT), count_cut_values);
warning->set_msg(thd, warn_buff);
warning= 0;
}
}
2005-03-18 00:41:03 +03:00
DBUG_ASSERT(tree == 0);
DBUG_ASSERT(warning == 0);
}
DBUG_VOID_RETURN;
}
/*
  Build a new Item_func_group_concat from this item through the copy
  constructor, allocated on thd->mem_root, and return it.
*/
Item *Item_func_group_concat::copy_or_same(THD* thd)
{
  Item *copy= new (thd->mem_root) Item_func_group_concat(thd, this);
  return copy;
}
void Item_func_group_concat::clear()
{
result.length(0);
result.copy();
null_value= TRUE;
warning_for_row= FALSE;
no_appended= TRUE;
2005-03-18 00:41:03 +03:00
if (tree)
reset_tree(tree);
2005-03-18 00:41:03 +03:00
/* No need to reset the table as we never call write_row */
}
bool Item_func_group_concat::add()
{
2003-06-03 17:02:51 +06:00
if (always_null)
return 0;
copy_fields(tmp_table_param);
copy_funcs(tmp_table_param->items_to_copy);
for (uint i= 0; i < arg_count_field; i++)
{
Item *show_item= args[i];
if (!show_item->const_item())
{
2005-06-07 05:43:59 +03:00
Field *f= show_item->get_tmp_table_field();
if (f->is_null_in_record((const uchar*) table->record[0]))
2005-03-18 00:41:03 +03:00
return 0; // Skip row if it contains null
}
}
null_value= FALSE;
TREE_ELEMENT *el= 0; // Only for safety
2005-03-18 00:41:03 +03:00
if (tree)
el= tree_insert(tree, table->record[0] + table->s->null_bytes, 0,
tree->custom_arg);
/*
If the row is not a duplicate (el->count == 1)
we can dump the row here in case of GROUP_CONCAT(DISTINCT...)
instead of doing tree traverse later.
*/
if (!warning_for_row &&
2005-03-18 00:41:03 +03:00
(!tree || (el->count == 1 && distinct && !arg_count_order)))
dump_leaf_key(table->record[0] + table->s->null_bytes, 1, this);
return 0;
}
/*
  Resolve the arguments of GROUP_CONCAT and set up the result metadata.

  thd  - thread context
  ref  - passed on to check_sum_func()

  RETURN
    FALSE  ok, the item is marked as fixed
    TRUE   error
*/

bool
Item_func_group_concat::fix_fields(THD *thd, Item **ref)
{
  DBUG_ASSERT(fixed == 0);

  if (init_sum_func_check(thd))
    return TRUE;

  maybe_null= 1;

  /*
    Fix fields for select list and ORDER clause
  */
  for (uint idx= 0; idx < arg_count; idx++)
  {
    if (!args[idx]->fixed && args[idx]->fix_fields(thd, args + idx))
      return TRUE;
    /* Every argument must be a single column expression */
    if (args[idx]->check_cols(1))
      return TRUE;
  }

  /* skip charset aggregation for order columns */
  uint field_args= arg_count - arg_count_order;
  if (agg_item_charsets(collation, func_name(), args, field_args,
                        MY_COLL_ALLOW_CONV, 1))
    return TRUE;

  result.set_charset(collation.collation);
  result_field= 0;
  null_value= 1;
  max_length= thd->variables.group_concat_max_len;

  if (check_sum_func(thd, ref))
    return TRUE;

  fixed= 1;
  return FALSE;
}
/*
  Allocate the working structures used to evaluate GROUP_CONCAT

  SYNOPSIS
    setup()
    thd   thread context

  DESCRIPTION
    Creates the temporary table for the argument (and ORDER BY) columns
    and, when DISTINCT or ORDER BY is present, initializes the tree that
    sorts the rows and removes duplicates.

  RETURN
    FALSE  success (also when there was nothing to do)
    TRUE   error
*/

bool Item_func_group_concat::setup(THD *thd)
{
  List<Item> field_list;
  SELECT_LEX *cur_select= thd->lex->current_select;
  DBUG_ENTER("Item_func_group_concat::setup");

  /*
    setup() may currently be invoked twice; a repeated call returns at
    once. Replace this check with an assertion when that is fixed.
  */
  if (table || tree)
    DBUG_RETURN(FALSE);

  tmp_table_param= new TMP_TABLE_PARAM;
  if (!tmp_table_param)
    DBUG_RETURN(TRUE);

  /* All blobs become varchar fields in the temporary table */
  tmp_table_param->convert_blob_length= max_length *
                                        collation.collation->mbmaxlen;

  /*
    Collect the concatenated expressions. A constant NULL argument marks
    the function as always NULL and stops the setup right there.
  */
  always_null= 0;
  for (uint idx= 0; idx < arg_count_field; idx++)
  {
    Item *arg= args[idx];
    if (field_list.push_back(arg))
      DBUG_RETURN(TRUE);
    if (arg->const_item() && arg->is_null())
    {
      always_null= 1;
      DBUG_RETURN(FALSE);
    }
  }

  List<Item> all_fields(field_list);

  /*
    Look up every ORDER expression among the GROUP_CONCAT arguments.
    An expression that is not found is prepended to "all_fields", which
    then serves as the column list of the temporary table.
  */
  if (arg_count_order)
  {
    if (setup_order(thd, args, context->table_list, field_list, all_fields,
                    *order))
      DBUG_RETURN(TRUE);
  }

  count_field_types(tmp_table_param, all_fields, 0);
  tmp_table_param->force_copy_fields= force_copy_fields;
  DBUG_ASSERT(table == 0);

  /*
    The temporary table gives us the field descriptions (types, sizes
    and so on). In the table the ORDER BY fields come first, followed by
    the field list.

    set_sum_field is passed as TRUE so that a blob value is stored in the
    record buffer itself rather than as a pointer to it.
  */
  table= create_tmp_table(thd, tmp_table_param, all_fields,
                          (ORDER*) 0, 0, TRUE,
                          (cur_select->options | thd->options),
                          HA_POS_ERROR, (char*) "");
  if (!table)
    DBUG_RETURN(TRUE);

  /* No need to store rows: only record[0] of the table is used */
  table->file->extra(HA_EXTRA_NO_ROWS);
  table->no_rows= 1;

  if (!distinct && !arg_count_order)
    DBUG_RETURN(FALSE);                         // No sorting, no tree

  /*
    Sorting is needed: pick the comparison function and init the tree.
    No space is reserved for NULLs: a row with a NULL gconcat argument is
    never added to the result.
  */
  qsort_cmp2 key_cmp;
  if (!arg_count_order)
    key_cmp= (qsort_cmp2) group_concat_key_cmp_with_distinct;
  else if (distinct)
    key_cmp= (qsort_cmp2) group_concat_key_cmp_with_distinct_and_order;
  else
    key_cmp= (qsort_cmp2) group_concat_key_cmp_with_order;

  uint tree_key_length= table->s->reclength - table->s->null_bytes;
  tree= &tree_base;

  /*
    The tree both sorts the rows and removes duplicate values (according
    to the syntax of this function).
  */
  init_tree(tree, (uint) min(thd->variables.max_heap_table_size,
                             thd->variables.sortbuff_size/16), 0,
            tree_key_length, key_cmp, 0, NULL, (void*) this);

  DBUG_RETURN(FALSE);
}
/* This is used by rollup to create a separate usable copy of the function */
void Item_func_group_concat::make_unique()
{
2005-03-18 00:41:03 +03:00
tmp_table_param= 0;
table=0;
original= 0;
Fixed bug#15560: GROUP_CONCAT wasn't ready for WITH ROLLUP queries The GROUP_CONCAT uses its own temporary table. When ROLLUP is present it creates the second copy of Item_func_group_concat. This copy receives the same list of arguments that original group_concat does. When the copy is set up the result_fields of functions from the argument list are reset to the temporary table of this copy. As a result of this action data from functions flow directly to the ROLLUP copy and the original group_concat functions shows wrong result. Since queries with COUNT(DISTINCT ...) use temporary tables to store the results the COUNT function they are also affected by this bug. The idea of the fix is to copy content of the result_field for the function under GROUP_CONCAT/COUNT from the first temporary table to the second one, rather than setting result_field to point to the second temporary table. To achieve this goal force_copy_fields flag is added to Item_func_group_concat and Item_sum_count_distinct classes. This flag is initialized to 0 and set to 1 into the make_unique() member function of both classes. To the TMP_TABLE_PARAM structure is modified to include the similar flag as well. The create_tmp_table() function passes that flag to create_tmp_field(). When the flag is set the create_tmp_field() function will set result_field as a source field and will not reset that result field to newly created field for Item_func_result_field and its descendants. Due to this there will be created copy func to copy data from old result_field to newly created field.
2006-03-29 23:30:34 +04:00
force_copy_fields= 1;
2005-03-18 00:41:03 +03:00
tree= 0;
}
/*
  Return the concatenated value of the group

  SYNOPSIS
    val_str()
    str   not used; the value is kept in the item's own "result" buffer

  RETURN
    0        the value is NULL
    &result  otherwise
*/

String* Item_func_group_concat::val_str(String* str)
{
  DBUG_ASSERT(fixed == 1);

  if (null_value)
    return 0;

  /* Warn about cut values unless a warning is already registered */
  if (count_cut_values && !warning)
  {
    /*
      ER_CUT_VALUE_GROUP_CONCAT needs an argument, but this gets set in
      Item_func_group_concat::cleanup().
    */
    DBUG_ASSERT(table);
    warning= push_warning(table->in_use, MYSQL_ERROR::WARN_LEVEL_WARN,
                          ER_CUT_VALUE_GROUP_CONCAT,
                          ER(ER_CUT_VALUE_GROUP_CONCAT));
  }

  /* Walk the tree only while the result buffer is still empty */
  if (!result.length() && tree)
    tree_walk(tree, (tree_walk_action)&dump_leaf_key, (void*)this,
              left_root_right);

  return &result;
}
/*
  Append the textual form of this GROUP_CONCAT call to a string

  SYNOPSIS
    print()
    str   string to append to

  DESCRIPTION
    The output has the form
      group_concat([distinct ]arg[,arg...]
                   [ order by expr ASC|DESC[,...]] separator '...')
*/

void Item_func_group_concat::print(String *str)
{
  str->append(STRING_WITH_LEN("group_concat("));
  if (distinct)
    str->append(STRING_WITH_LEN("distinct "));

  /* Comma separated list of the concatenated expressions */
  for (uint arg_no= 0; arg_no < arg_count_field; arg_no++)
  {
    if (arg_no > 0)
      str->append(',');
    args[arg_no]->print(str);
  }

  if (arg_count_order)
  {
    str->append(STRING_WITH_LEN(" order by "));
    for (uint ord_no= 0; ord_no < arg_count_order; ord_no++)
    {
      ORDER *ord= order[ord_no];
      if (ord_no > 0)
        str->append(',');
      (*ord->item)->print(str);
      if (ord->asc)
        str->append(STRING_WITH_LEN(" ASC"));
      else
        str->append(STRING_WITH_LEN(" DESC"));
    }
  }

  str->append(STRING_WITH_LEN(" separator \'"));
  str->append(*separator);
  str->append(STRING_WITH_LEN("\')"));
}