mariadb/sql/item_windowfunc.h
Vicențiu Ciorbaru 3544fe0144 Implemented cume_dist function.
Also fixed a bug in row_counts detection, when partition changes.
2016-03-28 22:51:42 +03:00

723 lines
16 KiB
C++

#ifndef ITEM_WINDOWFUNC_INCLUDED
#define ITEM_WINDOWFUNC_INCLUDED
#include "my_global.h"
#include "item.h"
/* This header only uses Window_spec through pointers, so a forward declaration suffices. */
class Window_spec;
/* Not defined in this header; called by Group_bound_tracker::check_if_next_group(). */
int test_if_group_changed(List<Cached_item> &list);
/* A wrapper around test_if_group_changed */
class Group_bound_tracker
{
List<Cached_item> group_fields;
public:
void init(THD *thd, SQL_I_List<ORDER> *list)
{
for (ORDER *curr = list->first; curr; curr=curr->next)
{
Cached_item *tmp= new_Cached_item(thd, curr->item[0], TRUE);
group_fields.push_back(tmp);
}
}
void cleanup()
{
group_fields.empty();
}
/*
Check if the current row is in a different group than the previous row
this function was called for.
The new row's group becomes the current row's group.
*/
bool check_if_next_group()
{
if (test_if_group_changed(group_fields) > -1)
return true;
return false;
}
int compare_with_cache()
{
List_iterator<Cached_item> li(group_fields);
Cached_item *ptr;
int res;
while ((ptr= li++))
{
if ((res= ptr->cmp_read_only()))
return res;
}
return 0;
}
};
/*
ROW_NUMBER() OVER (...)
@detail
- This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition.
*/
class Item_sum_row_number: public Item_sum_int
{
  /* Number of add() calls since the last clear(). */
  longlong count;

public:
  /* Restart the numbering. */
  void clear()
  {
    count= 0;
  }

  /* Account for one more row; always returns false. */
  bool add()
  {
    ++count;
    return false;
  }

  /* Intentionally empty. */
  void update_field() {}

  Item_sum_row_number(THD *thd)
    : Item_sum_int(thd), count(0)
  {}

  enum Sumfunctype sum_func() const
  {
    return ROW_NUMBER_FUNC;
  }

  /* The current row number, i.e. the value of the counter. */
  longlong val_int()
  {
    return count;
  }

  const char *func_name() const
  {
    return "row_number";
  }
};
/*
RANK() OVER (...) Windowing function
@detail
- This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition.
The function is defined as:
"The rank of row R is defined as 1 (one) plus the number of rows that
precede R and are not peers of R"
"This implies that if two or more rows are not distinct with respect to
the window ordering, then there will be one or more gaps in the sequential
rank numbering."
*/
class Item_sum_rank: public Item_sum_int
{
protected:
  longlong row_number; // just ROW_NUMBER()
  longlong cur_rank;   // current value
  Group_bound_tracker peer_tracker;

public:
  /*
    Both counters start with the values clear() assigns, so val_int() never
    reads an indeterminate value when it is called before the first clear().
  */
  Item_sum_rank(THD *thd)
    : Item_sum_int(thd), row_number(0), cur_rank(1) {}

  void clear()
  {
    /* This is called on partition start */
    cur_rank= 1;
    row_number= 0;
  }

  /* Declared here, defined out of line. */
  bool add();

  /* The rank of the current row. */
  longlong val_int()
  {
    return cur_rank;
  }

  /* Intentionally empty. */
  void update_field() {}
  /*
    void reset_field();
    TODO: ^^ what does this do ? It is not called ever?
  */

  enum Sumfunctype sum_func () const
  {
    return RANK_FUNC;
  }

  const char*func_name() const
  {
    return "rank";
  }

  /* Declared here, defined out of line. */
  void setup_window_func(THD *thd, Window_spec *window_spec);

  /* Release the peer tracker's cached items, then run the base cleanup. */
  void cleanup()
  {
    peer_tracker.cleanup();
    Item_sum_int::cleanup();
  }
};
/*
DENSE_RANK() OVER (...) Windowing function
@detail
- This is a Window function (not just an aggregate)
- It can be computed by doing one pass over select output, provided
the output is sorted according to the window definition.
The function is defined as:
"If DENSE_RANK is specified, then the rank of row R is defined as the
number of rows preceding and including R that are distinct with respect
to the window ordering"
"This implies that there are no gaps in the sequential rank numbering of
rows in each window partition."
*/
class Item_sum_dense_rank: public Item_sum_int
{
  longlong dense_rank;
  Group_bound_tracker peer_tracker;
  /*
    XXX(cvicentiu) This class could potentially be implemented in the rank
    class, with a switch for the DENSE case.
  */
public:
  /*
    The four overrides below used to sit in the implicit private section.
    They are public here, as in Item_sum_rank and Item_sum_row_number, so
    the class can be used through its own type as well as through a base
    class pointer.
  */

  /* Restart the dense rank at 1. */
  void clear()
  {
    dense_rank= 1;
  }

  /* Declared here, defined out of line. */
  bool add();

  /* Intentionally empty. */
  void update_field() {}

  /* The dense rank of the current row. */
  longlong val_int()
  {
    return dense_rank;
  }

  Item_sum_dense_rank(THD *thd)
    : Item_sum_int(thd), dense_rank(0) {}

  enum Sumfunctype sum_func () const
  {
    return DENSE_RANK_FUNC;
  }

  const char*func_name() const
  {
    return "dense_rank";
  }

  /* Declared here, defined out of line. */
  void setup_window_func(THD *thd, Window_spec *window_spec);

  /* Release the peer tracker's cached items, then run the base cleanup. */
  void cleanup()
  {
    peer_tracker.cleanup();
    Item_sum_int::cleanup();
  }
};
/* TODO-cvicentiu
* Perhaps this is overengineering, but I would like to decouple the 2-pass
* algorithm from the specific action that must be performed during the
* first pass. The second pass can make use of the "add" function from the
* Item_sum_<window_function>.
*/
/*
This class represents a generic interface for window functions that need
to store additional information. Such window functions include percent_rank
and cume_dist.
*/
class Window_context
{
public:
  /* Feed one field into the context. */
  virtual void add_field_to_context(Field *field)= 0;

  /* Bring the context back to its initial state. */
  virtual void reset()= 0;

  virtual ~Window_context() {}
};
/*
A generic interface that specifies the datatype that the context represents.
*/
template <typename T>
class Window_context_getter
{
protected:
  /* Return the context value of type T associated with the given field. */
  virtual T get_field_context(const Field *field)= 0;

  /* Protected: not destructible from outside the class hierarchy. */
  virtual ~Window_context_getter() {}
};
/*
A window function context representing the number of rows that are present
within a partition. Because the number of rows does not depend on the
in this case.
*/
class Window_context_row_count :
  public Window_context, Window_context_getter<ulonglong>
{
public:
  Window_context_row_count() : num_rows_(0) {}

  /* Count one more row; the field itself is not inspected. */
  void add_field_to_context(Field *field __attribute__((unused)))
  {
    ++num_rows_;
  }

  /* Forget all rows counted so far. */
  void reset()
  {
    num_rows_= 0;
  }

  /* The number of rows counted since the last reset(); the field is ignored. */
  ulonglong get_field_context(const Field *field __attribute__((unused)))
  {
    return num_rows_;
  }

private:
  /* Rows added since construction or the last reset(). */
  ulonglong num_rows_;
};
/*
  Context whose getter yields a pair of ulonglong values.
  As written the class overrides none of the pure virtual functions of its
  bases, so it is still abstract.
*/
class Window_context_row_and_group_count :
  public Window_context,
  Window_context_getter<std::pair<ulonglong, ulonglong> >
{
public:
  /* The argument is currently not used. */
  Window_context_row_and_group_count(void *group_list) {}
};
/*
An abstract class representing an item that holds a context.
*/
class Item_context
{
public:
  /* No context exists until create_window_context() provides one. */
  Item_context() : context_(NULL) {}

  /*
    The class is a polymorphic base, so its destructor is virtual, just like
    the destructor of Window_context.
  */
  virtual ~Item_context() {}

  /* The context currently held; NULL right after construction. */
  Window_context* get_window_context() { return context_; }

  /* To be provided by the implementing class. */
  virtual bool create_window_context() = 0;
  virtual void delete_window_context() = 0;

protected:
  Window_context* context_;
};
/*
A base window function (aggregate) that also holds a context.
NOTE: All two pass window functions need to implement
this interface.
*/
class Item_sum_window_with_row_count : public Item_sum_num
{
public:
  /* The partition row count starts at 0 until set_row_count() is called. */
  Item_sum_window_with_row_count(THD *thd)
    : Item_sum_num(thd), partition_row_count_(0)
  {}

  /* Record the number of rows of the partition. */
  void set_row_count(ulonglong count)
  {
    partition_row_count_= count;
  }

protected:
  /* The recorded row count; the unsigned counter is returned as longlong. */
  longlong get_row_count()
  {
    return partition_row_count_;
  }

private:
  ulonglong partition_row_count_;
};
/*
@detail
"The relative rank of a row R is defined as (RK-1)/(NR-1), where RK is
defined to be the RANK of R and NR is defined to be the number of rows in
the window partition of R."
Computation of this function requires two passes:
- First pass to find #rows in the partition
This is held within the row_count context.
- Second pass to compute rank of current row and the value of the function
*/
class Item_sum_percent_rank: public Item_sum_window_with_row_count
{
public:
Item_sum_percent_rank(THD *thd)
: Item_sum_window_with_row_count(thd), cur_rank(1) {}
longlong val_int()
{
/*
Percent rank is a real value so calling the integer value should never
happen. It makes no sense as it gets truncated to either 0 or 1.
*/
DBUG_ASSERT(0);
return 0;
}
double val_real()
{
/*
We can not get the real value without knowing the number of rows
in the partition. Don't divide by 0.
*/
ulonglong partition_rows = get_row_count();
null_value= partition_rows > 0 ? false : true;
return partition_rows > 1 ?
static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
}
enum Sumfunctype sum_func () const
{
return PERCENT_RANK_FUNC;
}
const char*func_name() const
{
return "percent_rank";
}
void update_field() {}
void clear()
{
cur_rank= 1;
row_number= 0;
}
bool add();
enum Item_result result_type () const { return REAL_RESULT; }
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
void fix_length_and_dec()
{
decimals = 10; // TODO-cvicentiu find out how many decimals the standard
// requires.
}
void setup_window_func(THD *thd, Window_spec *window_spec);
private:
longlong cur_rank; // Current rank of the current row.
longlong row_number; // Value if this were ROW_NUMBER() function.
Group_bound_tracker peer_tracker;
void cleanup()
{
peer_tracker.cleanup();
}
};
/*
@detail
"The relative rank of a row R is defined as NP/NR, where
- NP is defined to be the number of rows preceding or peer with R in the
window ordering of the window partition of R
- NR is defined to be the number of rows in the window partition of R."
Just like with Item_sum_percent_rank, computation of this function requires
two passes.
*/
class Item_sum_cume_dist: public Item_sum_window_with_row_count
{
public:
  Item_sum_cume_dist(THD *thd)
    : Item_sum_window_with_row_count(thd), current_row_count_(0)
  {}

  /*
    Returns current_row_count_ divided by the partition row count.
    The result is NULL (and 0 is returned) while the partition row count
    is 0.
  */
  double val_real()
  {
    const ulonglong partition_row_count= get_row_count();
    if (partition_row_count == 0)
    {
      null_value= true;
      return 0;
    }
    null_value= false;
    return static_cast<double>(current_row_count_) / partition_row_count;
  }

  /* Account for one more row; always returns false. */
  bool add()
  {
    ++current_row_count_;
    return false;
  }

  enum Sumfunctype sum_func () const
  {
    return CUME_DIST_FUNC;
  }

  /* Reset both the running row counter and the partition row count. */
  void clear()
  {
    current_row_count_= 0;
    set_row_count(0);
  }

  const char *func_name() const
  {
    return "cume_dist";
  }

  /* Intentionally empty. */
  void update_field() {}

  enum Item_result result_type () const { return REAL_RESULT; }
  enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }

  void fix_length_and_dec()
  {
    // TODO-cvicentiu find out how many decimals the standard requires.
    decimals= 10;
  }

private:
  /* Rows added since the last clear(). */
  ulonglong current_row_count_;
};
class Item_window_func : public Item_func_or_sum
{
public:
  /* Window function parameters as we've got them from the parser */
  LEX_STRING *window_name;
  Window_spec *window_spec;

  /*
    This stores the data about the partition we're currently in.
    advance_window() uses this to tell when we've left one partition and
    entered another
  */
  Group_bound_tracker partition_tracker;

  /* Window given by name; the specification pointer starts out as NULL. */
  Item_window_func(THD *thd, Item_sum *win_func, LEX_STRING *win_name)
    : Item_func_or_sum(thd, (Item *) win_func),
      window_name(win_name),
      window_spec(NULL),
      force_return_blank(true),
      read_value_from_result_field(false)
  {}

  /* Window given by its specification; there is no name in this case. */
  Item_window_func(THD *thd, Item_sum *win_func, Window_spec *win_spec)
    : Item_func_or_sum(thd, (Item *) win_func),
      window_name(NULL),
      window_spec(win_spec),
      force_return_blank(true),
      read_value_from_result_field(false)
  {}

  /* The wrapped function, stored as the first argument. */
  Item_sum *window_func() const
  {
    return (Item_sum *) args[0];
  }

  void update_used_tables();

  /* True for ROW_NUMBER, RANK, DENSE_RANK, PERCENT_RANK and CUME_DIST. */
  bool is_frame_prohibited() const
  {
    bool prohibited= false;
    switch (window_func()->sum_func()) {
    case Item_sum::ROW_NUMBER_FUNC:
    case Item_sum::RANK_FUNC:
    case Item_sum::DENSE_RANK_FUNC:
    case Item_sum::PERCENT_RANK_FUNC:
    case Item_sum::CUME_DIST_FUNC:
      prohibited= true;
      break;
    default:
      break;
    }
    return prohibited;
  }

  /* True for PERCENT_RANK and CUME_DIST. */
  bool requires_partition_size() const
  {
    bool needed= false;
    switch (window_func()->sum_func()) {
    case Item_sum::PERCENT_RANK_FUNC:
    case Item_sum::CUME_DIST_FUNC:
      needed= true;
      break;
    default:
      break;
    }
    return needed;
  }

  /* True for CUME_DIST only. */
  bool requires_peer_size() const
  {
    return window_func()->sum_func() == Item_sum::CUME_DIST_FUNC;
  }

  /* True for RANK, DENSE_RANK, PERCENT_RANK and CUME_DIST. */
  bool is_order_list_mandatory() const
  {
    bool mandatory= false;
    switch (window_func()->sum_func()) {
    case Item_sum::RANK_FUNC:
    case Item_sum::DENSE_RANK_FUNC:
    case Item_sum::PERCENT_RANK_FUNC:
    case Item_sum::CUME_DIST_FUNC:
      mandatory= true;
      break;
    default:
      break;
    }
    return mandatory;
  }

  /*
    Computation functions.
    TODO: consider merging these with class Group_bound_tracker.
  */
  void setup_partition_border_check(THD *thd);
  void advance_window();
  bool check_if_partition_changed();

  /* The field type is the one reported by the wrapped function. */
  enum_field_types field_type() const
  {
    return window_func()->field_type();
  }

  enum Item::Type type() const { return Item::WINDOW_FUNC_ITEM; }

private:
  /*
    Window functions are very special functions, so val_() methods have
    special meaning for them:

    - Phase#1, "Initial" we run the join and put its result into temporary
      table. For window functions, we write the default value (NULL?) as
      a placeholder.

    - Phase#2: "Computation": executor does the scan in {PARTITION, ORDER BY}
      order of this window function. It calls appropriate methods to inform
      the window function about rows entering/leaving the window.
      It calls window_func()->val_int() so that current window function value
      can be saved and stored in the temp.table.

    - Phase#3: "Retrieval" the temporary table is read and passed to query
      output. However, Item_window_func still remains in the select list,
      so item_windowfunc->val_int() will be called.
      During Phase#3, read_value_from_result_field= true.
  */
  bool force_return_blank;
  bool read_value_from_result_field;

public:
  /* Phase#1: val_*() methods return a blank, non-NULL value. */
  void set_phase_to_initial()
  {
    force_return_blank= true;
    read_value_from_result_field= false;
  }

  /* Phase#2: val_*() methods forward to the wrapped function. */
  void set_phase_to_computation()
  {
    force_return_blank= false;
    read_value_from_result_field= false;
  }

  /* Phase#3: val_*() methods read from result_field. */
  void set_phase_to_retrieval()
  {
    force_return_blank= false;
    read_value_from_result_field= true;
  }

  /*
    Each val_*() method below picks its value according to the phase flags
    and sets null_value to match the source the value was taken from.
  */
  double val_real()
  {
    if (force_return_blank)
    {
      null_value= false;
      return 0.0;
    }
    if (read_value_from_result_field)
    {
      const double stored= result_field->val_real();
      null_value= result_field->is_null();
      return stored;
    }
    const double computed= window_func()->val_real();
    null_value= window_func()->null_value;
    return computed;
  }

  longlong val_int()
  {
    if (force_return_blank)
    {
      null_value= false;
      return 0;
    }
    if (read_value_from_result_field)
    {
      const longlong stored= result_field->val_int();
      null_value= result_field->is_null();
      return stored;
    }
    const longlong computed= window_func()->val_int();
    null_value= window_func()->null_value;
    return computed;
  }

  String *val_str(String *str)
  {
    if (force_return_blank)
    {
      /* The blank value is the caller's buffer truncated to length 0. */
      null_value= false;
      str->length(0);
      return str;
    }
    if (read_value_from_result_field)
    {
      /* A NULL field is not read at all. */
      if ((null_value= result_field->is_null()))
        return NULL;
      return result_field->val_str(str);
    }
    String *computed= window_func()->val_str(str);
    null_value= window_func()->null_value;
    return computed;
  }

  my_decimal *val_decimal(my_decimal *dec)
  {
    if (force_return_blank)
    {
      /* The blank value is the caller's buffer set to zero. */
      my_decimal_set_zero(dec);
      null_value= false;
      return dec;
    }
    if (read_value_from_result_field)
    {
      /* A NULL field is not read at all. */
      if ((null_value= result_field->is_null()))
        return NULL;
      return result_field->val_decimal(dec);
    }
    my_decimal *computed= window_func()->val_decimal(dec);
    null_value= window_func()->null_value;
    return computed;
  }

  void split_sum_func(THD *thd, Ref_ptr_array ref_pointer_array,
                      List<Item> &fields, uint flags);

  /* The number of decimals is taken over from the wrapped function. */
  void fix_length_and_dec()
  {
    decimals= window_func()->decimals;
  }

  const char *func_name() const { return "WF"; }

  bool fix_fields(THD *thd, Item **ref);
  bool resolve_window_name(THD *thd);
};
#endif /* ITEM_WINDOWFUNC_INCLUDED */