mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
Convert percent_rank to work with cursors
The percent_rank function now is compatible with the cursor algorithm. We no longer need a special implementation for it to work.
This commit is contained in:
parent
bf18dac08f
commit
d40d68f236
2 changed files with 90 additions and 150 deletions
|
@ -317,12 +317,18 @@ class Item_context
|
|||
NOTE: All two pass window functions need to implement
|
||||
this interface.
|
||||
*/
|
||||
class Item_sum_window_with_context : public Item_sum_num,
|
||||
public Item_context
|
||||
class Item_sum_window_with_row_count : public Item_sum_num
|
||||
{
|
||||
public:
|
||||
Item_sum_window_with_context(THD *thd)
|
||||
: Item_sum_num(thd), Item_context() {}
|
||||
Item_sum_window_with_row_count(THD *thd) : Item_sum_num(thd),
|
||||
partition_row_count_(0){}
|
||||
|
||||
void set_row_count(ulonglong count) { partition_row_count_ = count; }
|
||||
|
||||
protected:
|
||||
longlong get_row_count() { return partition_row_count_; }
|
||||
private:
|
||||
ulonglong partition_row_count_;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -336,12 +342,11 @@ class Item_sum_window_with_context : public Item_sum_num,
|
|||
This is held within the row_count context.
|
||||
- Second pass to compute rank of current row and the value of the function
|
||||
*/
|
||||
class Item_sum_percent_rank: public Item_sum_window_with_context,
|
||||
public Window_context_row_count
|
||||
class Item_sum_percent_rank: public Item_sum_window_with_row_count
|
||||
{
|
||||
public:
|
||||
Item_sum_percent_rank(THD *thd)
|
||||
: Item_sum_window_with_context(thd), cur_rank(1) {}
|
||||
: Item_sum_window_with_row_count(thd), cur_rank(1) {}
|
||||
|
||||
longlong val_int()
|
||||
{
|
||||
|
@ -359,14 +364,9 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
|
|||
We can not get the real value without knowing the number of rows
|
||||
in the partition. Don't divide by 0.
|
||||
*/
|
||||
if (!get_context_())
|
||||
{
|
||||
// Calling this kind of function with a context makes no sense.
|
||||
DBUG_ASSERT(0);
|
||||
return 0;
|
||||
}
|
||||
ulonglong partition_rows = get_row_count();
|
||||
null_value= partition_rows > 0 ? false : true;
|
||||
|
||||
longlong partition_rows = get_context_()->get_field_context(result_field);
|
||||
return partition_rows > 1 ?
|
||||
static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
|
||||
}
|
||||
|
@ -381,25 +381,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
|
|||
return "percent_rank";
|
||||
}
|
||||
|
||||
bool create_window_context()
|
||||
{
|
||||
// TODO-cvicentiu: Currently this means we must make sure to delete
|
||||
// the window context. We can potentially allocate this on the THD memroot.
|
||||
// At the same time, this is only necessary for a small portion of the
|
||||
// query execution and it does not make sense to keep it for all of it.
|
||||
context_ = new Window_context_row_count();
|
||||
if (context_ == NULL)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
void delete_window_context()
|
||||
{
|
||||
if (context_)
|
||||
delete get_context_();
|
||||
context_ = NULL;
|
||||
}
|
||||
|
||||
void update_field() {}
|
||||
|
||||
void clear()
|
||||
|
@ -428,13 +409,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
|
|||
void cleanup()
|
||||
{
|
||||
peer_tracker.cleanup();
|
||||
Item_sum_window_with_context::cleanup();
|
||||
}
|
||||
|
||||
/* Helper function so that we don't cast the context every time. */
|
||||
Window_context_row_count* get_context_()
|
||||
{
|
||||
return static_cast<Window_context_row_count *>(context_);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -517,6 +491,27 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
bool requires_partition_size() const
|
||||
{
|
||||
switch (window_func()->sum_func()) {
|
||||
case Item_sum::PERCENT_RANK_FUNC:
|
||||
case Item_sum::CUME_DIST_FUNC:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool requires_peer_size() const
|
||||
{
|
||||
switch (window_func()->sum_func()) {
|
||||
case Item_sum::CUME_DIST_FUNC:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_order_list_mandatory() const
|
||||
{
|
||||
switch (window_func()->sum_func()) {
|
||||
|
|
|
@ -865,15 +865,18 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
UNBOUNDED FOLLOWING frame bound
|
||||
*/
|
||||
|
||||
class Frame_unbounded_following : public Frame_cursor
|
||||
{
|
||||
Table_read_cursor cursor;
|
||||
|
||||
protected:
|
||||
Table_read_cursor cursor;
|
||||
Group_bound_tracker bound_tracker;
|
||||
|
||||
public:
|
||||
void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
|
||||
SQL_I_List<ORDER> *order_list)
|
||||
|
@ -910,6 +913,35 @@ public:
|
|||
};
|
||||
|
||||
|
||||
class Frame_unbounded_following_set_count : public Frame_unbounded_following
|
||||
{
|
||||
void next_partition(longlong rownum, Item_sum* item)
|
||||
{
|
||||
ulonglong num_rows_in_partition= 0;
|
||||
if (!rownum)
|
||||
{
|
||||
/* Read the first row */
|
||||
if (cursor.get_next())
|
||||
return;
|
||||
num_rows_in_partition++;
|
||||
}
|
||||
|
||||
/* Remember which partition we are in */
|
||||
bound_tracker.check_if_next_group();
|
||||
/* Walk to the end of the partition, find how many rows there are. */
|
||||
while (!cursor.get_next())
|
||||
{
|
||||
if (bound_tracker.check_if_next_group())
|
||||
break;
|
||||
num_rows_in_partition++;
|
||||
}
|
||||
|
||||
Item_sum_window_with_row_count* item_with_row_count =
|
||||
static_cast<Item_sum_window_with_row_count *>(item);
|
||||
item_with_row_count->set_row_count(num_rows_in_partition);
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// ROWS-type frame bounds
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -1212,18 +1244,37 @@ Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
void add_extra_frame_cursors(List<Frame_cursor> *cursors,
|
||||
const Item_sum *window_func)
|
||||
{
|
||||
switch (window_func->sum_func())
|
||||
{
|
||||
case Item_sum::CUME_DIST_FUNC:
|
||||
cursors->push_back(new Frame_unbounded_preceding);
|
||||
cursors->push_back(new Frame_range_current_row_bottom);
|
||||
break;
|
||||
default:
|
||||
cursors->push_back(new Frame_unbounded_preceding);
|
||||
cursors->push_back(new Frame_rows_current_row_bottom);
|
||||
}
|
||||
}
|
||||
|
||||
List<Frame_cursor> get_window_func_required_cursors(
|
||||
const Item_window_func* item_win)
|
||||
{
|
||||
List<Frame_cursor> result;
|
||||
|
||||
if (item_win->requires_partition_size())
|
||||
result.push_back(new Frame_unbounded_following_set_count);
|
||||
|
||||
/*
|
||||
If it is not a regular window function that follows frame specifications,
|
||||
specific cursors are required.
|
||||
*/
|
||||
if (item_win->is_frame_prohibited())
|
||||
{
|
||||
DBUG_ASSERT(0); // TODO-cvicentiu not-implemented yet.
|
||||
add_extra_frame_cursors(&result, item_win->window_func());
|
||||
return result;
|
||||
}
|
||||
|
||||
/* A regular window function follows the frame specification. */
|
||||
|
@ -1283,7 +1334,6 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
|
|||
|
||||
List<Frame_cursor> cursors= get_window_func_required_cursors(item_win);
|
||||
|
||||
|
||||
List_iterator_fast<Frame_cursor> it(cursors);
|
||||
Frame_cursor *c;
|
||||
while((c= it++))
|
||||
|
@ -1362,107 +1412,6 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
|
|||
}
|
||||
|
||||
|
||||
bool compute_two_pass_window_functions(Item_window_func *item_win,
|
||||
TABLE *table, READ_RECORD *info)
|
||||
{
|
||||
/* Perform first pass. */
|
||||
|
||||
// TODO-cvicentiu why not initialize the record for when we need, _in_
|
||||
// this function.
|
||||
READ_RECORD *info2= new READ_RECORD();
|
||||
int err;
|
||||
bool is_error = false;
|
||||
bool first_row= true;
|
||||
clone_read_record(info, info2);
|
||||
Item_sum_window_with_context *window_func=
|
||||
static_cast<Item_sum_window_with_context *>(item_win->window_func());
|
||||
uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0));
|
||||
|
||||
is_error= window_func->create_window_context();
|
||||
/* Unable to allocate a new context. */
|
||||
if (is_error)
|
||||
return true;
|
||||
|
||||
Window_context *context = window_func->get_window_context();
|
||||
/*
|
||||
The two pass algorithm is as follows:
|
||||
We have a sorted table according to the partition and order by clauses.
|
||||
1. Scan through the table till we reach a partition boundary.
|
||||
2. For each row that we scan, add it to the context.
|
||||
3. Once the partition boundary is met, do a second scan through the
|
||||
current partition and use the context information to compute the value for
|
||||
the window function for that partition.
|
||||
4. Reset the context.
|
||||
5. Repeat from 1 till end of table.
|
||||
*/
|
||||
|
||||
bool done = false;
|
||||
longlong rows_in_current_partition = 0;
|
||||
// TODO handle end of table updating.
|
||||
while (!done)
|
||||
{
|
||||
|
||||
if ((err= info->read_record(info)))
|
||||
{
|
||||
done = true;
|
||||
}
|
||||
|
||||
bool partition_changed= done || item_win->check_if_partition_changed();
|
||||
// The first time we always have a partition changed. Ignore it.
|
||||
if (first_row)
|
||||
{
|
||||
partition_changed= false;
|
||||
first_row= false;
|
||||
}
|
||||
|
||||
if (partition_changed)
|
||||
{
|
||||
/*
|
||||
We are now looking at the first row for the next partition, or at the
|
||||
end of the table. Either way, we must remember this position for when
|
||||
we finish doing the second pass.
|
||||
*/
|
||||
table->file->position(table->record[0]);
|
||||
memcpy(rowid_buf, table->file->ref, table->file->ref_length);
|
||||
|
||||
for (longlong row_number = 0; row_number < rows_in_current_partition;
|
||||
row_number++)
|
||||
{
|
||||
if ((err= info2->read_record(info2)))
|
||||
{
|
||||
is_error= true;
|
||||
break;
|
||||
}
|
||||
window_func->add();
|
||||
// Save the window function into the table.
|
||||
item_win->save_in_field(item_win->result_field, true);
|
||||
err= table->file->ha_update_row(table->record[1], table->record[0]);
|
||||
if (err && err != HA_ERR_RECORD_IS_THE_SAME)
|
||||
{
|
||||
is_error= true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_error)
|
||||
break;
|
||||
|
||||
rows_in_current_partition= 0;
|
||||
window_func->clear();
|
||||
context->reset();
|
||||
|
||||
// Return to the beginning of the new partition.
|
||||
table->file->ha_rnd_pos(table->record[0], rowid_buf);
|
||||
}
|
||||
rows_in_current_partition++;
|
||||
context->add_field_to_context(item_win->result_field);
|
||||
}
|
||||
|
||||
window_func->delete_window_context();
|
||||
delete info2;
|
||||
my_free(rowid_buf);
|
||||
return is_error;
|
||||
}
|
||||
|
||||
|
||||
/* Make a list that is a concation of two lists of ORDER elements */
|
||||
|
@ -1527,16 +1476,12 @@ bool Window_func_runner::setup(THD *thd)
|
|||
compute_func= compute_window_func_values;
|
||||
break;
|
||||
}
|
||||
case Item_sum::PERCENT_RANK_FUNC:
|
||||
case Item_sum::CUME_DIST_FUNC:
|
||||
{
|
||||
compute_func= compute_two_pass_window_functions;
|
||||
break;
|
||||
}
|
||||
case Item_sum::COUNT_FUNC:
|
||||
case Item_sum::SUM_BIT_FUNC:
|
||||
case Item_sum::SUM_FUNC:
|
||||
case Item_sum::AVG_FUNC:
|
||||
case Item_sum::PERCENT_RANK_FUNC:
|
||||
case Item_sum::CUME_DIST_FUNC:
|
||||
{
|
||||
/*
|
||||
Frame-aware window function computation. It does one pass, but
|
||||
|
|
Loading…
Add table
Reference in a new issue