MDEV-10879: Window Functions final ordering of result set

Window functions now force an ordering to the result set if none is
specified through order by. The ordering that will be returned is the final ordering
that is used during window function computation. For multiple window functions,
it should generally be the most specific sort ordering for the final
window function in the select list.
This commit is contained in:
Vicențiu Ciorbaru 2017-06-13 16:54:40 +03:00
parent 4f93c732d5
commit effbd52b8c
5 changed files with 401 additions and 11 deletions

View file

@ -0,0 +1,264 @@
#
# MDEV-10879 Window functions final ordering result set
#
# This testcase covers window function ordering without an explicit
# order by clause. Our behaviour is to reuse the final ordering used
# during window function computation. This will produce at least one
# window function with values in the order that they are computed.
#
# This feature was implemented as a request from the community, as this
# is what other DBMS engines are doing and they expect simillar behaviour.
#
create table t1 (a int, b varchar(10));
insert into t1 values (1, 'x'),
(2, 'xx'),
(3, 'yy'),
(4, 'zz'),
(5, 'xxx'),
(6, 'yyy'),
(7, 'zzz'),
(8, 'aaa'),
(9, 'bbb'),
(11, 'aa'),
(12, 'bb'),
(13, 'cc'),
(13, 'dd'),
(10, 'ccc');
select row_number() over (), a, b
from t1;
row_number() over () a b
1 3 yy
2 12 bb
3 4 zz
4 13 cc
5 5 xxx
6 13 dd
7 6 yyy
8 10 ccc
9 7 zzz
10 8 aaa
11 1 x
12 9 bbb
13 2 xx
14 11 aa
select row_number() over (order by a), a, b
from t1;
row_number() over (order by a) a b
1 1 x
2 2 xx
3 3 yy
4 4 zz
5 5 xxx
6 6 yyy
7 7 zzz
8 8 aaa
9 9 bbb
10 10 ccc
11 11 aa
12 12 bb
13 13 cc
14 13 dd
select row_number() over (order by b), a, b
from t1;
row_number() over (order by b) a b
1 11 aa
2 8 aaa
3 12 bb
4 9 bbb
5 13 cc
6 10 ccc
7 13 dd
8 1 x
9 2 xx
10 5 xxx
11 3 yy
12 6 yyy
13 4 zz
14 7 zzz
select row_number() over (order by a,b), a, b
from t1;
row_number() over (order by a,b) a b
1 1 x
2 2 xx
3 3 yy
4 4 zz
5 5 xxx
6 6 yyy
7 7 zzz
8 8 aaa
9 9 bbb
10 10 ccc
11 11 aa
12 12 bb
13 13 cc
14 13 dd
select row_number() over (partition by substring(b, -1) order by a), a, substring(b, -1)
from t1;
row_number() over (partition by substring(b, -1) order by a) a substring(b, -1)
1 8 a
2 11 a
1 9 b
2 12 b
1 10 c
2 13 c
1 13 d
1 1 x
2 2 x
3 5 x
1 3 y
2 6 y
1 4 z
2 7 z
select row_number() over (partition by substring(b, -1) order by a), a, substring(b, -1)
from t1;
row_number() over (partition by substring(b, -1) order by a) a substring(b, -1)
1 8 a
2 11 a
1 9 b
2 12 b
1 10 c
2 13 c
1 13 d
1 1 x
2 2 x
3 5 x
1 3 y
2 6 y
1 4 z
2 7 z
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) order by a), a, b
from t1;
row_number() over (order by a) row_number() over (partition by substring(b, -1) order by a) a b
8 1 8 aaa
11 2 11 aa
9 1 9 bbb
12 2 12 bb
10 1 10 ccc
13 2 13 cc
14 1 13 dd
1 1 1 x
2 2 2 xx
5 3 5 xxx
3 1 3 yy
6 2 6 yyy
4 1 4 zz
7 2 7 zzz
#
# Test descending ordering too.
#
select row_number() over (order by a desc), a, b
from t1;
row_number() over (order by a desc) a b
1 13 cc
2 13 dd
3 12 bb
4 11 aa
5 10 ccc
6 9 bbb
7 8 aaa
8 7 zzz
9 6 yyy
10 5 xxx
11 4 zz
12 3 yy
13 2 xx
14 1 x
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1;
row_number() over (order by a) row_number() over (partition by substring(b, -1) desc order by a) a b
4 1 4 zz
7 2 7 zzz
3 1 3 yy
6 2 6 yyy
1 1 1 x
2 2 2 xx
5 3 5 xxx
14 1 13 dd
10 1 10 ccc
13 2 13 cc
9 1 9 bbb
12 2 12 bb
8 1 8 aaa
11 2 11 aa
#
# Test that we can still use the order by explicitly
#
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by a;
row_number() over (order by a) row_number() over (partition by substring(b, -1) desc order by a) a b
1 1 1 x
2 2 2 xx
3 1 3 yy
4 1 4 zz
5 3 5 xxx
6 2 6 yyy
7 2 7 zzz
8 1 8 aaa
9 1 9 bbb
10 1 10 ccc
11 2 11 aa
12 2 12 bb
13 2 13 cc
14 1 13 dd
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by a desc;
row_number() over (order by a) row_number() over (partition by substring(b, -1) desc order by a) a b
13 2 13 cc
14 1 13 dd
12 2 12 bb
11 2 11 aa
10 1 10 ccc
9 1 9 bbb
8 1 8 aaa
7 2 7 zzz
6 2 6 yyy
5 3 5 xxx
4 1 4 zz
3 1 3 yy
2 2 2 xx
1 1 1 x
select row_number() over (order by a desc),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by a;
row_number() over (order by a desc) row_number() over (partition by substring(b, -1) desc order by a) a b
14 1 1 x
13 2 2 xx
12 1 3 yy
11 1 4 zz
10 3 5 xxx
9 2 6 yyy
8 2 7 zzz
7 1 8 aaa
6 1 9 bbb
5 1 10 ccc
4 2 11 aa
3 2 12 bb
1 2 13 cc
2 1 13 dd
select row_number() over (order by a) fst_row,
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by fst_row;
fst_row row_number() over (partition by substring(b, -1) desc order by a) a b
1 1 1 x
2 2 2 xx
3 1 3 yy
4 1 4 zz
5 3 5 xxx
6 2 6 yyy
7 2 7 zzz
8 1 8 aaa
9 1 9 bbb
10 1 10 ccc
11 2 11 aa
12 2 12 bb
13 2 13 cc
14 1 13 dd
drop table t1;

View file

@ -0,0 +1,84 @@
--echo #
--echo # MDEV-10879 Window functions final ordering result set
--echo #
--echo # This testcase covers window function ordering without an explicit
--echo # order by clause. Our behaviour is to reuse the final ordering used
--echo # during window function computation. This will produce at least one
--echo # window function with values in the order that they are computed.
--echo #
--echo # This feature was implemented as a request from the community, as this
--echo # is what other DBMS engines are doing and they expect simillar behaviour.
--echo #
create table t1 (a int, b varchar(10));
insert into t1 values (1, 'x'),
(2, 'xx'),
(3, 'yy'),
(4, 'zz'),
(5, 'xxx'),
(6, 'yyy'),
(7, 'zzz'),
(8, 'aaa'),
(9, 'bbb'),
(11, 'aa'),
(12, 'bb'),
(13, 'cc'),
(13, 'dd'),
(10, 'ccc');
select row_number() over (), a, b
from t1;
select row_number() over (order by a), a, b
from t1;
select row_number() over (order by b), a, b
from t1;
select row_number() over (order by a,b), a, b
from t1;
select row_number() over (partition by substring(b, -1) order by a), a, substring(b, -1)
from t1;
select row_number() over (partition by substring(b, -1) order by a), a, substring(b, -1)
from t1;
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) order by a), a, b
from t1;
--echo #
--echo # Test descending ordering too.
--echo #
select row_number() over (order by a desc), a, b
from t1;
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1;
--echo #
--echo # Test that we can still use the order by explicitly
--echo #
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by a;
select row_number() over (order by a),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by a desc;
select row_number() over (order by a desc),
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by a;
select row_number() over (order by a) fst_row,
row_number() over (partition by substring(b, -1) desc order by a), a, b
from t1
order by fst_row;
drop table t1;

View file

@ -26349,9 +26349,20 @@ AGGR_OP::end_send()
// Update ref array // Update ref array
join_tab->join->set_items_ref_array(*join_tab->ref_array); join_tab->join->set_items_ref_array(*join_tab->ref_array);
/* During window function computation, if no order by is specified explicitly
for the select, the final result will end up with a non-deterministic
order of rows. This can become confusing especially when you expect
row_number() to return monotonically increasing values. To address this
issue, we save the final ordering from the temporary table and use
that when presenting the results.
*/
bool save_final_window_funcs_ordering= join_tab->filesort ? false : true;
if (join_tab->window_funcs_step) if (join_tab->window_funcs_step)
{ {
if (join_tab->window_funcs_step->exec(join)) if (join_tab->window_funcs_step->exec(join,
save_final_window_funcs_ordering))
return NESTED_LOOP_ERROR; return NESTED_LOOP_ERROR;
} }
@ -26405,6 +26416,12 @@ AGGR_OP::end_send()
} }
} }
if (save_final_window_funcs_ordering)
{
delete join_tab->filesort_result;
join_tab->filesort_result= NULL;
}
// Finish rnd scn after sending records // Finish rnd scn after sending records
if (join_tab->table->file->inited) if (join_tab->table->file->inited)
join_tab->table->file->ha_rnd_end(); join_tab->table->file->ha_rnd_end();

View file

@ -2758,7 +2758,7 @@ bool Window_func_runner::exec(THD *thd, TABLE *tbl, SORT_INFO *filesort_result)
} }
bool Window_funcs_sort::exec(JOIN *join) bool Window_funcs_sort::exec(JOIN *join, bool keep_filesort_result)
{ {
THD *thd= join->thd; THD *thd= join->thd;
JOIN_TAB *join_tab= join->join_tab + join->exec_join_tab_cnt(); JOIN_TAB *join_tab= join->join_tab + join->exec_join_tab_cnt();
@ -2773,8 +2773,20 @@ bool Window_funcs_sort::exec(JOIN *join)
bool is_error= runner.exec(thd, tbl, filesort_result); bool is_error= runner.exec(thd, tbl, filesort_result);
delete join_tab->filesort_result; /*
join_tab->filesort_result= NULL; During create_sort_index, we set the filesort_result field within the
join_tab. We will reuse the sorting if we don't have an additional
ORDER BY clause for our table in the select statement encompasing these
window functions. We only keep the last Window_funcs_sort filesort result.
Free the filesort_result if we encountered an error during execution so as
to prevent memory leaks.
*/
if (!keep_filesort_result || is_error)
{
delete join_tab->filesort_result;
join_tab->filesort_result= NULL;
}
return is_error; return is_error;
} }
@ -2883,14 +2895,21 @@ bool Window_funcs_computation::setup(THD *thd,
} }
bool Window_funcs_computation::exec(JOIN *join) bool Window_funcs_computation::exec(JOIN *join, bool keep_final_sort_result)
{ {
List_iterator<Window_funcs_sort> it(win_func_sorts); List_iterator<Window_funcs_sort> it(win_func_sorts);
uint count= 0;
Window_funcs_sort *srt; Window_funcs_sort *srt;
/* Execute each sort */ /* Execute each sort */
while ((srt = it++)) bool keep_result= false;
while ((srt= it++))
{ {
if (srt->exec(join)) count++;
/* The final Window_funcs_sort keeps the filesort result. */
if (keep_final_sort_result && count == win_func_sorts.elements)
keep_result= true;
if (srt->exec(join, keep_result))
return true; return true;
} }
return false; return false;

View file

@ -195,14 +195,20 @@ class Window_funcs_sort : public Sql_alloc
public: public:
bool setup(THD *thd, SQL_SELECT *sel, List_iterator<Item_window_func> &it, bool setup(THD *thd, SQL_SELECT *sel, List_iterator<Item_window_func> &it,
st_join_table *join_tab); st_join_table *join_tab);
bool exec(JOIN *join); /*
Execute the window functions requiring the previously set up sort order.
@param keep_filesort_result: Do not delete the filesort_result after
sorting the table if it is set to true.
Otherwise free it.
*/
bool exec(JOIN *join, bool keep_filesort_result);
void cleanup() { delete filesort; } void cleanup() { delete filesort; }
private:
friend class Window_funcs_computation; friend class Window_funcs_computation;
private:
Window_func_runner runner; Window_func_runner runner;
/* Window functions can be computed over this sorting */ /* Window functions can be computed over this sorting */
Filesort *filesort; Filesort *filesort;
}; };
@ -225,7 +231,7 @@ class Window_funcs_computation : public Sql_alloc
List<Window_funcs_sort> win_func_sorts; List<Window_funcs_sort> win_func_sorts;
public: public:
bool setup(THD *thd, List<Item_window_func> *window_funcs, st_join_table *tab); bool setup(THD *thd, List<Item_window_func> *window_funcs, st_join_table *tab);
bool exec(JOIN *join); bool exec(JOIN *join, bool keep_final_sort_result);
Explain_aggr_window_funcs *save_explain_plan(MEM_ROOT *mem_root, bool is_analyze); Explain_aggr_window_funcs *save_explain_plan(MEM_ROOT *mem_root, bool is_analyze);
void cleanup(); void cleanup();