From be3902fceba95254d13e0f74741c3fa2d4a0c9f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Mon, 4 Apr 2016 17:06:12 +0300 Subject: [PATCH] Implement ntile window function. The current implementation does not allow for a dynamic expression within the sum function's parameter. --- mysql-test/r/win_ntile.result | 360 ++++++++++++++++++++++++++++++++++ mysql-test/t/win_ntile.test | 147 ++++++++++++++ sql/item_sum.h | 2 +- sql/item_windowfunc.h | 63 ++++++ sql/lex.h | 1 + sql/share/errmsg-utf8.txt | 2 + sql/sql_window.cc | 1 + sql/sql_yacc.yy | 13 ++ 8 files changed, 588 insertions(+), 1 deletion(-) create mode 100644 mysql-test/r/win_ntile.result create mode 100644 mysql-test/t/win_ntile.test diff --git a/mysql-test/r/win_ntile.result b/mysql-test/r/win_ntile.result new file mode 100644 index 00000000000..c403e2937e6 --- /dev/null +++ b/mysql-test/r/win_ntile.result @@ -0,0 +1,360 @@ +create table t1 ( +pk int primary key, +a int, +b int +); +insert into t1 values +(11 , 0, 10), +(12 , 0, 10), +(13 , 1, 10), +(14 , 1, 10), +(18 , 2, 10), +(15 , 2, 20), +(16 , 2, 20), +(17 , 2, 20), +(19 , 4, 20), +(20 , 4, 20); +select pk, a, b, ntile(-1) over (order by a) +from t1; +ERROR HY000: Argument of NTILE must be greater than 0 +select pk, a, b, +ntile(0) over (order by a) +from t1; +ERROR HY000: Argument of NTILE must be greater than 0 +select pk, a, b, +ntile(1) over (order by pk) +from t1; +pk a b ntile(1) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 1 +14 1 10 1 +15 2 20 1 +16 2 20 1 +17 2 20 1 +18 2 10 1 +19 4 20 1 +20 4 20 1 +select pk, a, b, +ntile(2) over (order by pk) +from t1; +pk a b ntile(2) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 1 +14 1 10 1 +15 2 20 1 +16 2 20 2 +17 2 20 2 +18 2 10 2 +19 4 20 2 +20 4 20 2 +select pk, a, b, +ntile(3) over (order by pk) +from t1; +pk a b ntile(3) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 1 +14 1 10 1 +15 2 20 2 +16 2 20 2 +17 2 20 2 +18 2 10 3 +19 4 20 3 +20 4 20 3 +select pk, a, b, +ntile(4) over (order by pk) +from t1; +pk a b ntile(4) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 1 +14 1 10 2 +15 2 20 2 +16 2 20 2 +17 2 20 3 +18 2 10 3 +19 4 20 4 +20 4 20 4 +select pk, a, b, +ntile(5) over (order by pk) +from t1; +pk a b ntile(5) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 2 +14 1 10 2 +15 2 20 3 +16 2 20 3 +17 2 20 4 +18 2 10 4 +19 4 20 5 +20 4 20 5 +select pk, a, b, +ntile(6) over (order by pk) +from t1; +pk a b ntile(6) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 2 +14 1 10 2 +15 2 20 3 +16 2 20 3 +17 2 20 4 +18 2 10 4 +19 4 20 5 +20 4 20 6 +select pk, a, b, +ntile(7) over (order by pk) +from t1; +pk a b ntile(7) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 2 +14 1 10 2 +15 2 20 3 +16 2 20 3 +17 2 20 4 +18 2 10 5 +19 4 20 6 +20 4 20 7 +select pk, a, b, +ntile(8) over (order by pk) +from t1; +pk a b ntile(8) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 2 +14 1 10 2 +15 2 20 3 +16 2 20 4 +17 2 20 5 +18 2 10 6 +19 4 20 7 +20 4 20 8 +select pk, a, b, +ntile(9) over (order by pk) +from t1; +pk a b ntile(9) over (order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 2 +14 1 10 3 +15 2 20 4 +16 2 20 5 +17 2 20 6 +18 2 10 7 +19 4 20 8 +20 4 20 9 +select pk, a, b, +ntile(10) over (order by pk) +from t1; +pk a b ntile(10) over (order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +15 2 20 5 +16 2 20 6 +17 2 20 7 +18 2 10 8 +19 4 20 9 +20 4 20 10 +select pk, a, b, +ntile(11) over (order by pk) +from t1; +pk a b ntile(11) over (order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +15 2 20 5 +16 2 20 6 +17 2 20 7 +18 2 10 8 +19 4 20 9 +20 4 20 10 +select pk, a, b, +ntile(20) over (order by pk) +from t1; +pk a b ntile(20) over (order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +15 2 20 5 +16 2 20 6 +17 2 20 7 +18 2 10 8 +19 4 20 9 +20 4 20 10 +select pk, a, b, +ntile(1) over (partition by b order by pk) +from t1; +pk a b ntile(1) over (partition by b order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 1 +14 1 10 1 +18 2 10 1 +15 2 20 1 +16 2 20 1 +17 2 20 1 +19 4 20 1 +20 4 20 1 +select pk, a, b, +ntile(2) over (partition by b order by pk) +from t1; +pk a b ntile(2) over (partition by b order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 1 +14 1 10 2 +18 2 10 2 +15 2 20 1 +16 2 20 1 +17 2 20 1 +19 4 20 2 +20 4 20 2 +select pk, a, b, +ntile(3) over (partition by b order by pk) +from t1; +pk a b ntile(3) over (partition by b order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 2 +14 1 10 2 +18 2 10 3 +15 2 20 1 +16 2 20 1 +17 2 20 2 +19 4 20 2 +20 4 20 3 +select pk, a, b, +ntile(4) over (partition by b order by pk) +from t1; +pk a b ntile(4) over (partition by b order by pk) +11 0 10 1 +12 0 10 1 +13 1 10 2 +14 1 10 3 +18 2 10 4 +15 2 20 1 +16 2 20 1 +17 2 20 2 +19 4 20 3 +20 4 20 4 +select pk, a, b, +ntile(5) over (partition by b order by pk) +from t1; +pk a b ntile(5) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +select pk, a, b, +ntile(6) over (partition by b order by pk) +from t1; +pk a b ntile(6) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +select pk, a, b, +ntile(7) over (partition by b order by pk) +from t1; +pk a b ntile(7) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +select pk, a, b, +ntile(8) over (partition by b order by pk) +from t1; +pk a b ntile(8) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +select pk, a, b, +ntile(9) over (partition by b order by pk) +from t1; +pk a b ntile(9) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +select pk, a, b, +ntile(10) over (partition by b order by pk) +from t1; +pk a b ntile(10) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +select pk, a, b, +ntile(11) over (partition by b order by pk) +from t1; +pk a b ntile(11) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +select pk, a, b, +ntile(20) over (partition by b order by pk) +from t1; +pk a b ntile(20) over (partition by b order by pk) +11 0 10 1 +12 0 10 2 +13 1 10 3 +14 1 10 4 +18 2 10 5 +15 2 20 1 +16 2 20 2 +17 2 20 3 +19 4 20 4 +20 4 20 5 +drop table t1; diff --git a/mysql-test/t/win_ntile.test b/mysql-test/t/win_ntile.test new file mode 100644 index 00000000000..0a6efa91c91 --- /dev/null +++ b/mysql-test/t/win_ntile.test @@ -0,0 +1,147 @@ +create table t1 ( + pk int primary key, + a int, + b int +); + + +insert into t1 values +(11 , 0, 10), +(12 , 0, 10), +(13 , 1, 10), +(14 , 1, 10), +(18 , 2, 10), +(15 , 2, 20), +(16 , 2, 20), +(17 , 2, 20), +(19 , 4, 20), +(20 , 4, 20); + +# TODO Try invalid queries too. + +--error ER_INVALID_NTILE_ARGUMENT +select pk, a, b, ntile(-1) over (order by a) +from t1; + +--error ER_INVALID_NTILE_ARGUMENT +select pk, a, b, + ntile(0) over (order by a) +from t1; + +--sorted_result +select pk, a, b, + ntile(1) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(2) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(3) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(4) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(5) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(6) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(7) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(8) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(9) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(10) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(11) over (order by pk) +from t1; + +--sorted_result +select pk, a, b, + ntile(20) over (order by pk) +from t1; + + +select pk, a, b, + ntile(1) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(2) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(3) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(4) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(5) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(6) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(7) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(8) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(9) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(10) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(11) over (partition by b order by pk) +from t1; + +select pk, a, b, + ntile(20) over (partition by b order by pk) +from t1; + + + + + + + + +drop table t1; diff --git a/sql/item_sum.h b/sql/item_sum.h index cfe2d3db878..e766e69a1c5 100644 --- a/sql/item_sum.h +++ b/sql/item_sum.h @@ -350,7 +350,7 @@ public: AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC, VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC, ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC, - CUME_DIST_FUNC + CUME_DIST_FUNC, NTILE_FUNC }; Item **ref_by; /* pointer to a ref to the object used to register it */ diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h index 2a2c29b6c20..215ceb14aa7 100644 --- a/sql/item_windowfunc.h +++ b/sql/item_windowfunc.h @@ -395,6 +395,67 @@ class Item_sum_cume_dist: public Item_sum_window_with_row_count ulonglong current_row_count_; }; +class Item_sum_ntile : public Item_sum_window_with_row_count +{ + public: + Item_sum_ntile(THD* thd, ulong num_quantiles) : + Item_sum_window_with_row_count(thd), num_quantiles_(num_quantiles), + current_row_count_(0) {}; + + double val_real() + { + return val_int(); + } + + longlong val_int() + { + if (get_row_count() == 0) + { + null_value= true; + return 0; + } + null_value= false; + ulonglong quantile_size = get_row_count() / num_quantiles_; + ulonglong extra_rows = get_row_count() - quantile_size * num_quantiles_; + + if (current_row_count_ <= extra_rows * (quantile_size + 1)) + return (current_row_count_ - 1) / (quantile_size + 1) + 1; + + return (current_row_count_ - 1 - extra_rows) / quantile_size + 1; + } + + bool add() + { + current_row_count_++; + return false; + } + + enum Sumfunctype sum_func() const + { + return NTILE_FUNC; + } + + void clear() + { + current_row_count_= 0; + set_row_count(0); + } + + const char*func_name() const + { + return "ntile"; + } + + void update_field() {} + + enum Item_result result_type () const { return INT_RESULT; } + enum_field_types field_type() const { return MYSQL_TYPE_LONGLONG; } + + private: + ulong num_quantiles_; + ulong current_row_count_; +}; + class Item_window_func : public Item_func_or_sum { @@ -435,6 +496,7 @@ public: case Item_sum::DENSE_RANK_FUNC: case Item_sum::PERCENT_RANK_FUNC: case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: return true; default: return false; @@ -446,6 +508,7 @@ public: switch (window_func()->sum_func()) { case Item_sum::PERCENT_RANK_FUNC: case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: return true; default: return false; diff --git a/sql/lex.h b/sql/lex.h index 01e73f5f3d3..f7a183e1862 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -702,6 +702,7 @@ static SYMBOL sql_functions[] = { { "MID", SYM(SUBSTRING)}, /* unireg function */ { "MIN", SYM(MIN_SYM)}, { "NOW", SYM(NOW_SYM)}, + { "NTILE", SYM(NTILE_SYM)}, { "POSITION", SYM(POSITION_SYM)}, { "PERCENT_RANK", SYM(PERCENT_RANK_SYM)}, { "RANK", SYM(RANK_SYM)}, diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt index dfd12ec20ac..d6b180b79aa 100644 --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7184,3 +7184,5 @@ ER_FRAME_EXCLUSION_NOT_SUPPORTED eng "Frame exclusion is not supported yet" ER_WINDOW_FUNCTION_DONT_HAVE_FRAME eng "This window function may not have a window frame" +ER_INVALID_NTILE_ARGUMENT + eng "Argument of NTILE must be greater than 0" diff --git a/sql/sql_window.cc b/sql/sql_window.cc index ff2f5a46449..8e4d597b435 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -1745,6 +1745,7 @@ bool Window_func_runner::setup(THD *thd) case Item_sum::AVG_FUNC: case Item_sum::PERCENT_RANK_FUNC: case Item_sum::CUME_DIST_FUNC: + case Item_sum::NTILE_FUNC: { /* Frame-aware window function computation. It does one pass, but diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index b8d0238b753..25416100fb3 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -1432,6 +1432,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); %token NO_SYM /* SQL-2003-R */ %token NO_WAIT_SYM %token NO_WRITE_TO_BINLOG +%token NTILE_SYM %token NULL_SYM /* SQL-2003-R */ %token NUM %token NUMBER_SYM /* SQL-2003-N */ @@ -10539,6 +10540,18 @@ simple_window_func: if ($$ == NULL) MYSQL_YYABORT; } + | + NTILE_SYM '(' int_num ')' + { + if ($3 <= 0) + { + my_error(ER_INVALID_NTILE_ARGUMENT, MYF(0)); + MYSQL_YYABORT; + } + $$= new (thd->mem_root) Item_sum_ntile(thd, $3); + if ($$ == NULL) + MYSQL_YYABORT; + } ; window_name: