MDEV-7836: ANALYZE FORMAT=JSON should provide info about GROUP/ORDER BY

Provide basic info about sorting/grouping done by the queries.
Sergei Petrunia 2015-04-12 04:48:42 +03:00
parent 66ff1632f5
commit 4938b82263
23 changed files with 1111 additions and 181 deletions


@ -102,6 +102,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/my_json_writer.cc ../sql/my_json_writer.h
../sql/rpl_gtid.cc
../sql/sql_explain.cc ../sql/sql_explain.h
../sql/sql_analyze_stmt.cc ../sql/sql_analyze_stmt.h
../sql/compat56.cc
../sql/table_cache.cc
../sql/item_inetfunc.cc


@ -281,6 +281,7 @@ ANALYZE
{
"query_block": {
"select_id": 1,
"r_total_time_ms": "REPLACED",
"table": {
"update": 1,
"table_name": "t1",
@ -324,6 +325,7 @@ ANALYZE
{
"query_block": {
"select_id": 1,
"r_total_time_ms": "REPLACED",
"table": {
"delete": 1,
"table_name": "t1",


@ -0,0 +1,289 @@
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t2 (
a int,
b int,
key (a)
);
insert into t2 select A.a*1000 + B.a, A.a*1000 + B.a from t0 A, t1 B;
#
# Try an UPDATE that uses filesort:
#
explain
update t2 set b=b+1 order by b limit 5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using filesort
explain format=json
update t2 set b=b+1 order by b limit 5;
EXPLAIN
{
"query_block": {
"select_id": 1,
"filesort": {
"table": {
"update": 1,
"table_name": "t2",
"access_type": "ALL",
"rows": 10000
}
}
}
}
analyze format=json
update t2 set b=b+1 order by b limit 5;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_total_time_ms": "REPLACED",
"filesort": {
"r_loops": 1,
"r_limit": 5,
"r_used_priority_queue": true,
"r_output_rows": 6,
"table": {
"update": 1,
"table_name": "t2",
"access_type": "ALL",
"rows": 10000,
"r_rows": 10000,
"r_filtered": 100,
"r_total_time_ms": "REPLACED"
}
}
}
}
#
# Try an UPDATE that uses buffering:
#
explain
update t2 set a=a+1 where a<10;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 range a a 5 NULL 8 Using where; Using buffer
explain format=json
update t2 set a=a+1 where a<10;
EXPLAIN
{
"query_block": {
"select_id": 1,
"buffer": {
"table": {
"update": 1,
"table_name": "t2",
"access_type": "range",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"rows": 8,
"attached_condition": "(t2.a < 10)"
}
}
}
}
analyze format=json
update t2 set a=a+1 where a<10;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_total_time_ms": "REPLACED",
"buffer": {
"table": {
"update": 1,
"table_name": "t2",
"access_type": "range",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"rows": 8,
"r_rows": 10,
"r_filtered": 100,
"r_total_time_ms": "REPLACED",
"attached_condition": "(t2.a < 10)"
}
}
}
}
#
# Try a DELETE that uses filesort:
#
explain
delete from t2 order by b limit 5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t2 ALL NULL NULL NULL NULL 10000 Using filesort
explain format=json
delete from t2 order by b limit 5;
EXPLAIN
{
"query_block": {
"select_id": 1,
"filesort": {
"table": {
"delete": 1,
"table_name": "t2",
"access_type": "ALL",
"rows": 10000
}
}
}
}
analyze format=json
delete from t2 order by b limit 5;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_total_time_ms": "REPLACED",
"filesort": {
"r_loops": 1,
"r_used_priority_queue": false,
"r_output_rows": 10000,
"r_buffer_size": "195Kb",
"table": {
"delete": 1,
"table_name": "t2",
"access_type": "ALL",
"rows": 10000,
"r_rows": 10000,
"r_filtered": 100,
"r_total_time_ms": "REPLACED"
}
}
}
}
#
# Try a SELECT with QEP in form: filesort { tmp_table { join } }
#
explain
select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where; Using temporary; Using filesort
1 SIMPLE t2 ref a a 5 test.t0.a 1
explain format=json
select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
EXPLAIN
{
"query_block": {
"select_id": 1,
"temporary_table": {
"function": "buffer",
"table": {
"table_name": "t0",
"access_type": "ALL",
"rows": 10,
"filtered": 100,
"attached_condition": "(t0.a is not null)"
},
"table": {
"table_name": "t2",
"access_type": "ref",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t0.a"],
"rows": 1,
"filtered": 100
}
}
}
}
analyze format=json
select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"filesort": {
"temporary_table": {
"table": {
"table_name": "t0",
"access_type": "ALL",
"r_loops": 1,
"rows": 10,
"r_rows": 10,
"r_total_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100,
"attached_condition": "(t0.a is not null)"
},
"table": {
"table_name": "t2",
"access_type": "ref",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t0.a"],
"r_loops": 10,
"rows": 1,
"r_rows": 0.4,
"r_total_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100
}
}
}
}
}
#
# Try a SELECT with QEP in form: join { filesort { table0 }, table2 }
#
explain
select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where; Using filesort
1 SIMPLE t2 ref a a 5 test.t0.a 1
analyze format=json
select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"read_sorted_file": {
"r_rows": 10,
"filesort": {
"r_loops": 1,
"r_used_priority_queue": false,
"r_output_rows": 10,
"r_buffer_size": "360",
"table": {
"table_name": "t0",
"access_type": "ALL",
"r_loops": 1,
"rows": 10,
"r_rows": 10,
"r_total_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 1,
"attached_condition": "(t0.a is not null)"
}
}
},
"table": {
"table_name": "t2",
"access_type": "ref",
"possible_keys": ["a"],
"key": "a",
"key_length": "5",
"used_key_parts": ["a"],
"ref": ["test.t0.a"],
"r_loops": 10,
"rows": 1,
"r_rows": 0.4,
"r_total_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100
}
}
}
drop table t2;
drop table t0, t1;


@ -478,11 +478,14 @@ EXPLAIN
"materialized": {
"query_block": {
"select_id": 2,
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10,
"filtered": 100
"temporary_table": {
"function": "buffer",
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10,
"filtered": 100
}
}
}
}
@ -517,11 +520,14 @@ EXPLAIN
"materialized": {
"query_block": {
"select_id": 2,
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10,
"filtered": 100
"temporary_table": {
"function": "buffer",
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10,
"filtered": 100
}
}
}
}
@ -558,11 +564,14 @@ EXPLAIN
"unique": 1,
"query_block": {
"select_id": 2,
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10,
"filtered": 100
"temporary_table": {
"function": "buffer",
"table": {
"table_name": "t1",
"access_type": "ALL",
"rows": 10,
"filtered": 100
}
}
}
}


@ -0,0 +1,73 @@
create table t0(a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
create table t2 (
a int,
b int,
key (a)
);
insert into t2 select A.a*1000 + B.a, A.a*1000 + B.a from t0 A, t1 B;
--echo #
--echo # Try an UPDATE that uses filesort:
--echo #
explain
update t2 set b=b+1 order by b limit 5;
explain format=json
update t2 set b=b+1 order by b limit 5;
--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
analyze format=json
update t2 set b=b+1 order by b limit 5;
--echo #
--echo # Try an UPDATE that uses buffering:
--echo #
explain
update t2 set a=a+1 where a<10;
explain format=json
update t2 set a=a+1 where a<10;
--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
analyze format=json
update t2 set a=a+1 where a<10;
--echo #
--echo # Try a DELETE that uses filesort:
--echo #
explain
delete from t2 order by b limit 5;
explain format=json
delete from t2 order by b limit 5;
--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
analyze format=json
delete from t2 order by b limit 5;
--echo #
--echo # Try a SELECT with QEP in form: filesort { tmp_table { join } }
--echo #
explain
select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
explain format=json
select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
analyze format=json
select * from t0,t2 where t2.a=t0.a order by t2.b limit 4;
--echo #
--echo # Try a SELECT with QEP in form: join { filesort { table0 }, table2 }
--echo #
explain
select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
## explain format=json
## select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": "REPLACED"/
analyze format=json
select * from t0,t2 where t2.a=t0.a order by t0.a limit 4;
drop table t2;
drop table t0, t1;


@ -105,7 +105,7 @@ SET (SQL_SOURCE
# added in MariaDB:
sql_explain.h sql_explain.cc
sql_analyze_stmt.h
sql_analyze_stmt.h sql_analyze_stmt.cc
sql_lifo_buffer.h sql_join_cache.h sql_join_cache.cc
create_options.cc multi_range_read.cc
opt_index_cond_pushdown.cc opt_subselect.cc


@ -145,7 +145,8 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
SQL_SELECT *select, ha_rows max_rows,
bool sort_positions,
ha_rows *examined_rows,
ha_rows *found_rows)
ha_rows *found_rows,
Filesort_tracker* tracker)
{
int error;
size_t memory_available= thd->variables.sortbuff_size;
@ -211,6 +212,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
else
thd->inc_status_sort_scan();
thd->query_plan_flags|= QPLAN_FILESORT;
tracker->report_use(max_rows);
// If number of rows is not known, use as much of sort buffer as possible.
num_rows= table->file->estimate_rows_upper_bound();
@ -226,6 +228,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
DBUG_PRINT("info", ("filesort PQ is applicable"));
thd->query_plan_flags|= QPLAN_FILESORT_PRIORITY_QUEUE;
status_var_increment(thd->status_var.filesort_pq_sorts_);
tracker->incr_pq_used();
const size_t compare_length= param.sort_length;
if (pq.init(param.max_rows,
true, // max_at_top
@ -282,6 +285,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR));
goto err;
}
tracker->report_sort_buffer_size(table_sort.sort_buffer_size());
}
if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
@ -300,6 +304,8 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
goto err;
maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek));
tracker->report_merge_passes_at_start(thd->query_plan_fsort_passes);
tracker->report_row_numbers(param.examined_rows, *found_rows, num_rows);
if (maxbuffer == 0) // The whole set is in memory
{
@ -365,6 +371,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
err:
my_free(param.tmp_buffer);
tracker->report_merge_passes_at_end(thd->query_plan_fsort_passes);
if (!subselect || !subselect->is_uncacheable())
{
table_sort.free_sort_buffer();


@ -25,11 +25,13 @@ class SQL_SELECT;
class THD;
struct TABLE;
typedef struct st_sort_field SORT_FIELD;
class Filesort_tracker;
ha_rows filesort(THD *thd, TABLE *table, st_sort_field *sortorder,
uint s_length, SQL_SELECT *select,
ha_rows max_rows, bool sort_positions,
ha_rows *examined_rows, ha_rows *found_rows);
ha_rows *examined_rows, ha_rows *found_rows,
Filesort_tracker* tracker);
void filesort_free_buffers(TABLE *table, bool full);
void change_double_for_sort(double nr,uchar *to);


@ -2596,9 +2596,13 @@ public:
ulonglong rows_changed;
/* One bigger than needed to avoid to test if key == MAX_KEY */
ulonglong index_rows_read[MAX_KEY+1];
private:
/* ANALYZE time tracker, if present */
Exec_time_tracker *tracker;
public:
void set_time_tracker(Exec_time_tracker *tracker_arg) { tracker=tracker_arg;}
Item *pushed_idx_cond;
uint pushed_idx_cond_keyno; /* The index which the above condition is for */


@ -130,6 +130,27 @@ void Json_writer::add_ll(longlong val)
}
/* Add a memory size, printing in Kb or Mb when appropriate */
void Json_writer::add_size(longlong val)
{
char buf[64];
if (val < 1024)
my_snprintf(buf, sizeof(buf), "%ld", val);
else if (val < 1024*1024*16)
{
/* Values less than 16MB are specified in KB for precision */
size_t len= my_snprintf(buf, sizeof(buf), "%ld", val/1024);
strcpy(buf + len, "Kb");
}
else
{
size_t len= my_snprintf(buf, sizeof(buf), "%ld", val/(1024*1024));
strcpy(buf + len, "Mb");
}
add_str(buf);
}
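/*
  Editorial sketch, not part of the commit: an illustration of the thresholds
  above. The "360" and "195Kb" figures in the ANALYZE test output earlier come
  from these branches (the 200000-byte buffer is a hypothetical example value;
  200000/1024 = 195):
    add_size(360);      -> "360"    (below 1Kb: plain byte count)
    add_size(200000);   -> "195Kb"  (below 16Mb: shown in KB for precision)
    add_size(1 << 25);  -> "32Mb"   (16Mb and above: shown in MB)
*/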
void Json_writer::add_double(double val)
{
char buf[64];


@ -108,6 +108,7 @@ public:
void add_str(const String &str);
void add_ll(longlong val);
void add_size(longlong val);
void add_double(double val);
void add_bool(bool val);
void add_null();

sql/sql_analyze_stmt.cc (new file, 62 lines)

@ -0,0 +1,62 @@
/*
Copyright (c) 2015 MariaDB Corporation Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation // gcc: Class implementation
#endif
#include <my_global.h>
#include "sql_priv.h"
#include "sql_select.h"
#include "my_json_writer.h"
void Filesort_tracker::print_json(Json_writer *writer)
{
const char *varied_str= "(varied across executions)";
writer->add_member("r_loops").add_ll(r_loops);
if (r_limit != HA_POS_ERROR)
{
writer->add_member("r_limit");
if (r_limit == 0)
writer->add_str(varied_str);
else
writer->add_ll(rint(r_limit/r_loops));
}
writer->add_member("r_used_priority_queue");
if (r_used_pq == r_loops)
writer->add_bool(true);
else if (r_used_pq == 0)
writer->add_bool(false);
else
writer->add_str(varied_str);
writer->add_member("r_output_rows").add_ll(rint(r_output_rows / r_loops));
if (sort_passes)
writer->add_member("r_sort_passes").add_ll(rint(sort_passes / r_loops));
if (sort_buffer_size != 0)
{
writer->add_member("r_buffer_size");
if (sort_buffer_size == ulonglong(-1))
writer->add_str(varied_str);
else
writer->add_size(sort_buffer_size);
}
}
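
For a single filesort invocation that used the priority-queue optimization with LIMIT 5, print_json() emits the fragment recorded in the .result file earlier (values taken from that file; "..." marks members omitted here):

  "filesort": {
    "r_loops": 1,
    "r_limit": 5,
    "r_used_priority_queue": true,
    "r_output_rows": 6,
    ...
  }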


@ -14,6 +14,29 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
== ANALYZE-stmt classes ==
This file contains classes that support the "ANALYZE statement" feature: a
set of data structures that store information about how a statement was
executed.
There are two kinds of data collection:
1. Various counters. We assume that incrementing counters has very low
overhead. Because of that, execution code increments the counters
unconditionally, even when not running "ANALYZE $statement": regular SELECT/
UPDATE/DELETE/etc statements increment them too.
As a free bonus, this lets us print detailed information into the slow query
log, should the query be slow.
2. Timing data. Measuring the time it takes to run parts of a query has
noticeable overhead. Because of that, we measure the time only when running
"ANALYZE $stmt".
*/
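/*
  Editorial sketch, not part of the commit: how the two modes look at a call
  site. Counters are incremented unconditionally, while timing goes through
  the ANALYZE_* macros defined below, which cost almost nothing unless
  ANALYZE enabled timing:

    counter++;                            // always incremented
    ANALYZE_START_TRACKING(tracker);      // no-op unless tracker->timed
    ... the expensive part ...
    ANALYZE_STOP_TRACKING(tracker);
*/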
/*
A class for tracking time it takes to do a certain action
@ -85,4 +108,285 @@ public:
if (unlikely((tracker)->timed)) \
{ (tracker)->stop_tracking(); }
/*
A class for collecting read statistics.
The idea is that we run several scans. Each scan gets rows and then filters
some of them out. We count scans, rows, and rows left after filtering.
(note: at the moment, the class is not actually tied to a physical table.
It can be used to track reading from files, buffers, etc).
*/
class Table_access_tracker
{
public:
Table_access_tracker() :
r_scans(0), r_rows(0), /*r_rows_after_table_cond(0),*/
r_rows_after_where(0)
{}
ha_rows r_scans; /* How many scans were run on this join_tab */
ha_rows r_rows; /* How many rows we've got after that */
ha_rows r_rows_after_where; /* Rows after applying attached part of WHERE */
bool has_scans() { return (r_scans != 0); }
ha_rows get_loops() { return r_scans; }
double get_avg_rows()
{
return r_scans ? ((double)r_rows / r_scans): 0;
}
double get_filtered_after_where()
{
double r_filtered;
if (r_rows > 0)
r_filtered= (double)r_rows_after_where / r_rows;
else
r_filtered= 1.0;
return r_filtered;
}
inline void on_scan_init() { r_scans++; }
inline void on_record_read() { r_rows++; }
inline void on_record_after_where() { r_rows_after_where++; }
};
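/*
  Editorial sketch, not part of the commit: the intended reporting pattern,
  matching how mysql_update() drives buf_tracker later in this diff.
  read_record()/check_where() are stand-ins for the caller's row loop:

    tracker->on_scan_init();             // a new scan starts
    while (read_record())
    {
      tracker->on_record_read();         // counted in r_rows
      if (!check_where())
        continue;
      tracker->on_record_after_where();  // counted in r_rows_after_where
    }
    // get_filtered_after_where() then yields r_rows_after_where / r_rows
*/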
class Json_writer;
/*
This stores the data about how filesort executed.
A few things here (e.g. r_used_pq, r_limit) conceptually belong to the query
plan; however, they only become known during execution, so we can't easily
put them into the query plan.
The class is designed to handle multiple invocations of filesort().
*/
class Filesort_tracker : public Sql_alloc
{
public:
Filesort_tracker() :
r_loops(0), r_limit(0), r_used_pq(0),
r_examined_rows(0), r_sorted_rows(0), r_output_rows(0),
sort_passes(0),
sort_buffer_size(0)
{}
/* Functions that filesort uses to report various things about its execution */
inline void report_use(ha_rows r_limit_arg)
{
if (!r_loops++)
r_limit= r_limit_arg;
else
r_limit= (r_limit != r_limit_arg)? 0: r_limit_arg;
}
inline void incr_pq_used() { r_used_pq++; }
inline void report_row_numbers(ha_rows examined_rows,
ha_rows sorted_rows,
ha_rows returned_rows)
{
r_examined_rows += examined_rows;
r_sorted_rows += sorted_rows;
r_output_rows += returned_rows;
}
inline void report_merge_passes_at_start(ulong passes)
{
sort_passes -= passes;
}
inline void report_merge_passes_at_end(ulong passes)
{
sort_passes += passes;
}
inline void report_sort_buffer_size(size_t bufsize)
{
if (sort_buffer_size)
sort_buffer_size= ulonglong(-1); // multiple buffers of different sizes
else
sort_buffer_size= bufsize;
}
/* Functions to get the statistics */
void print_json(Json_writer *writer);
ulonglong get_r_loops() { return r_loops; }
double get_avg_examined_rows()
{
return ((double)r_examined_rows) / r_loops;
}
double get_avg_returned_rows()
{
return ((double)r_output_rows) / r_loops;
}
double get_r_filtered()
{
if (r_examined_rows > 0)
return ((double)r_sorted_rows / r_examined_rows);
else
return 1.0;
}
private:
ulonglong r_loops; /* How many times filesort was invoked */
/*
LIMIT is typically a constant. There is never "LIMIT 0".
HA_POS_ERROR means we never had a limit
0 means different values of LIMIT were used in
different filesort invocations
other value means the same LIMIT value was used every time.
*/
ulonglong r_limit;
ulonglong r_used_pq; /* How many times PQ was used */
/* How many rows were examined (before checking the select->cond) */
ulonglong r_examined_rows;
/*
How many rows were put into sorting (this is examined_rows minus rows that
didn't pass the WHERE condition)
*/
ulonglong r_sorted_rows;
/*
How many rows were returned. This is equal to r_sorted_rows, unless there
was a LIMIT N clause in which case filesort would not have returned more
than N rows.
*/
ulonglong r_output_rows;
/* Total number of merge passes (divide by r_loops to get the average) */
ulonglong sort_passes;
/*
0 - not used (or not known)
(ulonglong)-1 - multiple buffers of different sizes were used
other - the sort buffer size
*/
ulonglong sort_buffer_size;
};
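/*
  Editorial note, not part of the commit, on the merge-pass accounting above:
  thd->query_plan_fsort_passes is a running per-session counter, so the
  start/end pair records a delta:

    tracker->report_merge_passes_at_start(thd->query_plan_fsort_passes); // -= N
    ... filesort's merge phase increments the counter by k ...
    tracker->report_merge_passes_at_end(thd->query_plan_fsort_passes);   // += N+k
    // net effect: sort_passes grows by exactly k for this invocation
*/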
typedef enum
{
EXPL_NO_TMP_TABLE=0,
EXPL_TMP_TABLE_BUFFER,
EXPL_TMP_TABLE_GROUP,
EXPL_TMP_TABLE_DISTINCT
} enum_tmp_table_use;
typedef enum
{
EXPL_ACTION_FILESORT,
EXPL_ACTION_TEMPTABLE,
EXPL_ACTION_REMOVE_DUPS,
} enum_qep_action;
/*
This tracks how a JOIN object has resolved ORDER/GROUP BY/DISTINCT.
We are not tied to the query plan at all, because the query plan does not
have sufficient information. *A lot* of decisions about ordering/grouping
are made at very late stages (in JOIN::exec, JOIN::init_execution, in
create_sort_index and even in create_tmp_table).
The idea is that operations that happen during select execution report
themselves. We have these operations:
- Sorting with filesort()
- Duplicate row removal (the one done by remove_duplicates())
- Use of a temporary table to buffer the result
There is also the "Selection" operation, done by do_select(). It reads rows;
there are several distinct cases:
1. doing the join operation on the base tables
2. reading the temporary table
3. reading the filesort output
It would be nice to build an execution graph, e.g.
Select(JOIN op) -> temp.table -> filesort -> Select(filesort result)
but the problem is that there is no way to tell what a do_select() call will do.
Our solution is not to have explicit selection operations. We make these
assumptions about the query plan:
- Select(JOIN op) is the first operation in the query plan,
- unless the first recorded operation is filesort(). filesort() is unable to
read the result of a select, so when we find it first, the query plan is:
filesort(first join table) -> Select(JOIN op) -> ...
The other popular query plan is:
Select(JOIN op) -> temp.table -> filesort() -> ...
///TODO: handle repeated execution with subselects!
*/
class Sort_and_group_tracker : public Sql_alloc
{
enum { MAX_QEP_ACTIONS = 5 };
/* Query actions in the order they were made */
enum_qep_action qep_actions[MAX_QEP_ACTIONS];
uint n_actions;
/*
Trackers for filesort operation. JOIN::exec() may need at most two sorting
operations.
*/
Filesort_tracker filesort_tracker[2];
int cur_tracker;
/* Information about temporary tables */
enum_tmp_table_use tmp_table_kind[2];
int cur_tmp_table;
friend class Explain_select;
public:
Sort_and_group_tracker() :
n_actions(0),
cur_tracker(0),
cur_tmp_table(0)
{}
/*************** Reporting interface ***************/
/* Report that join execution is started */
void report_join_start()
{
n_actions= 0;
cur_tracker= 0;
cur_tmp_table= 0;
}
/* Report that a temporary table is created. */
void report_tmp_table(TABLE *tbl)
{
DBUG_ASSERT(n_actions < MAX_QEP_ACTIONS);
qep_actions[n_actions++]= EXPL_ACTION_TEMPTABLE;
DBUG_ASSERT(cur_tmp_table < 2);
cur_tmp_table++;
}
/* Report that we are doing a filesort. */
Filesort_tracker *report_sorting()
{
DBUG_ASSERT(n_actions < MAX_QEP_ACTIONS);
qep_actions[n_actions++]= EXPL_ACTION_FILESORT;
DBUG_ASSERT(cur_tracker < 2);
return &filesort_tracker[cur_tracker++];
}
/*************** Statistics retrieval interface ***************/
//enum_tmp_table_use get_tmp_table_type() { return join_result_tmp_table; }
};
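
For the two SELECT shapes in the new test file, the reporting sequence into Sort_and_group_tracker would be roughly as follows (an editorial sketch; the call order is inferred from the reporting interface above and the JSON in the .result file):

  // filesort { temporary_table { join } }   -- ORDER BY t2.b
  ops_tracker.report_join_start();
  ops_tracker.report_tmp_table(tbl);   // qep_actions[0]= EXPL_ACTION_TEMPTABLE
  ops_tracker.report_sorting();        // qep_actions[1]= EXPL_ACTION_FILESORT
  // the printer walks qep_actions backwards, so filesort becomes the
  // outermost JSON object

  // join { read_sorted_file { filesort } }  -- ORDER BY t0.a
  ops_tracker.report_join_start();
  ops_tracker.report_sorting();        // filesort recorded first (i == 0), so
                                       // it is attached to the first join
                                       // table as first_table_sort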


@ -67,7 +67,8 @@ Explain_delete* Delete_plan::save_explain_delete_data(MEM_ROOT *mem_root, THD *t
else
{
explain->deleting_all_rows= false;
Update_plan::save_explain_data_intern(mem_root, query, explain);
Update_plan::save_explain_data_intern(mem_root, explain,
thd->lex->analyze_stmt);
}
query->add_upd_del_plan(explain);
@ -81,15 +82,15 @@ Update_plan::save_explain_update_data(MEM_ROOT *mem_root, THD *thd)
Explain_query *query= thd->lex->explain;
Explain_update* explain=
new (mem_root) Explain_update(mem_root, thd->lex->analyze_stmt);
save_explain_data_intern(mem_root, query, explain);
save_explain_data_intern(mem_root, explain, thd->lex->analyze_stmt);
query->add_upd_del_plan(explain);
return explain;
}
void Update_plan::save_explain_data_intern(MEM_ROOT *mem_root,
Explain_query *query,
Explain_update *explain)
Explain_update *explain,
bool is_analyze)
{
explain->select_type= "SIMPLE";
explain->table_name.append(table->pos_in_table_list->alias);
@ -109,6 +110,9 @@ void Update_plan::save_explain_data_intern(MEM_ROOT *mem_root,
return;
}
if (is_analyze)
table->file->set_time_tracker(&explain->table_tracker);
select_lex->set_explain_type(TRUE);
explain->select_type= select_lex->type;
/* Partitions */
@ -151,7 +155,9 @@ void Update_plan::save_explain_data_intern(MEM_ROOT *mem_root,
explain->using_where= MY_TEST(select && select->cond);
explain->where_cond= select? select->cond: NULL;
explain->using_filesort= using_filesort;
if (using_filesort)
explain->filesort_tracker= new (mem_root) Filesort_tracker;
explain->using_io_buffer= using_io_buffer;
append_possible_keys(mem_root, explain->possible_keys, table,
@ -486,13 +492,16 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
table->sort.io_cache= (IO_CACHE *) my_malloc(sizeof(IO_CACHE),
MYF(MY_FAE | MY_ZEROFILL |
MY_THREAD_SPECIFIC));
Filesort_tracker *fs_tracker=
thd->lex->explain->get_upd_del_plan()->filesort_tracker;
if (!(sortorder= make_unireg_sortorder(order, &length, NULL)) ||
(table->sort.found_records= filesort(thd, table, sortorder, length,
select, HA_POS_ERROR,
true,
&examined_rows, &found_rows))
== HA_POS_ERROR)
&examined_rows, &found_rows,
fs_tracker))
== HA_POS_ERROR)
{
delete select;
free_underlaid_joins(thd, &thd->lex->select_lex);


@ -743,11 +743,6 @@ void Explain_select::print_explain_json(Explain_query *query,
}
else
{
/*
TODO: how does this approach allow to print ORDER BY members?
Explain_basic_join does not have ORDER/GROUP.
A: factor out join tab printing loop into a common func.
*/
writer->add_member("query_block").start_object();
writer->add_member("select_id").add_ll(select_id);
@ -761,8 +756,67 @@ void Explain_select::print_explain_json(Explain_query *query,
writer->add_member("const_condition");
write_item(writer, exec_const_cond);
}
Filesort_tracker *first_table_sort= NULL;
int started_objects= 0;
if (is_analyze)
{
/* ANALYZE has collected this part of query plan independently */
for (int i= ops_tracker.n_actions-1; i >= 0; i--)
{
if (ops_tracker.qep_actions[i] == EXPL_ACTION_FILESORT)
{
if (i == 0)
{
/* filesort operation was the first in the pipeline */
first_table_sort= &ops_tracker.filesort_tracker[0];
break;
}
writer->add_member("filesort").start_object();
started_objects++;
}
else if (ops_tracker.qep_actions[i] == EXPL_ACTION_TEMPTABLE)
{
writer->add_member("temporary_table").start_object();
started_objects++;
/*
if (tmp == EXPL_TMP_TABLE_BUFFER)
func= "buffer";
else if (tmp == EXPL_TMP_TABLE_GROUP)
func= "group-by";
else
func= "distinct";
writer->add_member("function").add_str(func);
*/
}
else if (ops_tracker.qep_actions[i] == EXPL_ACTION_REMOVE_DUPS)
{
writer->add_member("duplicate_removal").start_object();
started_objects++;
}
else
DBUG_ASSERT(0);
}
}
else
{
/* This is just EXPLAIN. Try to produce something meaningful */
if (using_temporary)
{
started_objects= 1;
writer->add_member("temporary_table").start_object();
writer->add_member("function").add_str("buffer");
}
}
Explain_basic_join::print_explain_json_interns(query, writer, is_analyze,
first_table_sort);
for (;started_objects; started_objects--)
writer->end_object();
Explain_basic_join::print_explain_json_interns(query, writer, is_analyze);
writer->end_object();
}
@ -776,24 +830,27 @@ void Explain_basic_join::print_explain_json(Explain_query *query,
writer->add_member("query_block").start_object();
writer->add_member("select_id").add_ll(select_id);
print_explain_json_interns(query, writer, is_analyze);
print_explain_json_interns(query, writer, is_analyze, NULL);
writer->end_object();
}
void Explain_basic_join::print_explain_json_interns(Explain_query *query,
Json_writer *writer,
bool is_analyze)
void Explain_basic_join::
print_explain_json_interns(Explain_query *query,
Json_writer *writer,
bool is_analyze,
Filesort_tracker *first_table_sort)
{
Json_writer_nesting_guard guard(writer);
for (uint i=0; i< n_join_tabs; i++)
{
if (join_tabs[i]->start_dups_weedout)
writer->add_member("duplicates_removal").start_object();
join_tabs[i]->print_explain_json(query, writer, is_analyze);
join_tabs[i]->print_explain_json(query, writer, is_analyze,
(i==0)? first_table_sort : NULL);
if (join_tabs[i]->end_dups_weedout)
writer->end_object();
}
@ -1230,11 +1287,47 @@ void add_json_keyset(Json_writer *writer, const char *elem_name,
}
/*
@param fs_tracker Normally NULL. When not NULL, it means that the join tab
used filesort.
*/
void Explain_table_access::print_explain_json(Explain_query *query,
Json_writer *writer,
bool is_analyze)
bool is_analyze,
Filesort_tracker *fs_tracker)
{
Json_writer_nesting_guard guard(writer);
if (fs_tracker)
{
/* filesort was invoked on this join tab before doing the join with the rest */
writer->add_member("read_sorted_file").start_object();
if (is_analyze)
{
writer->add_member("r_rows");
/*
r_rows when reading the filesort result. This can be less than the number
of rows produced by filesort, because the NL-join may stop early due to a LIMIT.
*/
if (tracker.has_scans())
writer->add_double(tracker.get_avg_rows());
else
writer->add_null();
/*
r_filtered when reading the filesort result. We should have already checked
the WHERE during filesort, but let's check just in case.
*/
if (tracker.has_scans() && tracker.get_filtered_after_where() < 1.0)
{
writer->add_member("r_filtered");
writer->add_double(tracker.get_filtered_after_where()*100.0);
}
}
writer->add_member("filesort").start_object();
fs_tracker->print_json(writer);
}
if (bka_type.is_using_jbuf())
{
@ -1322,13 +1415,21 @@ void Explain_table_access::print_explain_json(Explain_query *query,
if (is_analyze)
{
writer->add_member("r_rows");
if (tracker.has_scans())
if (fs_tracker)
{
double avg_rows= tracker.get_avg_rows();
writer->add_double(avg_rows);
/* Get r_rows value from filesort */
if (fs_tracker->get_r_loops())
writer->add_double(fs_tracker->get_avg_examined_rows());
else
writer->add_null();
}
else
writer->add_null();
{
if (tracker.has_scans())
writer->add_double(tracker.get_avg_rows());
else
writer->add_null();
}
if (op_tracker.get_loops())
{
@ -1345,10 +1446,22 @@ void Explain_table_access::print_explain_json(Explain_query *query,
if (is_analyze)
{
writer->add_member("r_filtered");
if (tracker.has_scans())
writer->add_double(tracker.get_filtered_after_where()*100.0);
if (fs_tracker)
{
/* Get r_filtered value from filesort */
if (fs_tracker->get_r_loops())
writer->add_double(fs_tracker->get_r_filtered());
else
writer->add_null();
}
else
writer->add_null();
{
/* Get r_filtered from the NL-join runtime */
if (tracker.has_scans())
writer->add_double(tracker.get_filtered_after_where()*100.0);
else
writer->add_null();
}
}
for (int i=0; i < (int)extra_tags.elements(); i++)
@ -1414,6 +1527,12 @@ void Explain_table_access::print_explain_json(Explain_query *query,
writer->end_object();
}
if (fs_tracker)
{
writer->end_object(); // filesort
writer->end_object(); // read_sorted_file
}
writer->end_object();
}
@ -1777,7 +1896,7 @@ int Explain_update::print_explain(Explain_query *query,
extra_str.append(mrr_type);
}
if (using_filesort)
if (is_using_filesort())
{
if (extra_str.length() !=0)
extra_str.append(STRING_WITH_LEN("; "));
@ -1825,7 +1944,14 @@ void Explain_update::print_explain_json(Explain_query *query,
writer->add_member("query_block").start_object();
writer->add_member("select_id").add_ll(1);
/* This is the total time it took to do the UPDATE/DELETE */
if (is_analyze && command_tracker.get_loops())
{
writer->add_member("r_total_time_ms").
add_double(command_tracker.get_time_ms());
}
if (impossible_where || no_partitions)
{
const char *msg= impossible_where ? STR_IMPOSSIBLE_WHERE :
@ -1837,6 +1963,25 @@ void Explain_update::print_explain_json(Explain_query *query,
return;
}
DBUG_ASSERT(!(is_using_filesort() && using_io_buffer));
bool doing_buffering= false;
if (is_using_filesort())
{
writer->add_member("filesort").start_object();
if (is_analyze)
filesort_tracker->print_json(writer);
doing_buffering= true;
}
if (using_io_buffer)
{
writer->add_member("buffer").start_object();
doing_buffering= true;
}
/* Produce elements that are common for buffered and un-buffered cases */
writer->add_member("table").start_object();
if (get_type() == EXPLAIN_UPDATE)
@ -1898,50 +2043,58 @@ void Explain_update::print_explain_json(Explain_query *query,
writer->end_object();
}
#if 0
/* `ref` */
if (!ref_list.is_empty())
{
List_iterator_fast<char> it(ref_list);
const char *str;
writer->add_member("ref").start_array();
while ((str= it++))
writer->add_str(str);
writer->end_array();
}
#endif
/* `rows` */
writer->add_member("rows").add_ll(rows);
/* `r_rows` */
if (is_analyze && tracker.has_scans())
{
double avg_rows= tracker.get_avg_rows();
writer->add_member("r_rows").add_double(avg_rows);
}
/* UPDATE/DELETE do not produce `filtered` estimate */
/* `r_filtered` */
if (is_analyze)
{
double r_filtered= tracker.get_filtered_after_where() * 100.0;
writer->add_member("r_filtered").add_double(r_filtered);
}
if (mrr_type.length() != 0)
writer->add_member("mrr_type").add_str(mrr_type.ptr());
if (using_filesort)
writer->add_member("using_filesort").add_ll(1);
if (using_io_buffer)
writer->add_member("using_io_buffer").add_ll(1);
if (is_analyze)
{
if (doing_buffering)
{
ha_rows r_rows;
double r_filtered;
if (is_analyze && command_tracker.get_loops())
writer->
add_member("r_total_time_ms").add_double(command_tracker.get_time_ms());
if (is_using_filesort())
{
if (filesort_tracker->get_r_loops())
r_rows= filesort_tracker->get_avg_examined_rows();
else
r_rows= 0;
r_filtered= filesort_tracker->get_r_filtered() * 100.0;
}
else
{
if (buf_tracker.has_scans())
r_rows= (ha_rows) buf_tracker.get_avg_rows();
else
r_rows= 0;
r_filtered= buf_tracker.get_filtered_after_where() * 100.0;
}
writer->add_member("r_rows").add_ll(r_rows);
writer->add_member("r_filtered").add_double(r_filtered);
}
else /* Not doing buffering */
{
writer->add_member("r_rows");
if (tracker.has_scans())
writer->add_double(tracker.get_avg_rows());
else
writer->add_null();
/* There is no 'filtered' estimate in UPDATE/DELETE atm */
double r_filtered= tracker.get_filtered_after_where() * 100.0;
writer->add_member("r_filtered").add_double(r_filtered);
}
if (table_tracker.get_loops())
{
writer->add_member("r_total_time_ms").
add_double(table_tracker.get_time_ms());
}
}
if (where_cond)
{
@ -1949,7 +2102,15 @@ void Explain_update::print_explain_json(Explain_query *query,
write_item(writer, where_cond);
}
/*** The part of plan that is before the buffering/sorting ends here ***/
if (is_using_filesort())
writer->end_object();
if (using_io_buffer)
writer->end_object();
writer->end_object(); // table
print_explain_json_for_children(query, writer, is_analyze);
writer->end_object(); // query_block
}
@ -2105,3 +2266,4 @@ void Explain_range_checked_fer::print_json(Json_writer *writer,
writer->end_object();
}
}


@ -59,51 +59,6 @@ public:
class Json_writer;
/*
A class for collecting read statistics.
The idea is that we run several scans. Each scan gets rows and then filters
some of them out. We count scans, rows, and rows left after filtering.
(note: at the moment, the class is not actually tied to a physical table.
It can be used to track reading from files, buffers, etc).
*/
class Table_access_tracker
{
public:
Table_access_tracker() :
r_scans(0), r_rows(0), /*r_rows_after_table_cond(0),*/
r_rows_after_where(0)
{}
ha_rows r_scans; /* How many scans were run on this join_tab */
ha_rows r_rows; /* How many rows we've got after that */
ha_rows r_rows_after_where; /* Rows after applying attached part of WHERE */
bool has_scans() { return (r_scans != 0); }
ha_rows get_loops() { return r_scans; }
double get_avg_rows()
{
return r_scans ? ((double)r_rows / r_scans): 0;
}
double get_filtered_after_where()
{
double r_filtered;
if (r_rows > 0)
r_filtered= (double)r_rows_after_where / r_rows;
else
r_filtered= 1.0;
return r_filtered;
}
inline void on_scan_init() { r_scans++; }
inline void on_record_read() { r_rows++; }
inline void on_record_after_where() { r_rows_after_where++; }
};
/**************************************************************************************
Data structures for producing EXPLAIN outputs.
@ -212,7 +167,8 @@ public:
bool is_analyze);
void print_explain_json_interns(Explain_query *query, Json_writer *writer,
bool is_analyze);
bool is_analyze,
Filesort_tracker *first_table_sort);
/* A flat array of Explain structs for tables. */
Explain_table_access** join_tabs;
@ -271,6 +227,8 @@ public:
/* ANALYZE members */
Time_and_counter_tracker time_tracker;
Sort_and_group_tracker ops_tracker;
int print_explain(Explain_query *query, select_result_sink *output,
uint8 explain_flags, bool is_analyze);
@ -295,9 +253,9 @@ private:
class Explain_union : public Explain_node
{
public:
Explain_union(MEM_ROOT *root) :
Explain_node(root),
time_tracker(false)
Explain_union(MEM_ROOT *root, bool is_analyze) :
Explain_node(root),
fake_select_lex_explain(root, is_analyze)
{}
enum explain_node_type get_type() { return EXPLAIN_UNION; }
@ -332,8 +290,13 @@ public:
const char *fake_select_type;
bool using_filesort;
bool using_tmp;
/* TODO: the below is not printed yet:*/
Time_and_counter_tracker time_tracker;
/*
Explain data structure for "fake_select_lex" (i.e. for the degenerate
SELECT that reads the UNION result).
It doesn't have a query plan, but we still need an execution tracker, etc.
*/
Explain_select fake_select_lex_explain;
Table_access_tracker *get_fake_select_lex_tracker()
{
@ -729,13 +692,13 @@ public:
Table_access_tracker tracker;
Exec_time_tracker op_tracker;
Table_access_tracker jbuf_tracker;
int print_explain(select_result_sink *output, uint8 explain_flags,
bool is_analyze,
uint select_id, const char *select_type,
bool using_temporary, bool using_filesort);
void print_explain_json(Explain_query *query, Json_writer *writer,
bool is_analyze);
bool is_analyze, Filesort_tracker *fs_tracker);
private:
void append_tag_name(String *str, enum explain_extra_tag tag);
@ -759,6 +722,7 @@ public:
Explain_update(MEM_ROOT *root, bool is_analyze) :
Explain_node(root),
filesort_tracker(NULL),
command_tracker(is_analyze)
{}
@ -793,15 +757,30 @@ public:
ha_rows rows;
bool using_filesort;
bool using_io_buffer;
/* Tracker for doing reads when filling the buffer */
Table_access_tracker buf_tracker;
bool is_using_filesort() { return filesort_tracker? true: false; }
/*
A non-null filesort_tracker means "using filesort".
If we are using filesort, then table_tracker is for the I/O done inside
filesort, while 'tracker' is for tracking post-filesort reads.
*/
Filesort_tracker *filesort_tracker;
/* ANALYZE members and methods */
Table_access_tracker tracker;
/* This tracks execution of the whole command */
Time_and_counter_tracker command_tracker;
//psergey-todo: io-tracker here.
/* TODO: This tracks time to read rows from the table */
Exec_time_tracker table_tracker;
virtual int print_explain(Explain_query *query, select_result_sink *output,
uint8 explain_flags, bool is_analyze);


@ -4331,7 +4331,9 @@ void LEX::restore_set_statement_var()
int st_select_lex_unit::save_union_explain(Explain_query *output)
{
SELECT_LEX *first= first_select();
Explain_union *eu= new (output->mem_root) Explain_union(output->mem_root);
Explain_union *eu=
new (output->mem_root) Explain_union(output->mem_root,
thd->lex->analyze_stmt);
if (derived)
eu->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
@ -4374,12 +4376,7 @@ int st_select_lex_unit::save_union_explain_part2(Explain_query *output)
eu->add_child(unit->first_select()->select_number);
}
}
/*
Having a time tracker for reading UNION result is not very interesting
but is easier, as JOIN::exec now relies on having a tracker.
*/
fake_select_lex->join->tracker= &eu->time_tracker;
fake_select_lex->join->explain= &eu->fake_select_lex_explain;
}
return 0;
}


@ -2335,8 +2335,7 @@ public:
Explain_update* save_explain_update_data(MEM_ROOT *mem_root, THD *thd);
protected:
void save_explain_data_intern(MEM_ROOT *mem_root, Explain_query *query,
Explain_update *eu);
void save_explain_data_intern(MEM_ROOT *mem_root, Explain_update *eu, bool is_analyze);
public:
virtual ~Update_plan() {}


@ -1988,7 +1988,7 @@ int JOIN::init_execution()
group_list && simple_group,
select_options, tmp_rows_limit, "")))
DBUG_RETURN(1);
explain->ops_tracker.report_tmp_table(exec_tmp_table1);
/*
We don't have to store rows in temp table that doesn't match HAVING if:
- we are sorting the table and writing complete group rows to the
@ -2378,8 +2378,8 @@ void JOIN::save_explain_data(Explain_query *output, bool can_overwrite,
*/
uint nr= select_lex->master_unit()->first_select()->select_number;
Explain_union *eu= output->get_union(nr);
explain= &eu->fake_select_lex_explain;
join_tab[0].tracker= eu->get_fake_select_lex_tracker();
tracker= &eu->time_tracker;
}
}
@ -2392,9 +2392,10 @@ void JOIN::exec()
select_lex->select_number))
dbug_serve_apcs(thd, 1);
);
ANALYZE_START_TRACKING(tracker);
ANALYZE_START_TRACKING(&explain->time_tracker);
explain->ops_tracker.report_join_start();
exec_inner();
ANALYZE_STOP_TRACKING(tracker);
ANALYZE_STOP_TRACKING(&explain->time_tracker);
DBUG_EXECUTE_IF("show_explain_probe_join_exec_end",
if (dbug_user_var_equals_int(thd,
@ -2768,6 +2769,7 @@ void JOIN::exec_inner()
HA_POS_ERROR, "")))
DBUG_VOID_RETURN;
curr_join->exec_tmp_table2= exec_tmp_table2;
explain->ops_tracker.report_tmp_table(exec_tmp_table2);
}
if (curr_join->group_list)
{
@ -2869,6 +2871,7 @@ void JOIN::exec_inner()
curr_join->select_distinct=0;
}
curr_tmp_table->reginfo.lock_type= TL_UNLOCK;
// psergey-todo: here is one place where we switch to
if (curr_join->make_simple_join(this, curr_tmp_table))
DBUG_VOID_RETURN;
calc_group_buffer(curr_join, curr_join->group_list);
@ -3057,7 +3060,6 @@ void JOIN::exec_inner()
curr_join->table_count,
(int) curr_join->select_limit,
(int) unit->select_limit_cnt));
if (create_sort_index(thd,
curr_join,
order_arg,
@ -20931,7 +20933,8 @@ create_sort_index(THD *thd, JOIN *join, ORDER *order,
table->file->info(HA_STATUS_VARIABLE); // Get record count
filesort_retval= filesort(thd, table, join->sortorder, length,
select, filesort_limit, 0,
&examined_rows, &found_rows);
&examined_rows, &found_rows,
join->explain->ops_tracker.report_sorting());
table->sort.found_records= filesort_retval;
tab->records= found_rows; // For SQL_CALC_ROWS
@ -23442,10 +23445,10 @@ void JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab
tab->tracker= &eta->tracker;
tab->jbuf_tracker= &eta->jbuf_tracker;
/* Enable the table access time tracker only for "ANALYZE stmt" */
if (thd->lex->analyze_stmt)
tab->table->file->tracker= &eta->op_tracker;
tab->table->file->set_time_tracker(&eta->op_tracker);
/* No need to save id and select_type here, they are kept in Explain_select */
@ -23849,7 +23852,6 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
bool need_order, bool distinct,
const char *message)
{
Explain_node *explain_node= 0;
JOIN *join= this; /* Legacy: this code used to be a non-member function */
int error= 0;
DBUG_ENTER("JOIN::save_explain_data_intern");
@ -23864,33 +23866,32 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
DBUG_ASSERT(!join->select_lex->master_unit()->derived ||
join->select_lex->master_unit()->derived->is_materialized_derived());
explain= NULL;
/* Don't log this into the slow query log */
if (message)
{
Explain_select *xpl_sel;
explain_node= xpl_sel=
new (output->mem_root) Explain_select(output->mem_root,
thd->lex->analyze_stmt);
explain= new (output->mem_root) Explain_select(output->mem_root,
thd->lex->analyze_stmt);
join->select_lex->set_explain_type(true);
xpl_sel->select_id= join->select_lex->select_number;
xpl_sel->select_type= join->select_lex->type;
xpl_sel->message= message;
tracker= &xpl_sel->time_tracker;
explain->select_id= join->select_lex->select_number;
explain->select_type= join->select_lex->type;
/* Setting explain->message means that all other members are invalid */
explain->message= message;
if (select_lex->master_unit()->derived)
xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
/* Setting xpl_sel->message means that all other members are invalid */
output->add_node(xpl_sel);
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
output->add_node(explain);
}
else
{
Explain_select *xpl_sel;
explain_node= xpl_sel=
explain= xpl_sel=
new (output->mem_root) Explain_select(output->mem_root,
thd->lex->analyze_stmt);
table_map used_tables=0;
tracker= &xpl_sel->time_tracker;
join->select_lex->set_explain_type(true);
xpl_sel->select_id= join->select_lex->select_number;
@ -23986,13 +23987,12 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
if (!(unit->item && unit->item->eliminated) && // (1)
(!unit->derived || unit->derived->is_materialized_derived())) // (2)
{
explain_node->add_child(unit->first_select()->select_number);
explain->add_child(unit->first_select()->select_number);
}
}
if (!error && select_lex->is_top_level_node())
output->query_plan_ready();
DBUG_RETURN(error);
}


@ -1225,7 +1225,8 @@ public:
/** Is set if we have a GROUP BY and we have ORDER BY on a constant. */
bool skip_sort_order;
bool need_tmp, hidden_group_fields;
bool need_tmp;
bool hidden_group_fields;
/* TRUE if there was a full cleanup of the JOIN */
bool cleaned;
DYNAMIC_ARRAY keyuse;
@ -1282,7 +1283,7 @@ public:
bool optimized; ///< flag to avoid double optimization in EXPLAIN
bool initialized; ///< flag to avoid double init_execution calls
Time_and_counter_tracker *tracker;
Explain_select *explain;
enum { QEP_NOT_PRESENT_YET, QEP_AVAILABLE, QEP_DELETED} have_query_plan;
@ -1377,7 +1378,7 @@ public:
no_rows_in_result_called= 0;
positions= best_positions= 0;
tracker= NULL;
explain= NULL;
all_fields= fields_arg;
if (&fields_list != &fields_arg) /* Avoid valgrind-warning */


@ -9340,6 +9340,7 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
tables.db= from->s->db.str;
THD_STAGE_INFO(thd, stage_sorting);
Filesort_tracker dummy_tracker;
if (thd->lex->select_lex.setup_ref_array(thd, order_num) ||
setup_order(thd, thd->lex->select_lex.ref_pointer_array,
&tables, fields, all_fields, order) ||
@ -9347,7 +9348,8 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to,
(from->sort.found_records= filesort(thd, from, sortorder, length,
NULL, HA_POS_ERROR,
true,
&examined_rows, &found_rows)) ==
&examined_rows, &found_rows,
&dummy_tracker)) ==
HA_POS_ERROR)
goto err;
}


@ -969,7 +969,7 @@ bool st_select_lex_unit::exec()
1st execution sets certain members (e.g. select_result) to perform
subquery execution rather than EXPLAIN line production. In order
to reset them back, we re-do all of the actions (yes it is ugly):
*/
*/ // psergey-todo: is the above really necessary anymore??
join->init(thd, item_list, fake_select_lex->options, result);
saved_error= mysql_select(thd, &fake_select_lex->ref_pointer_array,
&result_table_list,


@ -554,11 +554,15 @@ int mysql_update(THD *thd,
table->sort.io_cache = (IO_CACHE *) my_malloc(sizeof(IO_CACHE),
MYF(MY_FAE | MY_ZEROFILL |
MY_THREAD_SPECIFIC));
Filesort_tracker *fs_tracker=
thd->lex->explain->get_upd_del_plan()->filesort_tracker;
if (!(sortorder=make_unireg_sortorder(order, &length, NULL)) ||
(table->sort.found_records= filesort(thd, table, sortorder, length,
select, limit,
true,
&examined_rows, &found_rows))
&examined_rows, &found_rows,
fs_tracker))
== HA_POS_ERROR)
{
goto err;
@ -578,7 +582,7 @@ int mysql_update(THD *thd,
we go through the matching rows, save a pointer to them and
update these in a separate loop based on the pointer.
*/
explain->buf_tracker.on_scan_init();
IO_CACHE tempfile;
if (open_cached_file(&tempfile, mysql_tmpdir,TEMP_PREFIX,
DISK_BUFFER_SIZE, MYF(MY_WME)))
@ -619,6 +623,7 @@ int mysql_update(THD *thd,
while (!(error=info.read_record(&info)) && !thd->killed)
{
explain->buf_tracker.on_record_read();
if (table->vfield)
update_virtual_fields(thd, table,
table->triggers ? VCOL_UPDATE_ALL :
@ -629,6 +634,7 @@ int mysql_update(THD *thd,
if (table->file->was_semi_consistent_read())
continue; /* repeat the read of the same row if it still exists */
explain->buf_tracker.on_record_after_where();
table->file->position(table->record[0]);
if (my_b_write(&tempfile,table->file->ref,
table->file->ref_length))