From 3b562dcf6e5423d41d41ef416c18187c3a946d9e Mon Sep 17 00:00:00 2001 From: Shishir Jaiswal Date: Tue, 16 May 2017 13:48:52 +0530 Subject: [PATCH] Bug#16212207 - LOAD XML INFILE PERFORMANCE WITH INDENTED XML DESCRIPTION =========== LOAD XML INFILE performance becomes painfully slow if the tags' value has any space(s) in between them. They're usually kept intentionally for indentation purpose. ANALYSIS ======== The extra spaces are calling clear_level() many a times which is having overhead of clearing taglist etc. This can be avoided altogether by skipping all such spaces. FIX === Trim all the starting whitespaces from the value before passing it to read_value() --- sql/sql_load.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sql/sql_load.cc b/sql/sql_load.cc index c28c7cdb2db..a2c01c3b8a8 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -1,5 +1,5 @@ /* - Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1989,6 +1989,13 @@ int READ_INFO::read_xml() case '>': /* end tag - read tag value */ in_tag= false; + /* Skip all whitespaces */ + while (' ' == (chr= my_tospace(GET))); + /* + Push the first non-whitespace char back to Stack. This char would be + read in the upcoming call to read_value() + */ + PUSH(chr); chr= read_value('<', &value); if(chr == my_b_EOF) goto found_eof;