mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 10:56:12 +01:00 
			
		
		
		
	 eb26bf6e09
			
		
	
	
	eb26bf6e09
	
	
	
		
			
			it should now always be /path/to/exe Ver <tool version> Distrib <server version> for <OS> (<ARCH>) in all tools and clients
		
			
				
	
	
		
			482 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			482 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|    Copyright (c) 2001, 2013, Oracle and/or its affiliates.
 | |
|    Copyright (c) 2010, 2019, MariaDB
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
 | |
| */
 | |
| 
 | |
| /*
 | |
|    Character set conversion utility
 | |
| */
 | |
| 
 | |
| #define VER "1.0"
 | |
| #include "mariadb.h"
 | |
| #include "client_priv.h"
 | |
| #include "sql_string.h"
 | |
| #include "my_dir.h"
 | |
| #include <welcome_copyright_notice.h>
 | |
| 
 | |
| class CmdOpt
 | |
| {
 | |
| public:
 | |
|   const char *m_charset_from;
 | |
|   const char *m_charset_to;
 | |
|   const char *m_delimiter;
 | |
|   my_bool m_continue;
 | |
|   CmdOpt()
 | |
|    :m_charset_from("latin1"),
 | |
|     m_charset_to("latin1"),
 | |
|     m_delimiter(NULL),
 | |
|     m_continue(FALSE)
 | |
|   { }
 | |
|   static CHARSET_INFO *csinfo_by_name(const char *csname)
 | |
|   {
 | |
|     return get_charset_by_csname(csname, MY_CS_PRIMARY, MYF(MY_UTF8_IS_UTF8MB3));
 | |
|   }
 | |
|   CHARSET_INFO *csinfo_from() const
 | |
|   {
 | |
|     return m_charset_from ? csinfo_by_name(m_charset_from) : NULL;
 | |
|   }
 | |
|   CHARSET_INFO *csinfo_to() const
 | |
|   {
 | |
|     return m_charset_to ? csinfo_by_name(m_charset_to) : NULL;
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| static CmdOpt opt;
 | |
| 
 | |
| 
 | |
| static struct my_option long_options[] =
 | |
| {
 | |
|   {"from", 'f', "Specifies the encoding of the input.", &opt.m_charset_from,
 | |
|    &opt.m_charset_from, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
 | |
|   {"to", 't', "Specifies the encoding of the output.", &opt.m_charset_to,
 | |
|    &opt.m_charset_to, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
 | |
|   {"continue", 'c', "Silently ignore conversion errors.",
 | |
|    &opt.m_continue, &opt.m_continue, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
 | |
|   {"delimiter", 0, "Treat the specified characters as delimiters.",
 | |
|     &opt.m_delimiter, &opt.m_delimiter, 0, GET_STR, REQUIRED_ARG,
 | |
|     0, 0, 0, 0, 0, 0},
 | |
|   {"character-sets-dir", OPT_CHARSETS_DIR,
 | |
|    "Directory for character set files.", &charsets_dir,
 | |
|    &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
 | |
|   {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
 | |
| };
 | |
| 
 | |
| 
 | |
| my_bool
 | |
| get_one_option(const struct my_option *opt,
 | |
|                const char *value, const char *filename)
 | |
| {
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| class File_buffer: public Binary_string
 | |
| {
 | |
| public:
 | |
|   bool load_binary_stream(FILE *file);
 | |
|   bool load_binary_file_by_name(const char *file);
 | |
| };
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Load data from a binary stream whose length is not known in advance,
 | |
|   e.g. from stdin.
 | |
| */
 | |
| bool File_buffer::load_binary_stream(FILE *file)
 | |
| {
 | |
|   for ( ; ; )
 | |
|   {
 | |
|     char buf[1024];
 | |
|     if (length() + sizeof(buf) > UINT_MAX32 || reserve(sizeof(buf)))
 | |
|     {
 | |
|       fprintf(stderr, "Input data is too large\n");
 | |
|       return true;
 | |
|     }
 | |
|     size_t nbytes= my_fread(file, (uchar *) end(), sizeof(buf), MYF(0));
 | |
|     if (!nbytes || nbytes == (size_t) -1)
 | |
|       return false;
 | |
|     str_length+= (uint32) nbytes;
 | |
|   }
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| 
 | |
| /*
 | |
|   Load data from a file by name.
 | |
|   The file size is know.
 | |
| */
 | |
| bool File_buffer::load_binary_file_by_name(const char *filename)
 | |
| {
 | |
|   MY_STAT sbuf;
 | |
|   File fd;
 | |
| 
 | |
|   if (!my_stat(filename, &sbuf, MYF(0)))
 | |
|   {
 | |
|     fprintf(stderr, "my_stat failed for '%s'\n", filename);
 | |
|     return true;
 | |
|   }
 | |
|  
 | |
|   if (!MY_S_ISREG(sbuf.st_mode))
 | |
|   {
 | |
|     fprintf(stderr, "'%s' is not a regular file\n", filename);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if ((size_t) sbuf.st_size > UINT_MAX32)
 | |
|   {
 | |
|     fprintf(stderr, "File '%s' is too large\n", filename);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if (alloc((uint32) sbuf.st_size))
 | |
|   {
 | |
|     fprintf(stderr, "Failed to allocate read buffer\n");
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   if ((fd= my_open(filename, O_RDONLY, MYF(0))) == -1)
 | |
|   {
 | |
|     fprintf(stderr, "Could not open '%s'\n", filename);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   size_t nbytes= my_read(fd, (uchar*) Ptr, (size_t)sbuf.st_size, MYF(0));
 | |
|   my_close(fd, MYF(0));
 | |
|   length((uint32) nbytes);
 | |
| 
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| 
 | |
| class Delimiter
 | |
| {
 | |
| protected:
 | |
|   bool m_delimiter[127];
 | |
|   bool m_has_delimiter_cached;
 | |
|   bool has_delimiter_slow() const
 | |
|   {
 | |
|     for (size_t i= 0; i < sizeof(m_delimiter); i++)
 | |
|     {
 | |
|       if (m_delimiter[i])
 | |
|         return true;
 | |
|     }
 | |
|     return false;
 | |
|   }
 | |
|   bool unescape(char *to, char from) const
 | |
|   {
 | |
|     switch (from) {
 | |
|     case '\\': *to= '\\'; return false;
 | |
|     case 'r':  *to= '\r'; return false;
 | |
|     case 'n':  *to= '\n'; return false;
 | |
|     case 't':  *to= '\t'; return false;
 | |
|     case '0':  *to= '\0'; return false;
 | |
|     }
 | |
|     *to= '\0';
 | |
|     return true;
 | |
|   }
 | |
|   bool is_delimiter(char ch) const
 | |
|   {
 | |
|     return (signed char) ch < 0 ? false : m_delimiter[(uint32) ch];
 | |
|   }
 | |
| public:
 | |
|   Delimiter()
 | |
|    :m_has_delimiter_cached(false)
 | |
|   {
 | |
|     bzero(&m_delimiter, sizeof(m_delimiter));
 | |
|   }
 | |
|   bool has_delimiter() const
 | |
|   {
 | |
|     return m_has_delimiter_cached;
 | |
|   }
 | |
|   bool set_delimiter_unescape(const char *str)
 | |
|   {
 | |
|     m_has_delimiter_cached= false;
 | |
|     for ( ; *str; str++)
 | |
|     {
 | |
|       if ((signed char) *str < 0)
 | |
|         return true;
 | |
|       if (*str == '\\')
 | |
|       {
 | |
|         char unescaped;
 | |
|         str++;
 | |
|         if (!*str || unescape(&unescaped, *str))
 | |
|           return true;
 | |
|         m_delimiter[(uint) unescaped]= true;
 | |
|       }
 | |
|       else
 | |
|         m_delimiter[(uint) *str]= true;
 | |
|     }
 | |
|     m_has_delimiter_cached= has_delimiter_slow();
 | |
|     return false;
 | |
|   }
 | |
|   size_t get_delimiter_length(const char *str, const char *end) const
 | |
|   {
 | |
|     const char *str0= str;
 | |
|     for ( ; str < end; str++)
 | |
|     {
 | |
|       if (!is_delimiter(*str))
 | |
|         break;
 | |
|     }
 | |
|     return str - str0;
 | |
|   }
 | |
|   size_t get_data_length(const char *str, const char *end) const
 | |
|   {
 | |
|     const char *str0= str;
 | |
|     for ( ; str < end; str++)
 | |
|     {
 | |
|       if (is_delimiter(*str))
 | |
|         break;
 | |
|     }
 | |
|     return str - str0;
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| class Conv_inbuf
 | |
| {
 | |
|   const char *m_ptr;
 | |
|   const char *m_end;
 | |
| public:
 | |
|   Conv_inbuf(const char *from, size_t length)
 | |
|    :m_ptr(from), m_end(from + length)
 | |
|   { }
 | |
|   const char *ptr() const { return m_ptr; }
 | |
|   const char *end() const { return m_end; }
 | |
|   size_t length() const
 | |
|   {
 | |
|     return m_end - m_ptr;
 | |
|   }
 | |
| private:
 | |
|   LEX_CSTRING get_prefix(size_t len)
 | |
|   {
 | |
|     LEX_CSTRING res;
 | |
|     res.str= ptr();
 | |
|     res.length= len;
 | |
|     m_ptr+= len;
 | |
|     return res;
 | |
|   }
 | |
|   LEX_CSTRING get_empty_string() const
 | |
|   {
 | |
|     static LEX_CSTRING str= {NULL, 0};
 | |
|     return str;
 | |
|   }
 | |
| public:
 | |
|   LEX_CSTRING get_delimiter_chunk(const Delimiter &delimiter)
 | |
|   {
 | |
|     if (!delimiter.has_delimiter())
 | |
|       return get_empty_string();
 | |
|     size_t len= delimiter.get_delimiter_length(ptr(), end());
 | |
|     return get_prefix(len);
 | |
|   }
 | |
|   LEX_CSTRING get_data_chunk(const Delimiter &delimiter)
 | |
|   {
 | |
|     if (!delimiter.has_delimiter())
 | |
|       return get_prefix(length());
 | |
|     size_t len= delimiter.get_data_length(ptr(), end());
 | |
|     return get_prefix(len);
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| class Conv_outbuf: public Binary_string
 | |
| {
 | |
| public:
 | |
|   bool alloc(size_t out_max_length)
 | |
|   {
 | |
|     if (out_max_length >= UINT_MAX32)
 | |
|     {
 | |
|       fprintf(stderr, "The data needs a too large output buffer\n");
 | |
|       return true;
 | |
|     }
 | |
|     if (Binary_string::alloc((uint32) out_max_length))
 | |
|     {
 | |
|       fprintf(stderr, "Failed to allocate the output buffer\n");
 | |
|       return true;
 | |
|     }
 | |
|     return false;
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| class Conv: public String_copier, public Delimiter
 | |
| {
 | |
|   CHARSET_INFO *m_tocs;
 | |
|   CHARSET_INFO *m_fromcs;
 | |
|   bool m_continue;
 | |
| public:
 | |
|   Conv(CHARSET_INFO *tocs, CHARSET_INFO *fromcs, bool opt_continue)
 | |
|    :m_tocs(tocs), m_fromcs(fromcs), m_continue(opt_continue)
 | |
|   { }
 | |
|   size_t out_buffer_max_length(size_t from_length) const
 | |
|   {
 | |
|     return from_length / m_fromcs->mbminlen * m_tocs->mbmaxlen;
 | |
|   }
 | |
|   bool convert_data(const char *from, size_t length);
 | |
|   bool convert_binary_stream(FILE *file)
 | |
|   {
 | |
|     File_buffer buf;
 | |
|     return buf.load_binary_stream(file) ||
 | |
|            convert_data(buf.ptr(), buf.length());
 | |
|   }
 | |
|   bool convert_binary_file_by_name(const char *filename)
 | |
|   {
 | |
|     File_buffer buf;
 | |
|     return buf.load_binary_file_by_name(filename)||
 | |
|            convert_data(buf.ptr(), buf.length());
 | |
|   }
 | |
| private:
 | |
|   void report_error(const char *from) const
 | |
|   {
 | |
|     if (well_formed_error_pos())
 | |
|     {
 | |
|       fflush(stdout);
 | |
|       fprintf(stderr,
 | |
|               "Illegal %s byte sequence at position %d\n",
 | |
|               m_fromcs->cs_name.str,
 | |
|               (uint) (well_formed_error_pos() - from));
 | |
|     }
 | |
|     else if (cannot_convert_error_pos())
 | |
|     {
 | |
|       fflush(stdout);
 | |
|       fprintf(stderr,
 | |
|               "Conversion from %s to %s failed at position %d\n",
 | |
|               m_fromcs->cs_name.str, m_tocs->cs_name.str,
 | |
|               (uint) (cannot_convert_error_pos() - from));
 | |
|     }
 | |
|   }
 | |
|   size_t write(const char *str, size_t length) const
 | |
|   {
 | |
|     return my_fwrite(stdout, (uchar *) str, length, MY_WME);
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| bool Conv::convert_data(const char *from, size_t from_length)
 | |
| {
 | |
|   Conv_inbuf inbuf(from, from_length);
 | |
|   Conv_outbuf outbuf;
 | |
| 
 | |
|   if (outbuf.alloc(out_buffer_max_length(from_length)))
 | |
|     return true;
 | |
| 
 | |
|   for ( ; ; )
 | |
|   {
 | |
|     LEX_CSTRING delim, data;
 | |
|     
 | |
|     delim= inbuf.get_delimiter_chunk(*this);
 | |
|     if (delim.length)
 | |
|       write(delim.str, delim.length);
 | |
| 
 | |
|     data= inbuf.get_data_chunk(*this);
 | |
|     if (!data.length)
 | |
|       break;
 | |
|     size_t length= well_formed_copy(m_tocs,
 | |
|                                     (char *) outbuf.ptr(),
 | |
|                                     outbuf.alloced_length(),
 | |
|                                     m_fromcs, data.str, data.length);
 | |
|     outbuf.length((uint32) length);
 | |
| 
 | |
|     if (most_important_error_pos() && !m_continue)
 | |
|     {
 | |
|       report_error(from);
 | |
|       return true;
 | |
|     }
 | |
|     write(outbuf.ptr(), outbuf.length());
 | |
|   }
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| 
 | |
| class Session
 | |
| {
 | |
| public:
 | |
|   Session(const char *prog)
 | |
|   {
 | |
|     MY_INIT(prog);
 | |
|   }
 | |
|   ~Session()
 | |
|   {
 | |
|     my_end(0);
 | |
|   }
 | |
|   void usage(void)
 | |
|   {
 | |
|     print_version();
 | |
|     puts("Character set conversion utility for MariaDB");
 | |
|     puts("Usage:");
 | |
|     printf("%s [OPTION...] [FILE...]\n", my_progname);
 | |
|     my_print_help(long_options);
 | |
|   }
 | |
| };
 | |
| 
 | |
| 
 | |
| int main(int argc, char *argv[])
 | |
| {
 | |
|   Session session(argv[0]);
 | |
|   CHARSET_INFO *charset_info_from= NULL;
 | |
|   CHARSET_INFO *charset_info_to= NULL;
 | |
| 
 | |
|   if (handle_options(&argc, &argv, long_options, get_one_option))
 | |
|   {
 | |
|     session.usage();
 | |
|     return 1;
 | |
|   }
 | |
| 
 | |
|   if (!(charset_info_from= opt.csinfo_from()))
 | |
|   {
 | |
|     fprintf(stderr, "Character set %s is not supported\n", opt.m_charset_from);
 | |
|     return 1;
 | |
|   }
 | |
| 
 | |
|   if (!(charset_info_to= opt.csinfo_to()))
 | |
|   {
 | |
|     fprintf(stderr, "Character set %s is not supported\n", opt.m_charset_to);
 | |
|     return 1;
 | |
|   }
 | |
| 
 | |
|   Conv conv(charset_info_to, charset_info_from, opt.m_continue);
 | |
|   if (opt.m_delimiter)
 | |
|   {
 | |
|     if (charset_info_from->mbminlen > 1 ||
 | |
|         charset_info_to->mbminlen > 1)
 | |
|     {
 | |
|       fprintf(stderr, "--delimiter cannot be used with %s to %s conversion\n",
 | |
|               charset_info_from->cs_name.str, charset_info_to->cs_name.str);
 | |
|       return 1;
 | |
|     }
 | |
|     if (conv.set_delimiter_unescape(opt.m_delimiter))
 | |
|     {
 | |
|       fprintf(stderr, "Bad --delimiter value\n");
 | |
|       return 1;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (argc == 0)
 | |
|   {
 | |
|     if (conv.convert_binary_stream(stdin))
 | |
|       return 1;
 | |
|   }
 | |
|   else
 | |
|   {
 | |
|     for (int i= 0; i < argc; i++)
 | |
|     {
 | |
|       if (conv.convert_binary_file_by_name(argv[i]))
 | |
|        return 1;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return 0;
 | |
| } /* main */
 |