mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-31 19:06:14 +01:00 
			
		
		
		
	 6075f12c65
			
		
	
	
	6075f12c65
	
	
	
		
			
			This is a non-functional change. It changes the way case folding data
and weight data (for simple Unicode collations) are stored:
- Removing data types MY_UNICASE_CHARACTER, MY_UNICASE_INFO
- Using data types MY_CASEFOLD_CHARACTER, MY_CASEFOLD_INFO instead.
This patch changes simple Unicode collations in a way similar to
how MDEV-30695 previously changed Asian collations.
No new MTR tests are needed. The underlying code is thoroughly
covered by a number of ctype_*_ws.test and ctype_*_casefold.test
files, which were added recently as a preparation
for this change.
Old and new Unicode data layout
-------------------------------
Case folding data is now stored in separate tables
consisting of MY_CASEFOLD_CHARACTER elements with two members:
    typedef struct casefold_info_char_t
    {
      uint32 toupper;
      uint32 tolower;
    } MY_CASEFOLD_CHARACTER;
while weight data (for simple non-UCA collations xxx_general_ci
and xxx_general_mysql500_ci) is stored in separate arrays of
uint16 elements.
Before this change case folding data and simple weight data were
stored together, in tables of the following elements with three members:
    typedef struct unicase_info_char_st
    {
      uint32 toupper;
      uint32 tolower;
      uint32 sort;          /* weights for simple collations */
    } MY_UNICASE_CHARACTER;
This data format was redundant, because weights (the "sort" member) were
needed only for these two simple Unicode collations:
- xxx_general_ci
- xxx_general_mysql500_ci
Adding case folding information for Unicode-14.0.0 using the old
format would waste memory without purpose.
Detailed changes
----------------
- Changing the underlying data types as described above
- Including unidata-dump.c into the sources.
  This program was earlier used to dump UnicodeData.txt
  (e.g. https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt)
  into MySQL / MariaDB source files.
  It was originally written in 2002, but has not been distributed yet
  together with MySQL / MariaDB sources.
- Removing the old format Unicode data earlier dumped from UnicodeData.txt
  (versions 3.0.0 and 5.2.0) from ctype-utf8.c.
  Adding Unicode data in the new format into separate header files,
  to make the code easier to maintain:
    - ctype-unicode300-casefold.h
    - ctype-unicode300-casefold-tr.h
    - ctype-unicode300-general_ci.h
    - ctype-unicode300-general_mysql500_ci.h
    - ctype-unicode520-casefold.h
- Adding a new file ctype-unidata.c as an aggregator for
  the header files listed above.
		
	
			
		
			
				
	
	
		
			60 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			CMake
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
	
		
			2.5 KiB
		
	
	
	
		
			CMake
		
	
	
	
	
	
| # Copyright (c) 2006, 2014, Oracle and/or its affiliates
 | |
| # 
 | |
| # This program is free software; you can redistribute it and/or modify
 | |
| # it under the terms of the GNU General Public License as published by
 | |
| # the Free Software Foundation; version 2 of the License.
 | |
| # 
 | |
| # This program is distributed in the hope that it will be useful,
 | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| # GNU General Public License for more details.
 | |
| # 
 | |
| # You should have received a copy of the GNU General Public License
 | |
| # along with this program; if not, write to the Free Software
 | |
| # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1335 USA
 | |
| 
 | |
# Header search path for the targets declared in this directory: the
# top-level include/ directory of the source tree and the strings/
# directory of the build tree.
# NOTE(review): the build-tree entry is presumably what lets sources find
# the generated ctype-uca1400data.h -- confirm.
include_directories(
  ${CMAKE_SOURCE_DIR}/include
  ${CMAKE_BINARY_DIR}/strings
)
 | |
| 
 | |
# Source files that make up the "strings" library, one per line so that
# additions and removals show up as single-line diffs.  The order is the
# same as in the original list.
set(STRINGS_SOURCES
  bchange.c
  bmove_upp.c
  ctype-big5.c
  ctype-bin.c
  ctype-cp932.c
  ctype-czech.c
  ctype-euc_kr.c
  ctype-eucjpms.c
  ctype-extra.c
  ctype-gb2312.c
  ctype-gbk.c
  ctype-latin1.c
  ctype-mb.c
  ctype-simple.c
  ctype-sjis.c
  ctype-tis620.c
  ctype-uca.c
  ctype-ucs2.c
  ctype-ujis.c
  ctype-utf8.c
  ctype-win1250ch.c
  ctype.c
  decimal.c
  dtoa.c
  int2str.c
  ctype-unidata.c
  is_prefix.c
  llstr.c
  longlong2str.c
  my_strtoll10.c
  my_vsnprintf.c
  str2int.c
  strcend.c
  strend.c
  strfill.c
  strmake.c
  strmov.c
  strnmov.c
  strxmov.c
  strxnmov.c
  xml.c
  strmov_overlapp.c
  my_strchr.c
  strcont.c
  strappend.c
  json_lib.c
  json_normalize.c
)
 | |
| 
 | |
# Build the bundled strnlen() only when HAVE_STRNLEN is not set
# (OSX below 10.7 did not have strnlen).
if(NOT HAVE_STRNLEN)
  list(APPEND STRINGS_SOURCES strnlen.c)
endif()
 | |
# DISABLE_MYSQL_THREAD_H avoids dependencies on perfschema data defined in
# mysys.  add_definitions() is directory-scoped, so the macro is passed to
# the targets declared in this directory, not only to "strings".
add_definitions(-DDISABLE_MYSQL_THREAD_H)

# ADD_CONVENIENCE_LIBRARY and MAYBE_DISABLE_IPO are project macros defined
# outside this file.
ADD_CONVENIENCE_LIBRARY(strings ${STRINGS_SOURCES})

# The keyword-less signature is kept on purpose: CMake rejects mixing the
# plain and keyword signatures on one target, and other call sites for
# "strings" are not visible from here.
target_link_libraries(strings dbug mysys)

MAYBE_DISABLE_IPO(strings)
 | |
# conf_to_src is an on-demand helper program: EXCLUDE_FROM_ALL keeps it out
# of the "all" target and EXCLUDE_FROM_DEFAULT_BUILD keeps it out of the
# default build, so it is built only when requested explicitly.
add_executable(conf_to_src EXCLUDE_FROM_ALL conf_to_src.c)
set_property(TARGET conf_to_src PROPERTY EXCLUDE_FROM_DEFAULT_BUILD TRUE)
target_link_libraries(conf_to_src mysys strings)
 | |
| 
 | |
# uca-dump is run during the build, so it is compiled here only when the
# result can be executed: either a native build, or a cross build for which
# CMAKE_CROSSCOMPILING_EMULATOR is defined.
# NOTE(review): in a cross build without an emulator the "uca-dump" target
# presumably has to be provided from elsewhere -- confirm.
if((NOT CMAKE_CROSSCOMPILING) OR (DEFINED CMAKE_CROSSCOMPILING_EMULATOR))
  add_executable(uca-dump uca-dump.c)
endif()
 | |
| 
 | |
# Generate ctype-uca1400data.h by running uca-dump on allkeys1400.txt.
#
# - The tool writes to stdout; the shell redirect stores the result under a
#   relative name, so WORKING_DIRECTORY is spelled out to make the written
#   file coincide with the declared OUTPUT.
# - DEPENDS lists both the generator and its input file, so that a change
#   to either one regenerates the header.  Previously only the generator
#   was listed and edits to allkeys1400.txt left a stale header behind.
add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ctype-uca1400data.h
  COMMAND uca-dump --name-prefix=uca1400 --levels=3 ${PROJECT_SOURCE_DIR}/mysql-test/std_data/unicode/allkeys1400.txt > ctype-uca1400data.h
  DEPENDS
    uca-dump
    ${PROJECT_SOURCE_DIR}/mysql-test/std_data/unicode/allkeys1400.txt
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
 | |
| 
 | |
# GenUnicodeDataSource is a named target that depends on the generated
# ctype-uca1400data.h.  Making "strings" depend on it orders the build so
# that the header is produced before the library is built.
add_custom_target(GenUnicodeDataSource
  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ctype-uca1400data.h
)
add_dependencies(strings GenUnicodeDataSource)
 |