mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
6075f12c65
This is a non-functional change. It changes how case folding data and weight data (for simple Unicode collations) are stored: - Removing data types MY_UNICASE_CHARACTER, MY_UNICASE_INFO - Using data types MY_CASEFOLD_CHARACTER, MY_CASEFOLD_INFO instead. This patch changes simple Unicode collations in a way similar to how MDEV-30695 previously changed Asian collations. No new MTR tests are needed. The underlying code is thoroughly covered by a number of ctype_*_ws.test and ctype_*_casefold.test files, which were added recently in preparation for this change. Old and new Unicode data layout ------------------------------- Case folding data is now stored in separate tables consisting of MY_CASEFOLD_CHARACTER elements with two members: typedef struct casefold_info_char_t { uint32 toupper; uint32 tolower; } MY_CASEFOLD_CHARACTER; while weight data (for simple non-UCA collations xxx_general_ci and xxx_general_mysql500_ci) is stored in separate arrays of uint16 elements. Before this change, case folding data and simple weight data were stored together, in tables of the following elements with three members: typedef struct unicase_info_char_st { uint32 toupper; uint32 tolower; uint32 sort; /* weights for simple collations */ } MY_UNICASE_CHARACTER; This data format was redundant, because weights (the "sort" member) were needed only for these two simple Unicode collations: - xxx_general_ci - xxx_general_mysql500_ci Adding case folding information for Unicode-14.0.0 using the old format would waste memory to no purpose. Detailed changes ---------------- - Changing the underlying data types as described above - Including unidata-dump.c into the sources. This program was earlier used to dump UnicodeData.txt (e.g. https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt) into MySQL / MariaDB source files. It was originally written in 2002, but had not previously been distributed with MySQL / MariaDB sources. 
- Removing the old-format Unicode data previously dumped from UnicodeData.txt (versions 3.0.0 and 5.2.0) from ctype-utf8.c. Adding Unicode data in the new format into separate header files, to make the code easier to maintain: - ctype-unicode300-casefold.h - ctype-unicode300-casefold-tr.h - ctype-unicode300-general_ci.h - ctype-unicode300-general_mysql500_ci.h - ctype-unicode520-casefold.h - Adding a new file ctype-unidata.c as an aggregator for the header files listed above.
193 lines
8.6 KiB
C
193 lines
8.6 KiB
C
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
   Copyright (c) 2009, 2023, MariaDB Corporation.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
|
|
|
|
/*
  Generated by:
    ./unidata-dump \
      --mode=casefold-tr \
      --page-name=u300tr_casefold_page \
      --page-name-derived=u300_casefold_page \
      --index-name=my_u300tr_casefold_index \
      --max-char=0xFFFF \
      UnicodeData-3.0.0.txt

*/
|
|
const MY_CASEFOLD_CHARACTER u300tr_casefold_page00[256]={
|
|
{0x0000,0x0000},{0x0001,0x0001}, /* 0000 */
|
|
{0x0002,0x0002},{0x0003,0x0003}, /* 0002 */
|
|
{0x0004,0x0004},{0x0005,0x0005}, /* 0004 */
|
|
{0x0006,0x0006},{0x0007,0x0007}, /* 0006 */
|
|
{0x0008,0x0008},{0x0009,0x0009}, /* 0008 */
|
|
{0x000A,0x000A},{0x000B,0x000B}, /* 000A */
|
|
{0x000C,0x000C},{0x000D,0x000D}, /* 000C */
|
|
{0x000E,0x000E},{0x000F,0x000F}, /* 000E */
|
|
{0x0010,0x0010},{0x0011,0x0011}, /* 0010 */
|
|
{0x0012,0x0012},{0x0013,0x0013}, /* 0012 */
|
|
{0x0014,0x0014},{0x0015,0x0015}, /* 0014 */
|
|
{0x0016,0x0016},{0x0017,0x0017}, /* 0016 */
|
|
{0x0018,0x0018},{0x0019,0x0019}, /* 0018 */
|
|
{0x001A,0x001A},{0x001B,0x001B}, /* 001A */
|
|
{0x001C,0x001C},{0x001D,0x001D}, /* 001C */
|
|
{0x001E,0x001E},{0x001F,0x001F}, /* 001E */
|
|
{0x0020,0x0020},{0x0021,0x0021}, /* 0020 */
|
|
{0x0022,0x0022},{0x0023,0x0023}, /* 0022 */
|
|
{0x0024,0x0024},{0x0025,0x0025}, /* 0024 */
|
|
{0x0026,0x0026},{0x0027,0x0027}, /* 0026 */
|
|
{0x0028,0x0028},{0x0029,0x0029}, /* 0028 */
|
|
{0x002A,0x002A},{0x002B,0x002B}, /* 002A */
|
|
{0x002C,0x002C},{0x002D,0x002D}, /* 002C */
|
|
{0x002E,0x002E},{0x002F,0x002F}, /* 002E */
|
|
{0x0030,0x0030},{0x0031,0x0031}, /* 0030 */
|
|
{0x0032,0x0032},{0x0033,0x0033}, /* 0032 */
|
|
{0x0034,0x0034},{0x0035,0x0035}, /* 0034 */
|
|
{0x0036,0x0036},{0x0037,0x0037}, /* 0036 */
|
|
{0x0038,0x0038},{0x0039,0x0039}, /* 0038 */
|
|
{0x003A,0x003A},{0x003B,0x003B}, /* 003A */
|
|
{0x003C,0x003C},{0x003D,0x003D}, /* 003C */
|
|
{0x003E,0x003E},{0x003F,0x003F}, /* 003E */
|
|
{0x0040,0x0040},{0x0041,0x0061}, /* 0040 */
|
|
{0x0042,0x0062},{0x0043,0x0063}, /* 0042 */
|
|
{0x0044,0x0064},{0x0045,0x0065}, /* 0044 */
|
|
{0x0046,0x0066},{0x0047,0x0067}, /* 0046 */
|
|
{0x0048,0x0068},{0x0049,0x0131}, /* 0048 */
|
|
{0x004A,0x006A},{0x004B,0x006B}, /* 004A */
|
|
{0x004C,0x006C},{0x004D,0x006D}, /* 004C */
|
|
{0x004E,0x006E},{0x004F,0x006F}, /* 004E */
|
|
{0x0050,0x0070},{0x0051,0x0071}, /* 0050 */
|
|
{0x0052,0x0072},{0x0053,0x0073}, /* 0052 */
|
|
{0x0054,0x0074},{0x0055,0x0075}, /* 0054 */
|
|
{0x0056,0x0076},{0x0057,0x0077}, /* 0056 */
|
|
{0x0058,0x0078},{0x0059,0x0079}, /* 0058 */
|
|
{0x005A,0x007A},{0x005B,0x005B}, /* 005A */
|
|
{0x005C,0x005C},{0x005D,0x005D}, /* 005C */
|
|
{0x005E,0x005E},{0x005F,0x005F}, /* 005E */
|
|
{0x0060,0x0060},{0x0041,0x0061}, /* 0060 */
|
|
{0x0042,0x0062},{0x0043,0x0063}, /* 0062 */
|
|
{0x0044,0x0064},{0x0045,0x0065}, /* 0064 */
|
|
{0x0046,0x0066},{0x0047,0x0067}, /* 0066 */
|
|
{0x0048,0x0068},{0x0130,0x0069}, /* 0068 */
|
|
{0x004A,0x006A},{0x004B,0x006B}, /* 006A */
|
|
{0x004C,0x006C},{0x004D,0x006D}, /* 006C */
|
|
{0x004E,0x006E},{0x004F,0x006F}, /* 006E */
|
|
{0x0050,0x0070},{0x0051,0x0071}, /* 0070 */
|
|
{0x0052,0x0072},{0x0053,0x0073}, /* 0072 */
|
|
{0x0054,0x0074},{0x0055,0x0075}, /* 0074 */
|
|
{0x0056,0x0076},{0x0057,0x0077}, /* 0076 */
|
|
{0x0058,0x0078},{0x0059,0x0079}, /* 0078 */
|
|
{0x005A,0x007A},{0x007B,0x007B}, /* 007A */
|
|
{0x007C,0x007C},{0x007D,0x007D}, /* 007C */
|
|
{0x007E,0x007E},{0x007F,0x007F}, /* 007E */
|
|
{0x0080,0x0080},{0x0081,0x0081}, /* 0080 */
|
|
{0x0082,0x0082},{0x0083,0x0083}, /* 0082 */
|
|
{0x0084,0x0084},{0x0085,0x0085}, /* 0084 */
|
|
{0x0086,0x0086},{0x0087,0x0087}, /* 0086 */
|
|
{0x0088,0x0088},{0x0089,0x0089}, /* 0088 */
|
|
{0x008A,0x008A},{0x008B,0x008B}, /* 008A */
|
|
{0x008C,0x008C},{0x008D,0x008D}, /* 008C */
|
|
{0x008E,0x008E},{0x008F,0x008F}, /* 008E */
|
|
{0x0090,0x0090},{0x0091,0x0091}, /* 0090 */
|
|
{0x0092,0x0092},{0x0093,0x0093}, /* 0092 */
|
|
{0x0094,0x0094},{0x0095,0x0095}, /* 0094 */
|
|
{0x0096,0x0096},{0x0097,0x0097}, /* 0096 */
|
|
{0x0098,0x0098},{0x0099,0x0099}, /* 0098 */
|
|
{0x009A,0x009A},{0x009B,0x009B}, /* 009A */
|
|
{0x009C,0x009C},{0x009D,0x009D}, /* 009C */
|
|
{0x009E,0x009E},{0x009F,0x009F}, /* 009E */
|
|
{0x00A0,0x00A0},{0x00A1,0x00A1}, /* 00A0 */
|
|
{0x00A2,0x00A2},{0x00A3,0x00A3}, /* 00A2 */
|
|
{0x00A4,0x00A4},{0x00A5,0x00A5}, /* 00A4 */
|
|
{0x00A6,0x00A6},{0x00A7,0x00A7}, /* 00A6 */
|
|
{0x00A8,0x00A8},{0x00A9,0x00A9}, /* 00A8 */
|
|
{0x00AA,0x00AA},{0x00AB,0x00AB}, /* 00AA */
|
|
{0x00AC,0x00AC},{0x00AD,0x00AD}, /* 00AC */
|
|
{0x00AE,0x00AE},{0x00AF,0x00AF}, /* 00AE */
|
|
{0x00B0,0x00B0},{0x00B1,0x00B1}, /* 00B0 */
|
|
{0x00B2,0x00B2},{0x00B3,0x00B3}, /* 00B2 */
|
|
{0x00B4,0x00B4},{0x039C,0x00B5}, /* 00B4 */
|
|
{0x00B6,0x00B6},{0x00B7,0x00B7}, /* 00B6 */
|
|
{0x00B8,0x00B8},{0x00B9,0x00B9}, /* 00B8 */
|
|
{0x00BA,0x00BA},{0x00BB,0x00BB}, /* 00BA */
|
|
{0x00BC,0x00BC},{0x00BD,0x00BD}, /* 00BC */
|
|
{0x00BE,0x00BE},{0x00BF,0x00BF}, /* 00BE */
|
|
{0x00C0,0x00E0},{0x00C1,0x00E1}, /* 00C0 */
|
|
{0x00C2,0x00E2},{0x00C3,0x00E3}, /* 00C2 */
|
|
{0x00C4,0x00E4},{0x00C5,0x00E5}, /* 00C4 */
|
|
{0x00C6,0x00E6},{0x00C7,0x00E7}, /* 00C6 */
|
|
{0x00C8,0x00E8},{0x00C9,0x00E9}, /* 00C8 */
|
|
{0x00CA,0x00EA},{0x00CB,0x00EB}, /* 00CA */
|
|
{0x00CC,0x00EC},{0x00CD,0x00ED}, /* 00CC */
|
|
{0x00CE,0x00EE},{0x00CF,0x00EF}, /* 00CE */
|
|
{0x00D0,0x00F0},{0x00D1,0x00F1}, /* 00D0 */
|
|
{0x00D2,0x00F2},{0x00D3,0x00F3}, /* 00D2 */
|
|
{0x00D4,0x00F4},{0x00D5,0x00F5}, /* 00D4 */
|
|
{0x00D6,0x00F6},{0x00D7,0x00D7}, /* 00D6 */
|
|
{0x00D8,0x00F8},{0x00D9,0x00F9}, /* 00D8 */
|
|
{0x00DA,0x00FA},{0x00DB,0x00FB}, /* 00DA */
|
|
{0x00DC,0x00FC},{0x00DD,0x00FD}, /* 00DC */
|
|
{0x00DE,0x00FE},{0x00DF,0x00DF}, /* 00DE */
|
|
{0x00C0,0x00E0},{0x00C1,0x00E1}, /* 00E0 */
|
|
{0x00C2,0x00E2},{0x00C3,0x00E3}, /* 00E2 */
|
|
{0x00C4,0x00E4},{0x00C5,0x00E5}, /* 00E4 */
|
|
{0x00C6,0x00E6},{0x00C7,0x00E7}, /* 00E6 */
|
|
{0x00C8,0x00E8},{0x00C9,0x00E9}, /* 00E8 */
|
|
{0x00CA,0x00EA},{0x00CB,0x00EB}, /* 00EA */
|
|
{0x00CC,0x00EC},{0x00CD,0x00ED}, /* 00EC */
|
|
{0x00CE,0x00EE},{0x00CF,0x00EF}, /* 00EE */
|
|
{0x00D0,0x00F0},{0x00D1,0x00F1}, /* 00F0 */
|
|
{0x00D2,0x00F2},{0x00D3,0x00F3}, /* 00F2 */
|
|
{0x00D4,0x00F4},{0x00D5,0x00F5}, /* 00F4 */
|
|
{0x00D6,0x00F6},{0x00F7,0x00F7}, /* 00F6 */
|
|
{0x00D8,0x00F8},{0x00D9,0x00F9}, /* 00F8 */
|
|
{0x00DA,0x00FA},{0x00DB,0x00FB}, /* 00FA */
|
|
{0x00DC,0x00FC},{0x00DD,0x00FD}, /* 00FC */
|
|
{0x00DE,0x00FE},{0x0178,0x00FF} /* 00FE */
|
|
};
|
|
|
|
const MY_CASEFOLD_CHARACTER * my_u300tr_casefold_index[256]={
|
|
u300tr_casefold_page00, u300_casefold_page01, u300_casefold_page02, u300_casefold_page03, u300_casefold_page04, u300_casefold_page05, u300_casefold_page06, u300_casefold_page07,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, u300_casefold_page1E, u300_casefold_page1F,
|
|
NULL, u300_casefold_page21, NULL, NULL, u300_casefold_page24, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, u300_casefold_pageFF
|
|
};
|