2003-01-29 14:31:20 +01:00
|
|
|
|
/* Copyright (C) 2000 MySQL AB
|
|
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
|
|
|
|
|
|
#include <my_global.h>
|
|
|
|
|
#include "m_string.h"
|
|
|
|
|
#include "m_ctype.h"
|
|
|
|
|
|
|
|
|
|
/*
  Character-class flags for latin1.  The table holds one leading entry
  followed by 256 per-byte entries (257 values in total); the trailing
  comments give the byte value of the first entry on each row.
  NOTE(review): the flag bits are presumably the _MY_* masks declared in
  m_ctype.h -- confirm there.
*/
static uchar ctype_latin1[] = {
    0,                                                               /* leading entry */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,   /* 0x00 */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,   /* 0x10 */
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,   /* 0x20 */
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,   /* 0x30 */
   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,   /* 0x40 */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,   /* 0x50 */
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,   /* 0x60 */
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,   /* 0x70 */
   16,  0, 16,  2, 16, 16, 16, 16, 16, 16,  1, 16,  1,  0,  1,  0,   /* 0x80 */
    0, 16, 16, 16, 16, 16, 16, 16, 16, 16,  2, 16,  2,  0,  2,  1,   /* 0x90 */
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,   /* 0xA0 */
   16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,   /* 0xB0 */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   /* 0xC0 */
    1,  1,  1,  1,  1,  1,  1, 16,  1,  1,  1,  1,  1,  1,  1,  2,   /* 0xD0 */
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,   /* 0xE0 */
    2,  2,  2,  2,  2,  2,  2, 16,  2,  2,  2,  2,  2,  2,  2,  2    /* 0xF0 */
};
|
|
|
|
|
|
|
|
|
|
/*
  Lower-case mapping for latin1, indexed by byte value.
  'A'..'Z' (65..90) map to 'a'..'z' and 0xC0..0xDE map to 0xE0..0xFE,
  except 0xD7 which stays unchanged; 0xDF also maps to itself.
*/
static uchar to_lower_latin1[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,   /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   /* 0x30 */
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,   /* 0x40 */
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,   /* 0x50 */
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,   /* 0x60 */
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,   /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,   /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,   /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,   /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,   /* 0xB0 */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,   /* 0xC0 */
  240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,   /* 0xD0 */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,   /* 0xE0 */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255    /* 0xF0 */
};
|
|
|
|
|
|
|
|
|
|
/*
  Upper-case mapping for latin1, indexed by byte value.
  'a'..'z' (97..122) map to 'A'..'Z' and 0xE0..0xFE map to 0xC0..0xDE,
  except 0xF7 which stays unchanged; 0xFF also maps to itself.
*/
static uchar to_upper_latin1[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,   /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   /* 0x30 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x40 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,   /* 0x50 */
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x60 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,   /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,   /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,   /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,   /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,   /* 0xB0 */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,   /* 0xC0 */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,   /* 0xD0 */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,   /* 0xE0 */
  208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255    /* 0xF0 */
};
|
|
|
|
|
|
|
|
|
|
/*
  Sort weights for the default latin1 collation (installed in
  my_charset_latin1, whose name is "latin1_swedish_ci"), indexed by byte
  value.  ASCII lower-case letters get the weight of their upper-case
  form; most accented letters in 0xC0..0xFF get the weight of an ASCII
  letter or of one of the codes 91..93.
*/
static uchar sort_order_latin1[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,   /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   /* 0x30 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x40 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,   /* 0x50 */
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x60 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,   /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,   /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,   /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,   /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,   /* 0xB0 */
   65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,   /* 0xC0 */
   68, 78, 79, 79, 79, 79, 93,215,216, 85, 85, 85, 89, 89,222,223,   /* 0xD0 */
   65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,   /* 0xE0 */
   68, 78, 79, 79, 79, 79, 93,247,216, 85, 85, 85, 89, 89,222,255    /* 0xF0 */
};
|
|
|
|
|
|
2004-05-11 10:32:01 +02:00
|
|
|
|
/*
|
|
|
|
|
WL#1494 notes:
|
|
|
|
|
|
|
|
|
|
We'll use cp1252 instead of iso-8859-1.
|
|
|
|
|
cp1252 contains printable characters in the range 0x80-0x9F.
|
|
|
|
|
In ISO 8859-1, these code points have no associated printable
|
|
|
|
|
characters. Therefore, by converting from CP1252 to ISO 8859-1,
|
|
|
|
|
one would lose the euro (for instance). Since most people are
|
|
|
|
|
unaware of the difference, and since we don't really want a
|
|
|
|
|
"Windows ANSI" to differ from a "Unix ANSI", we will:
|
|
|
|
|
|
|
|
|
|
- continue to pretend the latin1 character set is ISO 8859-1
|
|
|
|
|
- actually allow the storage of euro etc. so it's actually cp1252
|
2005-07-21 13:05:19 +02:00
|
|
|
|
|
|
|
|
|
Also we'll map these five undefined cp1252 characters:
|
|
|
|
|
0x81, 0x8D, 0x8F, 0x90, 0x9D
|
|
|
|
|
into corresponding control characters:
|
|
|
|
|
U+0081, U+008D, U+008F, U+0090, U+009D.
|
|
|
|
|
like ISO-8859-1 does. Otherwise, loading "mysqldump"
|
|
|
|
|
output doesn't reproduce these undefined characters.
|
2004-05-11 10:32:01 +02:00
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
  latin1 (cp1252) byte -> Unicode code point.
  Bytes 0x00..0x7F and 0xA0..0xFF map to the code point with the same
  value.  Bytes 0x80..0x9F carry the cp1252 assignments, except that
  0x81, 0x8D, 0x8F, 0x90 and 0x9D map to the control code points with
  the same value.
*/
unsigned short cs_to_uni[256]={
  0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,   /* 0x00 */
  0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
  0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,   /* 0x10 */
  0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
  0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,   /* 0x20 */
  0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
  0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,   /* 0x30 */
  0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
  0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,   /* 0x40 */
  0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
  0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,   /* 0x50 */
  0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
  0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,   /* 0x60 */
  0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
  0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,   /* 0x70 */
  0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
  0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,   /* 0x80 */
  0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F,
  0x0090,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,   /* 0x90 */
  0x02DC,0x2122,0x0161,0x203A,0x0153,0x009D,0x017E,0x0178,
  0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,   /* 0xA0 */
  0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
  0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,   /* 0xB0 */
  0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
  0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,   /* 0xC0 */
  0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
  0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,   /* 0xD0 */
  0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
  0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,   /* 0xE0 */
  0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
  0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,   /* 0xF0 */
  0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
};
|
|
|
|
|
/*
  Reverse mapping for Unicode code points U+0000..U+00FF: indexed by the
  low byte of the code point, the value is the latin1 byte.  A value of
  0x00 at a non-zero index means "no latin1 byte for this code point".
  In 0x80..0x9F only 0x81, 0x8D, 0x8F, 0x90 and 0x9D are mapped.
*/
unsigned char pl00[256]={
  0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,   /* 0x00 */
  0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,   /* 0x10 */
  0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,   /* 0x20 */
  0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,   /* 0x30 */
  0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,   /* 0x40 */
  0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,   /* 0x50 */
  0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,   /* 0x60 */
  0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,   /* 0x70 */
  0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
  0x00,0x81,0x00,0x00,0x00,0x00,0x00,0x00,   /* 0x80 */
  0x00,0x00,0x00,0x00,0x00,0x8D,0x00,0x8F,
  0x90,0x00,0x00,0x00,0x00,0x00,0x00,0x00,   /* 0x90 */
  0x00,0x00,0x00,0x00,0x00,0x9D,0x00,0x00,
  0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,   /* 0xA0 */
  0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,   /* 0xB0 */
  0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,   /* 0xC0 */
  0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,   /* 0xD0 */
  0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,   /* 0xE0 */
  0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,   /* 0xF0 */
  0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
|
|
|
|
|
/*
  Reverse mapping for Unicode code points U+0100..U+01FF, indexed by the
  low byte of the code point; 0x00 means "no latin1 byte".
  Mapped entries: U+0152->0x8C, U+0153->0x9C, U+0160->0x8A, U+0161->0x9A,
  U+0178->0x9F, U+017D->0x8E, U+017E->0x9E, U+0192->0x83.
*/
unsigned char pl01[256]={
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x00 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x10 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x20 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x30 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x40 */
  0x00,0x00,0x8C,0x9C,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x50 */
  0x8A,0x9A,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x60 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x9F,0x00,0x00,0x00,0x00,0x8E,0x9E,0x00, /* 0x70 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x80 */
  0x00,0x00,0x83,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x90 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xA0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xB0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xC0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xD0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xE0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  /* 0xF0 */
};
|
|
|
|
|
/*
  Reverse mapping for Unicode code points U+0200..U+02FF, indexed by the
  low byte of the code point; 0x00 means "no latin1 byte".
  Mapped entries: U+02C6->0x88, U+02DC->0x98.
*/
unsigned char pl02[256]={
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x00 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x10 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x20 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x30 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x40 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x50 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x60 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x70 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x80 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x90 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xA0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xB0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x88,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xC0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x98,0x00,0x00,0x00, /* 0xD0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xE0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  /* 0xF0 */
};
|
|
|
|
|
/*
  Reverse mapping for Unicode code points U+2000..U+20FF, indexed by the
  low byte of the code point; 0x00 means "no latin1 byte".
  Mapped entries: U+2013->0x96, U+2014->0x97, U+2018->0x91, U+2019->0x92,
  U+201A->0x82, U+201C->0x93, U+201D->0x94, U+201E->0x84, U+2020->0x86,
  U+2021->0x87, U+2022->0x95, U+2026->0x85, U+2030->0x89, U+2039->0x8B,
  U+203A->0x9B, U+20AC->0x80.
*/
unsigned char pl20[256]={
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x00 */
  0x00,0x00,0x00,0x96,0x97,0x00,0x00,0x00,0x91,0x92,0x82,0x00,0x93,0x94,0x84,0x00, /* 0x10 */
  0x86,0x87,0x95,0x00,0x00,0x00,0x85,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x20 */
  0x89,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x8B,0x9B,0x00,0x00,0x00,0x00,0x00, /* 0x30 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x40 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x50 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x60 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x70 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x80 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x90 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* 0xA0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xB0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xC0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xD0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xE0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  /* 0xF0 */
};
|
|
|
|
|
/*
  Reverse mapping for Unicode code points U+2100..U+21FF, indexed by the
  low byte of the code point; 0x00 means "no latin1 byte".
  The only mapped entry is U+2122->0x99.
*/
unsigned char pl21[256]={
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x00 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x10 */
  0x00,0x00,0x99,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x20 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x30 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x40 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x50 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x60 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x70 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x80 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0x90 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xA0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xB0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xC0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xD0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0xE0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00  /* 0xF0 */
};
|
|
|
|
|
unsigned char *uni_to_cs[256]={
|
|
|
|
|
pl00,pl01,pl02,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
pl20,pl21,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
|
|
|
|
|
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
|
|
|
|
|
};
|
2003-01-29 14:31:20 +01:00
|
|
|
|
|
2003-01-30 07:47:15 +01:00
|
|
|
|
static
|
2004-05-11 10:32:01 +02:00
|
|
|
|
int my_mb_wc_latin1(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
|
my_wc_t *wc,
|
|
|
|
|
const unsigned char *str,
|
|
|
|
|
const unsigned char *end __attribute__((unused)))
|
2003-01-30 07:47:15 +01:00
|
|
|
|
{
|
|
|
|
|
if (str >= end)
|
2005-12-12 18:42:09 +01:00
|
|
|
|
return MY_CS_TOOSMALL;
|
2003-01-30 07:47:15 +01:00
|
|
|
|
|
2004-05-11 10:32:01 +02:00
|
|
|
|
*wc=cs_to_uni[*str];
|
2005-12-12 18:42:09 +01:00
|
|
|
|
return (!wc[0] && str[0]) ? -1 : 1;
|
2003-01-30 07:47:15 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static
|
2004-05-11 10:32:01 +02:00
|
|
|
|
int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
|
my_wc_t wc,
|
|
|
|
|
unsigned char *str,
|
|
|
|
|
unsigned char *end __attribute__((unused)))
|
2003-01-30 07:47:15 +01:00
|
|
|
|
{
|
2004-05-11 10:32:01 +02:00
|
|
|
|
unsigned char *pl;
|
|
|
|
|
|
2003-01-30 07:47:15 +01:00
|
|
|
|
if (str >= end)
|
|
|
|
|
return MY_CS_TOOSMALL;
|
2004-05-11 10:32:01 +02:00
|
|
|
|
|
|
|
|
|
pl= uni_to_cs[(wc>>8) & 0xFF];
|
|
|
|
|
str[0]= pl ? pl[wc & 0xFF] : '\0';
|
|
|
|
|
return (!str[0] && wc) ? MY_CS_ILUNI : 1;
|
2003-01-30 07:47:15 +01:00
|
|
|
|
}
|
|
|
|
|
|
2003-05-23 14:45:52 +02:00
|
|
|
|
static MY_CHARSET_HANDLER my_charset_handler=
|
|
|
|
|
{
|
2004-06-11 13:29:16 +02:00
|
|
|
|
NULL, /* init */
|
2003-05-23 14:45:52 +02:00
|
|
|
|
NULL,
|
2003-10-20 15:53:48 +02:00
|
|
|
|
my_mbcharlen_8bit,
|
2003-05-23 14:45:52 +02:00
|
|
|
|
my_numchars_8bit,
|
|
|
|
|
my_charpos_8bit,
|
2004-02-17 00:35:17 +01:00
|
|
|
|
my_well_formed_len_8bit,
|
2003-09-16 12:43:17 +02:00
|
|
|
|
my_lengthsp_8bit,
|
2004-08-25 08:39:43 +02:00
|
|
|
|
my_numcells_8bit,
|
2003-05-23 14:45:52 +02:00
|
|
|
|
my_mb_wc_latin1,
|
|
|
|
|
my_wc_mb_latin1,
|
WL#1386 - CTYPE table for unicode character sets
A prerequisite for several fulltext and XML bugs.
MY_CHARSET_HANDLER now has a new function "ctype"
to detect a type of the next character in a string
(i.e. digit, letter, space, punctuation, control, etc),
which now works correctly for both 8bit and multibyte charsets.
Previously only 8bit charsets worked correctly,
while any multibyte character was considered as letter
in multibyte charsets.
Many files:
Adding new function
Makefile.am:
Adding build rules for uctypedump,
a dump tool to create my_uctype.h
using Unicode Character Database file.
m_ctype.h:
Adding declaration of my_uni_ctype,
ctype data for Unicode.
Adding new member into MY_CHARSET_HANDLER
Makefile.am:
Adding my_uctype.h into noinst_HEADERS
my_uctype.h, uctypedump.c:
new files:
ctype data for unicode,
and the tool to generate it from
a Unicode Character Database file.
include/Makefile.am:
Adding my_uctype.h
include/m_ctype.h:
Adding declaration of my_uni_ctype,
ctype data for Unicode.
strings/Makefile.am:
Adding build rules for uctypedump,
a dump tool to create my_uctype.h
using Unicode Character Database file.
strings/ctype-big5.c:
Adding new function
strings/ctype-bin.c:
Adding new function
strings/ctype-cp932.c:
Adding new function
strings/ctype-euc_kr.c:
Adding new function
strings/ctype-eucjpms.c:
Adding new function
strings/ctype-gb2312.c:
Adding new function
strings/ctype-gbk.c:
Adding new function
strings/ctype-latin1.c:
Adding new function
strings/ctype-mb.c:
Adding new function
strings/ctype-simple.c:
Adding new function
strings/ctype-sjis.c:
Adding new function
strings/ctype-tis620.c:
Adding new function
strings/ctype-ucs2.c:
Adding new function
strings/ctype-ujis.c:
Adding new function
strings/ctype-utf8.c:
Adding new function
2006-02-02 07:07:47 +01:00
|
|
|
|
my_mb_ctype_8bit,
|
2003-05-23 14:45:52 +02:00
|
|
|
|
my_caseup_str_8bit,
|
|
|
|
|
my_casedn_str_8bit,
|
|
|
|
|
my_caseup_8bit,
|
|
|
|
|
my_casedn_8bit,
|
|
|
|
|
my_snprintf_8bit,
|
|
|
|
|
my_long10_to_str_8bit,
|
|
|
|
|
my_longlong10_to_str_8bit,
|
|
|
|
|
my_fill_8bit,
|
|
|
|
|
my_strntol_8bit,
|
|
|
|
|
my_strntoul_8bit,
|
|
|
|
|
my_strntoll_8bit,
|
|
|
|
|
my_strntoull_8bit,
|
|
|
|
|
my_strntod_8bit,
|
2004-09-25 12:29:33 +02:00
|
|
|
|
my_strtoll10_8bit,
|
2003-05-23 14:45:52 +02:00
|
|
|
|
my_scan_8bit
|
|
|
|
|
};
|
|
|
|
|
|
2003-01-30 07:47:15 +01:00
|
|
|
|
|
2003-05-23 15:39:55 +02:00
|
|
|
|
CHARSET_INFO my_charset_latin1=
|
2003-01-29 14:31:20 +01:00
|
|
|
|
{
|
2003-03-26 10:27:19 +01:00
|
|
|
|
8,0,0, /* number */
|
|
|
|
|
MY_CS_COMPILED | MY_CS_PRIMARY, /* state */
|
|
|
|
|
"latin1", /* cs name */
|
2003-04-01 15:54:01 +02:00
|
|
|
|
"latin1_swedish_ci", /* name */
|
2003-03-26 10:27:19 +01:00
|
|
|
|
"", /* comment */
|
2004-06-08 14:56:15 +02:00
|
|
|
|
NULL, /* tailoring */
|
2003-01-29 14:31:20 +01:00
|
|
|
|
ctype_latin1,
|
|
|
|
|
to_lower_latin1,
|
|
|
|
|
to_upper_latin1,
|
|
|
|
|
sort_order_latin1,
|
2004-06-12 17:36:58 +02:00
|
|
|
|
NULL, /* contractions */
|
2004-05-25 14:40:20 +02:00
|
|
|
|
NULL, /* sort_order_big*/
|
2004-05-11 10:32:01 +02:00
|
|
|
|
cs_to_uni, /* tab_to_uni */
|
2003-01-30 07:47:15 +01:00
|
|
|
|
NULL, /* tab_from_uni */
|
2005-06-06 13:54:15 +02:00
|
|
|
|
my_unicase_default, /* caseinfo */
|
2004-06-10 16:10:21 +02:00
|
|
|
|
NULL, /* state_map */
|
|
|
|
|
NULL, /* ident_map */
|
2003-08-18 14:24:50 +02:00
|
|
|
|
1, /* strxfrm_multiply */
|
2005-06-06 13:54:15 +02:00
|
|
|
|
1, /* caseup_multiply */
|
|
|
|
|
1, /* casedn_multiply */
|
2004-01-19 16:16:30 +01:00
|
|
|
|
1, /* mbminlen */
|
2003-01-29 14:31:20 +01:00
|
|
|
|
1, /* mbmaxlen */
|
2004-03-19 07:00:46 +01:00
|
|
|
|
0, /* min_sort_char */
|
2004-06-11 14:50:20 +02:00
|
|
|
|
255, /* max_sort_char */
|
2005-10-13 16:16:19 +02:00
|
|
|
|
' ', /* pad char */
|
2005-08-17 10:26:32 +02:00
|
|
|
|
0, /* escape_with_backslash_is_dangerous */
|
2003-05-23 14:45:52 +02:00
|
|
|
|
&my_charset_handler,
|
|
|
|
|
&my_collation_8bit_simple_ci_handler
|
2003-01-29 14:31:20 +01:00
|
|
|
|
};
|
2003-03-26 10:27:19 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * This file is the latin1 character set with German sorting
 *
 * The modern sort order is used, where:
 *
 * 'ä' (0xE4)  ->  "ae"
 * 'ö' (0xF6)  ->  "oe"
 * 'ü' (0xFC)  ->  "ue"
 * 'ß' (0xDF)  ->  "ss"
 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * This is a simple latin1 mapping table, which maps all accented
 * characters to their non-accented equivalents.  Note: in this
 * table, 'á' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
 * accented characters except the following are treated the same way.
 * Ü, ü, Ö, ö, Ä, ä
 */
|
|
|
|
|
|
|
|
|
|
/*
  One-weight-per-byte sort table with German rules, indexed by byte
  value.  Accented letters get the weight of their base letter, except
  that 0xC4/0xE4 share weight 196, 0xD6/0xF6 share weight 214 and
  0xDC/0xFC share weight 220.
*/
static uchar sort_order_latin1_de[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,   /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   /* 0x30 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x40 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,   /* 0x50 */
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x60 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,   /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,   /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,   /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,   /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,   /* 0xB0 */
   65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,   /* 0xC0 */
   68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,   /* 0xD0 */
   65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,   /* 0xE0 */
   68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89    /* 0xF0 */
};
|
|
|
|
|
|
2003-08-18 23:08:08 +02:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Same as sort_order_latin1_de, but maps ALL accented chars to unaccented ones
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
  First sort weight of every byte for the German collation, indexed by
  byte value.  Bytes that expand to two weights have their second weight
  in combo2map[] at the same index.
*/
uchar combo1map[]={
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,   /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   /* 0x30 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x40 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,   /* 0x50 */
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   /* 0x60 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,   /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,   /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,   /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,   /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,   /* 0xB0 */
   65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,   /* 0xC0 */
   68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,   /* 0xD0 */
   65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,   /* 0xE0 */
   68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89    /* 0xF0 */
};
|
|
|
|
|
|
|
|
|
|
/*
  Second sort weight of every byte for the German collation, indexed by
  byte value; 0 means the byte has a single weight (see combo1map[]).
  Non-zero entries: 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC -> 69 ('E') and
  0xDF -> 83 ('S').
  The initializer has 257 values: one more than the 256 byte values.
  The trailing zero is kept so that the size of the array is unchanged.
*/
uchar combo2map[]={
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x00 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x10 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x20 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x30 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x40 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x50 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x60 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x70 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x80 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x90 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xA0 */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xB0 */
    0,  0,  0,  0, 69,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xC0 */
    0,  0,  0,  0,  0,  0, 69,  0,  0,  0,  0,  0, 69,  0,  0, 83,   /* 0xD0 */
    0,  0,  0,  0, 69,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0xE0 */
    0,  0,  0,  0,  0,  0, 69,  0,  0,  0,  0,  0, 69,  0,  0,  0,   /* 0xF0 */
    0                                                                /* extra trailing entry */
};
|
2003-03-26 10:27:19 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Some notes about the following comparison rules:
  By definition, my_strnncoll_latin1_de must work exactly as if we had
  called my_strnxfrm_latin1_de() on both strings and compared the result
  strings.

  This means that:
  Ä must also match ÁE and Aè, because my_strnxfrm_latin1_de() will
  convert both to AE.

  The other option would be to not do any accent removal in
  sort_order_latin1_de[] at all
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
|
2004-03-25 14:05:01 +01:00
|
|
|
|
const uchar *a, uint a_length,
|
2004-06-10 21:18:57 +02:00
|
|
|
|
const uchar *b, uint b_length,
|
|
|
|
|
my_bool b_is_prefix)
|
2003-03-26 10:27:19 +01:00
|
|
|
|
{
|
2004-03-25 14:05:01 +01:00
|
|
|
|
const uchar *a_end= a + a_length;
|
|
|
|
|
const uchar *b_end= b + b_length;
|
|
|
|
|
uchar a_char, a_extend= 0, b_char, b_extend= 0;
|
2003-03-26 10:27:19 +01:00
|
|
|
|
|
2004-03-25 14:05:01 +01:00
|
|
|
|
while ((a < a_end || a_extend) && (b < b_end || b_extend))
|
2003-03-26 10:27:19 +01:00
|
|
|
|
{
|
2004-03-25 14:05:01 +01:00
|
|
|
|
if (a_extend)
|
2003-08-18 23:08:08 +02:00
|
|
|
|
{
|
2004-03-25 14:05:01 +01:00
|
|
|
|
a_char=a_extend; a_extend=0;
|
2003-08-18 23:08:08 +02:00
|
|
|
|
}
|
|
|
|
|
else
|
2003-03-26 10:27:19 +01:00
|
|
|
|
{
|
2004-03-25 14:05:01 +01:00
|
|
|
|
a_extend=combo2map[*a];
|
|
|
|
|
a_char=combo1map[*a++];
|
2003-03-26 10:27:19 +01:00
|
|
|
|
}
|
2004-03-25 14:05:01 +01:00
|
|
|
|
if (b_extend)
|
2003-08-18 23:08:08 +02:00
|
|
|
|
{
|
2004-03-25 14:05:01 +01:00
|
|
|
|
b_char=b_extend; b_extend=0;
|
2003-08-18 23:08:08 +02:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2004-03-25 14:05:01 +01:00
|
|
|
|
b_extend=combo2map[*b];
|
|
|
|
|
b_char=combo1map[*b++];
|
2003-08-18 23:08:08 +02:00
|
|
|
|
}
|
2004-03-25 14:05:01 +01:00
|
|
|
|
if (a_char != b_char)
|
|
|
|
|
return (int) a_char - (int) b_char;
|
2003-03-26 10:27:19 +01:00
|
|
|
|
}
|
2003-08-18 23:08:08 +02:00
|
|
|
|
/*
|
|
|
|
|
A simple test of string lengths won't work -- we test to see
|
|
|
|
|
which string ran out first
|
|
|
|
|
*/
|
2004-06-10 21:18:57 +02:00
|
|
|
|
return ((a < a_end || a_extend) ? (b_is_prefix ? 0 : 1) :
|
2004-03-25 14:05:01 +01:00
|
|
|
|
(b < b_end || b_extend) ? -1 : 0);
|
2003-03-26 10:27:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
2003-08-18 23:08:08 +02:00
|
|
|
|
|
2004-03-25 14:05:01 +01:00
|
|
|
|
static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
|
const uchar *a, uint a_length,
|
2004-12-06 01:00:37 +01:00
|
|
|
|
const uchar *b, uint b_length,
|
|
|
|
|
my_bool diff_if_only_endspace_difference)
|
2003-03-26 10:27:19 +01:00
|
|
|
|
{
|
2004-12-06 01:00:37 +01:00
|
|
|
|
const uchar *a_end= a + a_length, *b_end= b + b_length;
|
2004-03-25 14:05:01 +01:00
|
|
|
|
uchar a_char, a_extend= 0, b_char, b_extend= 0;
|
2004-12-06 01:00:37 +01:00
|
|
|
|
int res;
|
|
|
|
|
|
|
|
|
|
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
|
|
|
|
|
diff_if_only_endspace_difference= 0;
|
|
|
|
|
#endif
|
2004-03-25 14:05:01 +01:00
|
|
|
|
|
|
|
|
|
while ((a < a_end || a_extend) && (b < b_end || b_extend))
|
|
|
|
|
{
|
|
|
|
|
if (a_extend)
|
|
|
|
|
{
|
|
|
|
|
a_char=a_extend;
|
|
|
|
|
a_extend= 0;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
a_extend= combo2map[*a];
|
|
|
|
|
a_char= combo1map[*a++];
|
|
|
|
|
}
|
|
|
|
|
if (b_extend)
|
|
|
|
|
{
|
|
|
|
|
b_char= b_extend;
|
|
|
|
|
b_extend= 0;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
b_extend= combo2map[*b];
|
|
|
|
|
b_char= combo1map[*b++];
|
|
|
|
|
}
|
|
|
|
|
if (a_char != b_char)
|
|
|
|
|
return (int) a_char - (int) b_char;
|
|
|
|
|
}
|
|
|
|
|
/* Check if double character last */
|
|
|
|
|
if (a_extend)
|
|
|
|
|
return 1;
|
|
|
|
|
if (b_extend)
|
|
|
|
|
return -1;
|
|
|
|
|
|
2004-12-06 01:00:37 +01:00
|
|
|
|
res= 0;
|
2004-03-25 14:05:01 +01:00
|
|
|
|
if (a != a_end || b != b_end)
|
|
|
|
|
{
|
2005-02-01 15:27:08 +01:00
|
|
|
|
int swap= 1;
|
2004-12-06 01:00:37 +01:00
|
|
|
|
if (diff_if_only_endspace_difference)
|
|
|
|
|
res= 1; /* Assume 'a' is bigger */
|
2004-03-25 14:05:01 +01:00
|
|
|
|
/*
|
|
|
|
|
Check the next not space character of the longer key. If it's < ' ',
|
|
|
|
|
then it's smaller than the other key.
|
|
|
|
|
*/
|
|
|
|
|
if (a == a_end)
|
|
|
|
|
{
|
|
|
|
|
/* put shorter key in a */
|
|
|
|
|
a_end= b_end;
|
|
|
|
|
a= b;
|
|
|
|
|
swap= -1; /* swap sign of result */
|
2004-12-06 01:00:37 +01:00
|
|
|
|
res= -res;
|
2004-03-25 14:05:01 +01:00
|
|
|
|
}
|
|
|
|
|
for ( ; a < a_end ; a++)
|
|
|
|
|
{
|
|
|
|
|
if (*a != ' ')
|
2005-02-01 15:27:08 +01:00
|
|
|
|
return (*a < ' ') ? -swap : swap;
|
2004-03-25 14:05:01 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
2004-12-06 01:00:37 +01:00
|
|
|
|
return res;
|
2003-03-26 10:27:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
|
2003-08-18 23:08:08 +02:00
|
|
|
|
uchar * dest, uint len,
|
|
|
|
|
const uchar * src, uint srclen)
|
2003-03-26 10:27:19 +01:00
|
|
|
|
{
|
|
|
|
|
const uchar *de = dest + len;
|
|
|
|
|
const uchar *se = src + srclen;
|
2003-08-18 23:08:08 +02:00
|
|
|
|
for ( ; src < se && dest < de ; src++)
|
2003-03-26 10:27:19 +01:00
|
|
|
|
{
|
2003-08-18 23:08:08 +02:00
|
|
|
|
uchar chr=combo1map[*src];
|
|
|
|
|
*dest++=chr;
|
|
|
|
|
if ((chr=combo2map[*src]) && dest < de)
|
|
|
|
|
*dest++=chr;
|
2003-03-26 10:27:19 +01:00
|
|
|
|
}
|
2005-01-13 15:12:04 +01:00
|
|
|
|
if (dest < de)
|
|
|
|
|
bfill(dest, de - dest, ' ');
|
|
|
|
|
return (int) len;
|
2003-03-26 10:27:19 +01:00
|
|
|
|
}
|
|
|
|
|
|
2003-05-23 14:45:52 +02:00
|
|
|
|
|
2004-03-25 14:05:01 +01:00
|
|
|
|
void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
|
|
|
|
|
const uchar *key, uint len,
|
|
|
|
|
ulong *nr1, ulong *nr2)
|
|
|
|
|
{
|
|
|
|
|
const uchar *end= key+len;
|
|
|
|
|
/*
|
|
|
|
|
Remove end space. We have to do this to be able to compare
|
|
|
|
|
'AE' and '<EFBFBD>' as identical
|
|
|
|
|
*/
|
|
|
|
|
while (end > key && end[-1] == ' ')
|
|
|
|
|
end--;
|
|
|
|
|
|
|
|
|
|
for (; key < end ; key++)
|
|
|
|
|
{
|
|
|
|
|
uint X= (uint) combo1map[(uint) *key];
|
|
|
|
|
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
|
|
|
|
|
nr2[0]+=3;
|
|
|
|
|
if ((X= combo2map[*key]))
|
|
|
|
|
{
|
|
|
|
|
nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
|
|
|
|
|
nr2[0]+=3;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2003-05-23 15:39:55 +02:00
|
|
|
|
static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
|
2003-05-23 14:45:52 +02:00
|
|
|
|
{
|
2004-06-11 13:29:16 +02:00
|
|
|
|
NULL, /* init */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
my_strnncoll_latin1_de,
|
|
|
|
|
my_strnncollsp_latin1_de,
|
|
|
|
|
my_strnxfrm_latin1_de,
|
2005-01-26 13:34:09 +01:00
|
|
|
|
my_strnxfrmlen_simple,
|
2003-08-18 23:08:08 +02:00
|
|
|
|
my_like_range_simple,
|
|
|
|
|
my_wildcmp_8bit,
|
|
|
|
|
my_strcasecmp_8bit,
|
2003-09-19 12:18:19 +02:00
|
|
|
|
my_instr_simple,
|
2005-05-05 18:13:57 +02:00
|
|
|
|
my_hash_sort_latin1_de,
|
|
|
|
|
my_propagate_complex
|
2003-05-23 14:45:52 +02:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2003-05-23 15:39:55 +02:00
|
|
|
|
CHARSET_INFO my_charset_latin1_german2_ci=
|
2003-03-26 10:27:19 +01:00
|
|
|
|
{
|
2003-08-18 23:08:08 +02:00
|
|
|
|
31,0,0, /* number */
|
|
|
|
|
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
|
|
|
|
|
"latin1", /* cs name */
|
|
|
|
|
"latin1_german2_ci", /* name */
|
|
|
|
|
"", /* comment */
|
2004-06-08 14:56:15 +02:00
|
|
|
|
NULL, /* tailoring */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
ctype_latin1,
|
|
|
|
|
to_lower_latin1,
|
|
|
|
|
to_upper_latin1,
|
2004-08-26 09:02:11 +02:00
|
|
|
|
sort_order_latin1_de,
|
2004-06-12 17:36:58 +02:00
|
|
|
|
NULL, /* contractions */
|
2004-05-25 14:40:20 +02:00
|
|
|
|
NULL, /* sort_order_big*/
|
2004-05-11 10:32:01 +02:00
|
|
|
|
cs_to_uni, /* tab_to_uni */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
NULL, /* tab_from_uni */
|
2005-06-06 13:54:15 +02:00
|
|
|
|
my_unicase_default, /* caseinfo */
|
2004-06-10 16:10:21 +02:00
|
|
|
|
NULL, /* state_map */
|
|
|
|
|
NULL, /* ident_map */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
2, /* strxfrm_multiply */
|
2005-06-06 13:54:15 +02:00
|
|
|
|
1, /* caseup_multiply */
|
|
|
|
|
1, /* casedn_multiply */
|
2004-01-19 16:16:30 +01:00
|
|
|
|
1, /* mbminlen */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
1, /* mbmaxlen */
|
2004-03-19 07:00:46 +01:00
|
|
|
|
0, /* min_sort_char */
|
2004-06-11 14:50:20 +02:00
|
|
|
|
247, /* max_sort_char */
|
2005-10-13 16:16:19 +02:00
|
|
|
|
' ', /* pad char */
|
2005-08-17 10:26:32 +02:00
|
|
|
|
0, /* escape_with_backslash_is_dangerous */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
&my_charset_handler,
|
|
|
|
|
&my_collation_german2_ci_handler
|
2003-03-26 10:27:19 +01:00
|
|
|
|
};
|
|
|
|
|
|
2003-05-23 15:39:55 +02:00
|
|
|
|
|
|
|
|
|
CHARSET_INFO my_charset_latin1_bin=
|
|
|
|
|
{
|
2003-08-18 23:08:08 +02:00
|
|
|
|
47,0,0, /* number */
|
|
|
|
|
MY_CS_COMPILED|MY_CS_BINSORT, /* state */
|
|
|
|
|
"latin1", /* cs name */
|
|
|
|
|
"latin1_bin", /* name */
|
|
|
|
|
"", /* comment */
|
2004-06-08 14:56:15 +02:00
|
|
|
|
NULL, /* tailoring */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
ctype_latin1,
|
|
|
|
|
to_lower_latin1,
|
|
|
|
|
to_upper_latin1,
|
2004-08-18 09:07:54 +02:00
|
|
|
|
NULL, /* sort_order */
|
2004-06-12 17:36:58 +02:00
|
|
|
|
NULL, /* contractions */
|
2004-05-25 14:40:20 +02:00
|
|
|
|
NULL, /* sort_order_big*/
|
2004-05-11 10:32:01 +02:00
|
|
|
|
cs_to_uni, /* tab_to_uni */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
NULL, /* tab_from_uni */
|
2005-06-06 13:54:15 +02:00
|
|
|
|
my_unicase_default, /* caseinfo */
|
2004-06-10 16:10:21 +02:00
|
|
|
|
NULL, /* state_map */
|
|
|
|
|
NULL, /* ident_map */
|
2003-08-18 23:10:21 +02:00
|
|
|
|
1, /* strxfrm_multiply */
|
2005-06-06 13:54:15 +02:00
|
|
|
|
1, /* caseup_multiply */
|
|
|
|
|
1, /* casedn_multiply */
|
2004-01-19 16:16:30 +01:00
|
|
|
|
1, /* mbminlen */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
1, /* mbmaxlen */
|
2004-03-19 07:00:46 +01:00
|
|
|
|
0, /* min_sort_char */
|
2004-06-11 14:50:20 +02:00
|
|
|
|
255, /* max_sort_char */
|
2005-10-13 16:16:19 +02:00
|
|
|
|
' ', /* pad char */
|
2005-08-17 10:26:32 +02:00
|
|
|
|
0, /* escape_with_backslash_is_dangerous */
|
2003-08-18 23:08:08 +02:00
|
|
|
|
&my_charset_handler,
|
2003-09-19 12:18:19 +02:00
|
|
|
|
&my_collation_8bit_bin_handler
|
2003-05-23 15:39:55 +02:00
|
|
|
|
};
|
|
|
|
|
|