Updates for multi-byte character sets

(Note: test 'union' fails, but Sanja promised to fix this)
This commit is contained in:
monty@mashka.mysql.fi 2003-01-14 14:28:36 +02:00
commit 7e9b27eaf5
38 changed files with 564 additions and 489 deletions

View file

@ -22,19 +22,19 @@ pkglib_LIBRARIES = libmystrings.a
# Exact one of ASSEMBLER_X
if ASSEMBLER_x86
ASRCS = strings-x86.s longlong2str-x86.s
CSRCS = bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c
CSRCS = bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c
else
if ASSEMBLER_sparc32
# These file MUST all be on the same line!! Otherwise automake
# generats a very broken makefile
ASRCS = bmove_upp-sparc.s strappend-sparc.s strend-sparc.s strinstr-sparc.s strmake-sparc.s strmov-sparc.s strnmov-sparc.s strstr-sparc.s
CSRCS = strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c
CSRCS = strcont.c strfill.c strcend.c is_prefix.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c strxmov.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c
else
#no assembler
ASRCS =
# These file MUST all be on the same line!! Otherwise automake
# generats a very broken makefile
CSRCS = strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c
CSRCS = strxmov.c bmove_upp.c strappend.c strcont.c strend.c strfill.c strcend.c is_prefix.c strstr.c strinstr.c strmake.c strnmov.c strmov.c longlong2str.c bfill.c bmove.c bmove512.c bchange.c strxnmov.c int2str.c str2int.c r_strinstr.c atof.c bcmp.c strtol.c strtoul.c strtoll.c strtoull.c llstr.c strnlen.c ctype.c ctype-simple.c ctype-mb.c ctype-big5.c ctype-czech.c ctype-euc_kr.c ctype-gb2312.c ctype-gbk.c ctype-latin1_de.c ctype-sjis.c ctype-tis620.c ctype-ujis.c ctype-utf8.c ctype-win1250ch.c ctype-bin.c my_vsnprintf.c
endif
endif

View file

@ -110,88 +110,40 @@ int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
}
int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc,
unsigned char *s,
unsigned char *e __attribute__((unused)))
unsigned char *str,
unsigned char *end __attribute__((unused)))
{
MY_UNI_IDX *idx;
for(idx=cs->tab_from_uni; idx->tab ; idx++){
if(idx->from<=wc && idx->to>=wc){
s[0]=idx->tab[wc-idx->from];
return (!s[0] && wc) ? MY_CS_ILUNI : 1;
for (idx=cs->tab_from_uni; idx->tab ; idx++)
{
if (idx->from <= wc && idx->to >= wc)
{
str[0]= idx->tab[wc - idx->from];
return (!str[0] && wc) ? MY_CS_ILUNI : 1;
}
}
return MY_CS_ILUNI;
}
#ifdef NOT_USED
static int my_vsnprintf_8bit(char *to, size_t n, const char* fmt, va_list ap)
{
char *start=to, *end=to+n-1;
for (; *fmt ; fmt++)
{
if (fmt[0] != '%')
{
if (to == end) /* End of buffer */
break;
*to++= *fmt; /* Copy ordinary char */
continue;
}
/* Skip if max size is used (to be compatible with printf) */
fmt++;
while (my_isdigit(system_charset_info,*fmt) || *fmt == '.' || *fmt == '-')
fmt++;
if (*fmt == 'l')
fmt++;
if (*fmt == 's') /* String parameter */
{
reg2 char *par = va_arg(ap, char *);
uint plen,left_len = (uint)(end-to);
if (!par) par = (char*)"(null)";
plen = (uint) strlen(par);
if (left_len <= plen)
plen = left_len - 1;
to=strnmov(to,par,plen);
continue;
}
else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
{
register int iarg;
if ((uint) (end-to) < 16)
break;
iarg = va_arg(ap, int);
if (*fmt == 'd')
to=int10_to_str((long) iarg,to, -10);
else
to=int10_to_str((long) (uint) iarg,to,10);
continue;
}
/* We come here on '%%', unknown code or too long parameter */
if (to == end)
break;
*to++='%'; /* % used as % or unknown code */
}
DBUG_ASSERT(to <= end);
*to='\0'; /* End of errmessage */
return (uint) (to - start);
}
#endif
/*
We can't use vsprintf here as it's not guaranteed to return
the length on all operating systems.
This function is also not called in a safe environment, so the
end buffer must be checked.
*/
int my_snprintf_8bit(CHARSET_INFO *cs __attribute__((unused)),
char* to, uint n __attribute__((unused)),
const char* fmt, ...)
{
va_list args;
int result;
va_start(args,fmt);
#ifdef NOT_USED
return my_vsnprintf_8bit(to, n, fmt, args);
#endif
/*
FIXME: generally not safe, but it is OK for now
FIXME: as far as it's not called unsafely in the current code
*/
return vsprintf(to,fmt,args); /* FIXME */
result= my_vsnprintf(to, n, fmt, args);
va_end(args);
return result;
}
@ -690,28 +642,48 @@ noconv:
return 0L;
}
double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)),
const char *s, uint l, char **e)
/*
Read double from string
SYNOPSIS:
my_strntod_8bit()
cs Character set information
str String to convert to double
length Optional length for string.
end pointer to end of converted string
NOTES:
If length is not INT_MAX32 or str[length] != 0 then the given str must
be writeable
If length == INT_MAX32 the str must be \0 terminated.
It's implemented this way to save a buffer allocation and a memory copy.
RETURN
value of number in string
*/
double my_strntod_8bit(CHARSET_INFO *cs __attribute__((unused)),
char *str, uint length, char **end)
{
char buf[256];
double res;
if((l+1)>sizeof(buf))
{
if (e)
memcpy(*e,s,sizeof(s));
return 0;
}
strncpy(buf,s,l);
buf[l]='\0';
res=strtod(buf,e);
if (e)
memcpy(*e,*e-buf+s,sizeof(s));
return res;
char end_char;
double result;
if (length == INT_MAX32 || str[length] == 0)
return strtod(str, end);
end_char= str[length];
str[length]= 0;
result= strtod(str, end);
str[length]= end_char; /* Restore end char */
return result;
}
/*
This is a fast version optimized for the case of radix 10 / -10
Assume len >= 1
*/
int my_l10tostr_8bit(CHARSET_INFO *cs __attribute__((unused)),
@ -720,18 +692,19 @@ int my_l10tostr_8bit(CHARSET_INFO *cs __attribute__((unused)),
char buffer[66];
register char *p, *e;
long int new_val;
int sl=0;
uint l;
uint sign=0;
e = p = &buffer[sizeof(buffer)-1];
*e='\0';
*p= 0;
if (radix < 0)
{
if (val < 0)
{
sl = 1;
val = -val;
val= -val;
*dst++= '-';
len--;
sign= 1;
}
}
@ -746,41 +719,38 @@ int my_l10tostr_8bit(CHARSET_INFO *cs __attribute__((unused)),
val= new_val;
}
if (sl)
{
*--p='-';
}
l=e-p;
l=(l>len)?len:l;
memcpy(dst,p,l);
return (int)l;
len= min(len, (uint) (e-p));
memcpy(dst, p, len);
return (int) len+sign;
}
int my_ll10tostr_8bit(CHARSET_INFO *cs __attribute__((unused)),
char *dst, uint len, int radix, longlong val)
{
char buffer[65];
register char *p, *e;
long long_val;
int sl=0;
uint l;
uint sign= 0;
if (radix < 0)
{
if (val < 0)
{
sl=1;
val = -val;
*dst++= '-';
len--;
sign= 1;
}
}
e = p = &buffer[sizeof(buffer)-1];
*p='\0';
*p= 0;
if (val == 0)
{
*--p='0';
*--p= '0';
len= 1;
goto cnv;
}
@ -800,16 +770,10 @@ int my_ll10tostr_8bit(CHARSET_INFO *cs __attribute__((unused)),
long_val= quo;
}
len= min(len, (uint) (e-p));
cnv:
if (sl)
{
*--p='-';
}
l=e-p;
l=(l>len)?len:l;
memcpy(dst,p,l);
return (int)(e-p);
memcpy(dst, p, len);
return len+sign;
}

View file

@ -2874,37 +2874,31 @@ bs:
double my_strntod_ucs2(CHARSET_INFO *cs __attribute__((unused)),
const char *nptr, uint l, char **endptr)
char *nptr, uint length, char **endptr)
{
char buf[256];
double res;
register char *b=buf;
register const char *s=nptr;
register const char *e=nptr+l;
register const char *end;
my_wc_t wc;
int cnv;
if((l+1)>sizeof(buf))
{
if (endptr)
*endptr=(char*)nptr;
my_errno=ERANGE;
return 0;
}
while ((cnv=cs->mb_wc(cs,&wc,s,e))>0)
/* Cut too long strings */
if (length >= sizeof(buf))
length= sizeof(buf)-1;
end=nptr+length;
while ((cnv=cs->mb_wc(cs,&wc,s,end)) > 0)
{
s+=cnv;
if (wc < 128)
{
*b++=wc;
}
else
break;
if (wc > (int) (uchar) 'e' || !wc)
break; /* Can't be part of double */
*b++=wc;
}
*b='\0';
*b= 0;
res=strtod(buf,endptr);
res=strtod(buf, endptr);
if (endptr)
*endptr=(char*) (*endptr-buf+nptr);
return res;

166
strings/my_vsnprintf.c Normal file
View file

@ -0,0 +1,166 @@
/* Copyright (C) 2000 MySQL AB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
#include <m_string.h>
#include <stdarg.h>
#include <m_ctype.h>
#include <assert.h>
/*
Limited snprintf() implementations
IMPLEMENTION:
Supports following formats:
%#d
%#u
%#.#s Note #.# is skiped
RETURN
length of result string
*/
int my_snprintf(char* to, size_t n, const char* fmt, ...)
{
va_list args;
int result;
va_start(args,fmt);
result= my_vsnprintf(to, n, fmt, args);
va_end(args);
return result;
}
int my_vsnprintf(char *to, size_t n, const char* fmt, va_list ap)
{
char *start=to, *end=to+n-1;
uint length, num_state, pre_zero;
for (; *fmt ; fmt++)
{
if (fmt[0] != '%')
{
if (to == end) /* End of buffer */
break;
*to++= *fmt; /* Copy ordinary char */
continue;
}
fmt++; /* skip '%' */
/* Read max fill size (only used with %d and %u) */
if (*fmt == '-')
fmt++;
length= num_state= pre_zero= 0;
for (;; fmt++)
{
if (my_isdigit(system_charset_info,*fmt))
{
if (!num_state)
{
length=length*10+ (uint) (*fmt-'0');
if (!length)
pre_zero= 1; /* first digit was 0 */
}
continue;
}
if (*fmt != '.' || num_state)
break;
num_state= 1;
}
if (*fmt == 'l')
fmt++;
if (*fmt == 's') /* String parameter */
{
reg2 char *par = va_arg(ap, char *);
uint plen,left_len = (uint)(end-to);
if (!par) par = (char*)"(null)";
plen = (uint) strlen(par);
if (left_len <= plen)
plen = left_len - 1;
to=strnmov(to,par,plen);
continue;
}
else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */
{
register int iarg;
char *to_start= to;
if ((uint) (end-to) < max(16,length))
break;
iarg = va_arg(ap, int);
if (*fmt == 'd')
to=int10_to_str((long) iarg,to, -10);
else
to=int10_to_str((long) (uint) iarg,to,10);
/* If %#d syntax was used, we have to pre-zero/pre-space the string */
if (length)
{
uint res_length= (uint) (to - to_start);
if (res_length < length)
{
uint diff= (length- res_length);
bmove_upp(to+diff, to, res_length);
bfill(to-res_length, diff, pre_zero ? '0' : ' ');
to+= diff;
}
}
continue;
}
/* We come here on '%%', unknown code or too long parameter */
if (to == end)
break;
*to++='%'; /* % used as % or unknown code */
}
DBUG_ASSERT(to <= end);
*to='\0'; /* End of errmessage */
return (uint) (to - start);
}
#ifdef MAIN
#define OVERRUN_SENTRY 250
static void my_printf(const char * fmt, ...)
{
char buf[33];
int n;
va_list ar;
va_start(ar, fmt);
buf[sizeof(buf)-1]=OVERRUN_SENTRY;
n = my_vsnprintf(buf, sizeof(buf)-1,fmt, ar);
printf(buf);
printf("n=%d, strlen=%d\n", n, strlen(buf));
if (buf[sizeof(buf)-1] != OVERRUN_SENTRY)
{
fprintf(stderr, "Buffer overrun\n");
abort();
}
va_end(ar);
}
int main()
{
my_printf("Hello\n");
my_printf("Hello int, %d\n", 1);
my_printf("Hello string '%s'\n", "I am a string");
my_printf("Hello hack hack hack hack hack hack hack %d\n", 1);
my_printf("Hello %d hack %d\n", 1, 4);
my_printf("Hello %d hack hack hack hack hack %d\n", 1, 4);
my_printf("Hello '%s' hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh\n", "hack");
my_printf("Hello hhhhhhhhhhhhhh %d sssssssssssssss\n", 1);
my_printf("Hello %u\n", 1);
my_printf("conn %ld to: '%-.64s' user: '%-.32s' host:\
`%-.64s' (%-.64s)", 1, 0,0,0,0);
return 0;
}
#endif