2009-02-15 03:18:30 +01:00
|
|
|
/*
|
|
|
|
Licensed Materials - Property of IBM
|
|
|
|
DB2 Storage Engine Enablement
|
|
|
|
Copyright IBM Corporation 2007,2008
|
|
|
|
All rights reserved
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
are permitted provided that the following conditions are met:
|
|
|
|
(a) Redistributions of source code must retain this list of conditions, the
|
|
|
|
copyright notice in section {d} below, and the disclaimer following this
|
|
|
|
list of conditions.
|
|
|
|
(b) Redistributions in binary form must reproduce this list of conditions, the
|
|
|
|
copyright notice in section (d) below, and the disclaimer following this
|
|
|
|
list of conditions, in the documentation and/or other materials provided
|
|
|
|
with the distribution.
|
|
|
|
(c) The name of IBM may not be used to endorse or promote products derived from
|
|
|
|
this software without specific prior written permission.
|
|
|
|
(d) The text of the required copyright notice is:
|
|
|
|
Licensed Materials - Property of IBM
|
|
|
|
DB2 Storage Engine Enablement
|
|
|
|
Copyright IBM Corporation 2007,2008
|
|
|
|
All rights reserved
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY IBM CORPORATION "AS IS" AND ANY EXPRESS OR IMPLIED
|
|
|
|
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
|
|
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
|
|
|
|
SHALL IBM CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
|
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
|
|
|
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
|
|
|
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
|
|
|
|
OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
@file
|
|
|
|
|
|
|
|
@brief A direct map optimization of iconv and related functions
|
|
|
|
This was show to significantly reduce character conversion cost
|
|
|
|
for short strings when compared to calling iconv system code.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef DB2I_MYCONV_H
|
|
|
|
#define DB2I_MYCONV_H
|
|
|
|
|
|
|
|
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <wchar.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <iconv.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <time.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#ifndef TRUE
|
|
|
|
#define TRUE 1
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef FALSE
|
|
|
|
#define FALSE 0
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
#define INTERN inline
|
|
|
|
#define EXTERN extern "C"
|
|
|
|
#else
|
|
|
|
#define INTERN static
|
|
|
|
#define EXTERN extern
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* ANSI integer data types */
|
|
|
|
#if defined(__OS400_TGTVRM__)
|
|
|
|
/* for DTAMDL(*P128), datamodel(P128): int/long/pointer=4/4/16 */
|
|
|
|
/* LLP64:4/4/8 is used for teraspace ?? */
|
|
|
|
typedef short int16_t;
|
|
|
|
typedef unsigned short uint16_t;
|
|
|
|
typedef int int32_t;
|
|
|
|
typedef unsigned int uint32_t;
|
|
|
|
typedef long long int64_t;
|
|
|
|
typedef unsigned long long uint64_t;
|
|
|
|
#elif defined(PASE)
|
|
|
|
/* PASE uses IPL32: int/long/pointer=4/4/4 + long long */
|
|
|
|
#elif defined(__64BIT__)
|
|
|
|
/* AIX 64 bit uses LP64: int/long/pointer=4/8/8 */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define CONVERTER_ICONV 1
|
|
|
|
#define CONVERTER_DMAP 2
|
|
|
|
|
|
|
|
#define DMAP_S2S 10
|
|
|
|
#define DMAP_S2U 20
|
|
|
|
#define DMAP_D2U 30
|
|
|
|
#define DMAP_E2U 40
|
|
|
|
#define DMAP_U2S 120
|
|
|
|
#define DMAP_T2S 125
|
|
|
|
#define DMAP_U2D 130
|
|
|
|
#define DMAP_T2D 135
|
|
|
|
#define DMAP_U2E 140
|
|
|
|
#define DMAP_T2E 145
|
|
|
|
#define DMAP_S28 220
|
|
|
|
#define DMAP_D28 230
|
|
|
|
#define DMAP_E28 240
|
|
|
|
#define DMAP_82S 310
|
|
|
|
#define DMAP_82D 320
|
|
|
|
#define DMAP_82E 330
|
|
|
|
#define DMAP_U28 410
|
|
|
|
#define DMAP_82U 420
|
|
|
|
#define DMAP_T28 425
|
|
|
|
#define DMAP_U2U 510
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct __dmap_rec *dmap_t;
|
|
|
|
|
|
|
|
struct __dmap_rec
|
|
|
|
{
|
|
|
|
uint32_t codingSchema;
|
|
|
|
unsigned char * dmapS2S; /* SBCS -> SBCS */
|
|
|
|
/* The following conversion needs be followed by conversion from UCS-2/UTF-16 to UTF-8 */
|
|
|
|
UniChar * dmapD12U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
|
|
|
|
UniChar * dmapD22U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
|
|
|
|
UniChar * dmapE02U; /* EUC/SS0 -> UCS-2/UTF-16 */
|
|
|
|
UniChar * dmapE12U; /* EUC/SS1 -> UCS-2/UTF-16 */
|
|
|
|
UniChar * dmapE22U; /* EUC/0x8E + SS2 -> UCS-2/UTF-16 */
|
|
|
|
UniChar * dmapE32U; /* EUC/0x8F + SS3 -> UCS-2/UTF-16 */
|
|
|
|
uchar * dmapU2D; /* UCS-2 -> DBCS */
|
|
|
|
uchar * dmapU2S; /* UCS-2 -> EUC SS0 */
|
|
|
|
uchar * dmapU2M2; /* UCS-2 -> EUC SS1 */
|
|
|
|
uchar * dmapU2M3; /* UCS-2 -> EUC SS2/SS3 */
|
|
|
|
/* All of these pointers/tables are not used at the same time.
|
|
|
|
* You may be able save some space if you consolidate them.
|
|
|
|
*/
|
|
|
|
uchar * dmapS28; /* SBCS -> UTF-8 */
|
|
|
|
uchar * dmapD28; /* DBCS -> UTF-8 */
|
|
|
|
};
|
|
|
|
|
|
|
|
typedef struct __myconv_rec *myconv_t;
|
|
|
|
struct __myconv_rec
|
|
|
|
{
|
|
|
|
uint32_t converterType;
|
|
|
|
uint32_t index; /* for close */
|
|
|
|
union {
|
|
|
|
iconv_t cnv_iconv;
|
|
|
|
dmap_t cnv_dmap;
|
|
|
|
};
|
|
|
|
int32_t allocatedSize;
|
|
|
|
int32_t fromCcsid;
|
|
|
|
int32_t toCcsid;
|
|
|
|
UniChar subD; /* DBCS substitution char */
|
|
|
|
char subS; /* SBCS substitution char */
|
|
|
|
UniChar srcSubD; /* DBCS substitution char of src codepage */
|
|
|
|
char srcSubS; /* SBCS substitution char of src codepage */
|
|
|
|
char from [41+1]; /* codepage name is up to 41 bytes */
|
|
|
|
char to [41+1]; /* codepage name is up to 41 bytes */
|
|
|
|
#ifdef __64BIT__
|
|
|
|
char reserved[10]; /* align 128 */
|
|
|
|
#else
|
|
|
|
char reserved[14]; /* align 128 */
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
EXTERN int32_t myconvDebug;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
EXTERN int myconvGetES(CCSID);
|
|
|
|
EXTERN int myconvIsEBCDIC(const char *);
|
|
|
|
EXTERN int myconvIsASCII(const char *);
|
|
|
|
EXTERN int myconvIsUnicode(const char *); /* UTF-8, UTF-16, or UCS-2 */
|
|
|
|
EXTERN int myconvIsUnicode2(const char *); /* 2 byte Unicode */
|
|
|
|
EXTERN int myconvIsUCS2(const char *);
|
|
|
|
EXTERN int myconvIsUTF16(const char *);
|
|
|
|
EXTERN int myconvIsUTF8(const char *);
|
|
|
|
EXTERN int myconvIsEUC(const char *);
|
|
|
|
EXTERN int myconvIsISO(const char *);
|
|
|
|
EXTERN int myconvIsSBCS(const char *);
|
|
|
|
EXTERN int myconvIsDBCS(const char *);
|
|
|
|
EXTERN char myconvGetSubS(const char *);
|
|
|
|
EXTERN UniChar myconvGetSubD(const char *);
|
|
|
|
|
|
|
|
|
|
|
|
EXTERN myconv_t myconv_open(const char*, const char*, int32_t);
|
|
|
|
EXTERN int myconv_close(myconv_t);
|
|
|
|
|
|
|
|
INTERN size_t myconv_iconv(myconv_t cd ,
|
|
|
|
char** inBuf,
|
|
|
|
size_t* inBytesLeft,
|
|
|
|
char** outBuf,
|
|
|
|
size_t* outBytesLeft,
|
|
|
|
size_t* numSub)
|
|
|
|
{
|
|
|
|
return iconv(cd->cnv_iconv, inBuf, inBytesLeft, outBuf, outBytesLeft);
|
|
|
|
}
|
|
|
|
|
|
|
|
INTERN size_t myconv_dmap(myconv_t cd,
|
|
|
|
char** inBuf,
|
|
|
|
size_t* inBytesLeft,
|
|
|
|
char** outBuf,
|
|
|
|
size_t* outBytesLeft,
|
|
|
|
size_t* numSub)
|
|
|
|
{
|
|
|
|
if (cd->cnv_dmap->codingSchema == DMAP_S2S) {
|
|
|
|
register unsigned char * dmapS2S=cd->cnv_dmap->dmapS2S;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
} else {
|
|
|
|
*pOut=dmapS2S[*pIn];
|
|
|
|
if (*pOut == 0x00) {
|
2009-07-06 14:19:32 +05:30
|
|
|
errno=EILSEQ; /* 116 */
|
2009-02-15 03:18:30 +01:00
|
|
|
*outBytesLeft-=(*inBytesLeft-inLen);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (*pOut == subS) {
|
|
|
|
if ((*pOut=dmapS2S[*pIn]) == subS) {
|
|
|
|
if (*pIn != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(*inBytesLeft-inLen);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_E2U) {
|
|
|
|
/* use uchar * instead of UniChar to avoid memcpy */
|
|
|
|
register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
|
|
|
|
register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
|
|
|
|
register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
|
|
|
|
register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register int offset;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else {
|
|
|
|
if (*pIn == 0x8E) { /* SS2 */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if (cd->fromCcsid == 33722 || /* IBM-eucJP */
|
|
|
|
cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE22U[offset] == 0x00 &&
|
|
|
|
dmapE22U[offset+1] == 0x00) { /* 2 bytes */
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0) * 0x60 + 0x60;
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset+=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE22U[offset] == 0x00 &&
|
|
|
|
dmapE22U[offset+1] == 0x00) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
*pOut=dmapE22U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapE22U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapE22U[offset] == 0xFF &&
|
|
|
|
dmapE22U[offset+1] == 0xFD) {
|
|
|
|
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=3;
|
|
|
|
} else { /* 1 bytes */
|
|
|
|
*pOut=dmapE22U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapE22U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
} else if (*pIn == 0x8F) { /* SS3 */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if (cd->fromCcsid == 33722) /* IBM-eucJP */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
if (cd->fromCcsid == 970 || /* IBM-eucKR */
|
|
|
|
cd->fromCcsid == 964 || /* IBM-eucTW */
|
|
|
|
cd->fromCcsid == 1383 || /* IBM-eucCN */
|
|
|
|
(cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE32U[offset] == 0x00 &&
|
|
|
|
dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (cd->fromCcsid == 33722)
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0) * 0x60 + 0x60;
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset+=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE32U[offset] == 0x00 &&
|
|
|
|
dmapE32U[offset+1] == 0x00) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
*pOut=dmapE32U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapE32U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapE32U[offset] == 0xFF &&
|
|
|
|
dmapE32U[offset+1] == 0xFD) {
|
|
|
|
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=3;
|
|
|
|
} else { /* 0x8F + 1 bytes */
|
|
|
|
*pOut=dmapE32U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapE32U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
offset=*pIn;
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE02U[offset] == 0x00 &&
|
|
|
|
dmapE02U[offset+1] == 0x00) { /* SS1 */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
|
|
|
|
(cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
|
|
|
|
(cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
|
|
|
|
(cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0) * 0x60;
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset+=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE12U[offset] == 0x00 &&
|
|
|
|
dmapE12U[offset+1] == 0x00) { /* undefined mapping */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
*pOut=dmapE12U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapE12U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapE12U[offset] == 0xFF &&
|
|
|
|
dmapE12U[offset+1] == 0xFD) {
|
|
|
|
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
} else {
|
|
|
|
*pOut=dmapE02U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapE02U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapE02U[offset] == 0x00 &&
|
|
|
|
dmapE02U[offset+1] == 0x1A) {
|
|
|
|
if (*pIn != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_E28) {
|
|
|
|
/* use uchar * instead of UniChar to avoid memcpy */
|
|
|
|
register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
|
|
|
|
register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
|
|
|
|
register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
|
|
|
|
register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register int offset;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register size_t numS=0;
|
|
|
|
register UniChar in; /* copy part of U28 */
|
|
|
|
register UniChar ucs2;
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else {
|
|
|
|
if (*pIn == 0x8E) { /* SS2 */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if (cd->fromCcsid == 33722 || /* IBM-eucJP */
|
|
|
|
cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE22U[offset] == 0x00 &&
|
|
|
|
dmapE22U[offset+1] == 0x00) { /* 2 bytes */
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0) * 0x60 + 0x60;
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset+=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE22U[offset] == 0x00 &&
|
|
|
|
dmapE22U[offset+1] == 0x00) {
|
|
|
|
if (cd->fromCcsid == 964) /* IBM-eucTW */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
in=dmapE22U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapE22U[offset+1];
|
|
|
|
if (dmapE22U[offset] == 0xFF &&
|
|
|
|
dmapE22U[offset+1] == 0xFD) {
|
|
|
|
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=3;
|
|
|
|
} else { /* 1 bytes */
|
|
|
|
in=dmapE22U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapE22U[offset+1];
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
} else if (*pIn == 0x8F) { /* SS3 */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if (cd->fromCcsid == 33722) /* IBM-eucJP */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
if (cd->fromCcsid == 970 || /* IBM-eucKR */
|
|
|
|
cd->fromCcsid == 964 || /* IBM-eucTW */
|
|
|
|
cd->fromCcsid == 1383 || /* IBM-eucCN */
|
|
|
|
(cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE32U[offset] == 0x00 &&
|
|
|
|
dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (cd->fromCcsid == 33722)
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0) * 0x60 + 0x60;
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset+=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE32U[offset] == 0x00 &&
|
|
|
|
dmapE32U[offset+1] == 0x00) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
in=dmapE32U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapE32U[offset+1];
|
|
|
|
if (dmapE32U[offset] == 0xFF &&
|
|
|
|
dmapE32U[offset+1] == 0xFD) {
|
|
|
|
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=3;
|
|
|
|
} else { /* 0x8F + 1 bytes */
|
|
|
|
in=dmapE32U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapE32U[offset+1];
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
offset=*pIn;
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE02U[offset] == 0x00 &&
|
|
|
|
dmapE02U[offset+1] == 0x00) { /* SS1 */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
|
|
|
|
(cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
|
|
|
|
(cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
|
|
|
|
(cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset=(*pIn - 0xA0) * 0x60;
|
|
|
|
++pIn;
|
|
|
|
if (*pIn < 0xA0) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset+=(*pIn - 0xA0);
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapE12U[offset] == 0x00 &&
|
|
|
|
dmapE12U[offset+1] == 0x00) { /* undefined mapping */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
in=dmapE12U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapE12U[offset+1];
|
|
|
|
if (dmapE12U[offset] == 0xFF &&
|
|
|
|
dmapE12U[offset+1] == 0xFD) {
|
|
|
|
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
} else {
|
|
|
|
in=dmapE02U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapE02U[offset+1];
|
|
|
|
if (dmapE02U[offset] == 0x00 &&
|
|
|
|
dmapE02U[offset+1] == 0x1A) {
|
|
|
|
if (*pIn != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ucs2=in;
|
|
|
|
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
|
|
|
|
register uchar byte;
|
|
|
|
in>>=6;
|
|
|
|
in&=0x001F; /* 0b0000000000011111 */
|
|
|
|
in|=0x00C0; /* 0b0000000011000000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
byte=ucs2; /* dmapD12U[offset+1]; */
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xFC00) == 0xD800) {
|
|
|
|
*pOut=0xEF;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0xBF;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0xBD;
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
register uchar byte;
|
|
|
|
register uchar work;
|
|
|
|
byte=(ucs2>>8); /* dmapD12U[offset]; */
|
|
|
|
byte>>=4;
|
|
|
|
byte|=0xE0; /* 0b11100000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=(ucs2>>8); /* dmapD12U[offset]; */
|
|
|
|
byte<<=2;
|
|
|
|
work=ucs2; /* dmapD12U[offset+1]; */
|
|
|
|
work>>=6;
|
|
|
|
byte|=work;
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=ucs2; /* dmapD12U[offset+1]; */
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
/* end of U28 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_U2E) {
|
|
|
|
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
|
|
|
|
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
|
|
|
|
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register char * pSubD=(char *) &(cd->subD);
|
|
|
|
register size_t numS=0;
|
|
|
|
register size_t rc=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
|
|
|
|
if ((*pOut=dmapU2S[in]) == subS) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
in<<=1;
|
|
|
|
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
|
|
|
|
in*=1.5;
|
|
|
|
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
|
|
|
|
*pOut=pSubD[0];
|
|
|
|
++pOut;
|
|
|
|
*pOut=pSubD[1];
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
++rc;
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M3[in];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapU2M3[1+in];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapU2M3[2+in];
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M2[in];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2M2[1+in] == 0x00) {
|
|
|
|
if (*pOut == subS) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M2[1+in];
|
|
|
|
++pOut;
|
|
|
|
if (memcmp(pOut-2, pSubD, 2) == 0) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubD) {
|
|
|
|
++numS;
|
|
|
|
++rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return rc; /* compatibility to iconv() */
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_T2E) {
|
|
|
|
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
|
|
|
|
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
|
|
|
|
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register char * pSubD=(char *) &(cd->subD);
|
|
|
|
register size_t numS=0;
|
|
|
|
register size_t rc=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
++numS;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else if (0xD800 <= in && in <= 0xDBFF) { /* first byte of surrogate */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-2;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn+2;
|
|
|
|
++numS;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else if (0xDC00 <= in && in <= 0xDFFF) { /* second byte of surrogate */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
++numS;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
|
|
|
|
if ((*pOut=dmapU2S[in]) == subS) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
in<<=1;
|
|
|
|
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
|
|
|
|
in*=1.5;
|
|
|
|
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
|
|
|
|
*pOut=pSubD[0];
|
|
|
|
++pOut;
|
|
|
|
*pOut=pSubD[1];
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
++rc;
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M3[in];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapU2M3[1+in];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapU2M3[2+in];
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M2[in];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2M2[1+in] == 0x00) {
|
|
|
|
if (*pOut == subS) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M2[1+in];
|
|
|
|
++pOut;
|
|
|
|
if (memcmp(pOut-2, pSubD, 2) == 0) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubD) {
|
|
|
|
++numS;
|
|
|
|
++rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_82E) {
|
|
|
|
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
|
|
|
|
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
|
|
|
|
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register char * pSubD=(char *) &(cd->subD);
|
|
|
|
register size_t numS=0;
|
|
|
|
register size_t rc=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
uint32_t in2;
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
/* convert from UTF-8 to UCS-2 */
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
in=0x0000;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else { /* 82U: */
|
|
|
|
register uchar byte1=*pIn;
|
|
|
|
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
|
|
|
|
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
|
|
|
|
in=byte1;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
|
|
|
|
if (inLen < 2) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
/* 2 bytes sequence:
|
|
|
|
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
|
|
|
|
register uchar work=byte1;
|
|
|
|
work<<=6;
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2|=work;
|
|
|
|
|
|
|
|
byte1&=0x1F; /* 0b00011111; */
|
|
|
|
byte1>>=2;
|
|
|
|
in=byte1;
|
|
|
|
in<<=8;
|
|
|
|
in+=byte2;
|
|
|
|
inLen-=2;
|
|
|
|
++pIn;
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
|
|
|
|
/* 3 bytes sequence:
|
|
|
|
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
if ((byte2 & 0xC0) != 0x80 ||
|
|
|
|
(byte3 & 0xC0) != 0x80 ||
|
|
|
|
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
{
|
|
|
|
register uchar work=byte2;
|
|
|
|
work<<=6;
|
|
|
|
byte3&=0x3F; /* 0b00111111; */
|
|
|
|
byte3|=work;
|
|
|
|
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2>>=2;
|
|
|
|
|
|
|
|
byte1<<=4;
|
|
|
|
in=byte1 | byte2;;
|
|
|
|
in<<=8;
|
|
|
|
in+=byte3;
|
|
|
|
inLen-=3;
|
|
|
|
++pIn;
|
|
|
|
}
|
|
|
|
} else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* (bytes1 & 11111000) == 0x1110000 */
|
|
|
|
/* 4 bytes sequence
|
|
|
|
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
|
|
|
|
where uuuuu = wwww + 1 */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
register uchar byte4;
|
|
|
|
if (inLen < 4) {
|
|
|
|
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
|
|
|
|
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
|
|
|
|
(cd->toCcsid == 13488) )
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte4=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
|
|
|
|
register uchar work=byte2;
|
|
|
|
if (byte1 == 0xF0 && byte2 < 0x90) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
|
|
|
|
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
|
|
|
|
errno=EINVAL;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
work&=0x30; /* 0b00110000; */
|
|
|
|
work>>=4;
|
|
|
|
byte1&=0x07; /* 0b00000111; */
|
|
|
|
byte1<<=2;
|
|
|
|
byte1+=work; /* uuuuu */
|
|
|
|
--byte1; /* wwww */
|
|
|
|
|
|
|
|
work=byte1 & 0x0F;
|
|
|
|
work>>=2;
|
|
|
|
work+=0xD8; /* 0b11011011; */
|
|
|
|
in=work;
|
|
|
|
in<<=8;
|
|
|
|
|
|
|
|
byte1<<=6;
|
|
|
|
byte2<<=2;
|
|
|
|
byte2&=0x3C; /* 0b00111100; */
|
|
|
|
work=byte3;
|
|
|
|
work>>=4;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=byte1;
|
|
|
|
work|=byte2;
|
|
|
|
in+=work;
|
|
|
|
|
|
|
|
work=byte3;
|
|
|
|
work>>=2;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=0xDC; /* 0b110111xx; */
|
|
|
|
in2=work;
|
|
|
|
in2<<=8;
|
|
|
|
|
|
|
|
byte3<<=6;
|
|
|
|
byte4&=0x3F; /* 0b00111111; */
|
|
|
|
byte4|=byte3;
|
|
|
|
in2+=byte4;
|
|
|
|
inLen-=4;
|
|
|
|
++pIn;
|
|
|
|
#ifdef match_with_GBK
|
|
|
|
if ((0xD800 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD840 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD880 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD8C0 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD900 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD940 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD980 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD9C0 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDA00 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDA40 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDA80 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDAC0 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDB00 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDB40 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDB80 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDBC0 == in && in2 < 0xDC80)) {
|
|
|
|
#else
|
|
|
|
if ((0xD800 <= in && in <= 0xDBFF) &&
|
|
|
|
(0xDC00 <= in2 && in2 <= 0xDFFF)) {
|
|
|
|
#endif
|
|
|
|
*pOut=subS;
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* minic iconv() behavior */
|
|
|
|
if (inLen < 4 ||
|
|
|
|
(inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
|
|
|
|
pIn[1] < 0x80 || 0xBF < pIn[1] ||
|
|
|
|
pIn[2] < 0x80 || 0xBF < pIn[2] ||
|
|
|
|
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
|
|
|
|
if (inLen == 1)
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
} else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
|
|
|
|
pIn[2] < 0x82) {
|
|
|
|
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
pIn+=4;
|
|
|
|
inLen-=4;
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
*pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
|
|
|
|
++pOut;
|
|
|
|
*pOut=pSubD[1];
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
pIn+=4;
|
|
|
|
inLen-=4;
|
|
|
|
continue;
|
|
|
|
/* iconv() returns 0 with strange 1 byte converted values */
|
|
|
|
}
|
|
|
|
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* end of UTF-8 to UCS-2 */
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
|
|
|
|
if ((*pOut=dmapU2S[in]) == subS) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
in<<=1;
|
|
|
|
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
|
|
|
|
in*=1.5;
|
|
|
|
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
|
|
|
|
*pOut=pSubD[0];
|
|
|
|
++pOut;
|
|
|
|
*pOut=pSubD[1];
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
++rc;
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M3[in];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapU2M3[1+in];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapU2M3[2+in];
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M2[in];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2M2[1+in] == 0x00) {
|
|
|
|
if (*pOut == subS) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2M2[1+in];
|
|
|
|
++pOut;
|
|
|
|
if (memcmp(pOut-2, pSubD, 2) == 0) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubD) {
|
|
|
|
++numS;
|
|
|
|
++rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_S2U) {
|
|
|
|
/* use uchar * instead of UniChar to avoid memcpy */
|
|
|
|
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register int offset;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else {
|
|
|
|
offset=*pIn;
|
|
|
|
offset<<=1;
|
|
|
|
*pOut=dmapD12U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapD12U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapD12U[offset] == 0x00) {
|
|
|
|
if (dmapD12U[offset+1] == 0x1A) {
|
|
|
|
if (*pIn != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
} else if (dmapD12U[offset+1] == 0x00) {
|
|
|
|
pOut-=2;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_S28) {
|
|
|
|
/* use uchar * instead of UniChar to avoid memcpy */
|
|
|
|
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register int offset;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register size_t numS=0;
|
|
|
|
register UniChar in; /* copy part of U28 */
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else {
|
|
|
|
offset=*pIn;
|
|
|
|
offset<<=1;
|
|
|
|
in=dmapD12U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapD12U[offset+1];
|
|
|
|
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
|
|
|
|
if (in == 0x000) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
|
|
|
|
register uchar byte;
|
|
|
|
in>>=6;
|
|
|
|
in&=0x001F; /* 0b0000000000011111 */
|
|
|
|
in|=0x00C0; /* 0b0000000011000000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
byte=dmapD12U[offset+1];
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xFC00) == 0xD800) { /* There should not be no surrogate character in SBCS. */
|
|
|
|
*pOut=0xEF;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0xBF;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0xBD;
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
register uchar byte;
|
|
|
|
register uchar work;
|
|
|
|
byte=dmapD12U[offset];
|
|
|
|
byte>>=4;
|
|
|
|
byte|=0xE0; /* 0b11100000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=dmapD12U[offset];
|
|
|
|
byte<<=2;
|
|
|
|
work=dmapD12U[offset+1];
|
|
|
|
work>>=6;
|
|
|
|
byte|=work;
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=dmapD12U[offset+1];
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
/* end of U28 */
|
|
|
|
if (dmapD12U[offset] == 0x00) {
|
|
|
|
if (dmapD12U[offset+1] == 0x1A) {
|
|
|
|
if (*pIn != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_U2S) {
|
|
|
|
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
} else {
|
|
|
|
if ((*pOut=dmapU2S[in]) == 0x00) {
|
|
|
|
*pOut=subS;
|
|
|
|
++numS;
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
} else if (*pOut == subS) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++pOut;
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return numS;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_T2S) {
|
|
|
|
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
++numS;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
|
|
|
|
} else if (0xD800 <= in && in <= 0xDFFF) { /* 0xD800-0xDFFF, surrogate first and second values */
|
|
|
|
if (0xDC00 <= in ) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else if (inLen < 4) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-2;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn+2;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
register uint32_t in2;
|
|
|
|
in2=pIn[2];
|
|
|
|
in2<<=8;
|
|
|
|
in2+=pIn[3];
|
|
|
|
if (0xDC00 <= in2 && in2 <= 0xDFFF) { /* second surrogate character =0xDC00 - 0xDFFF*/
|
|
|
|
*pOut=subS;
|
|
|
|
++numS;
|
|
|
|
pIn+=4;
|
|
|
|
} else {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if ((*pOut=dmapU2S[in]) == 0x00) {
|
|
|
|
*pOut=subS;
|
|
|
|
++numS;
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
} else if (*pOut == subS) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++pOut;
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_82S) {
|
|
|
|
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
uint32_t in2; /* The second surrogate value */
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
/* convert from UTF-8 to UCS-2 */
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
in=0x0000;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else { /* 82U: */
|
|
|
|
register uchar byte1=*pIn;
|
|
|
|
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
|
|
|
|
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
|
|
|
|
in=byte1;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
|
|
|
|
if (inLen < 2) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
/* 2 bytes sequence:
|
|
|
|
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
|
|
|
|
register uchar work=byte1;
|
|
|
|
work<<=6;
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2|=work;
|
|
|
|
|
|
|
|
byte1&=0x1F; /* 0b00011111; */
|
|
|
|
byte1>>=2;
|
|
|
|
in=byte1;
|
|
|
|
in<<=8;
|
|
|
|
in+=byte2;
|
|
|
|
inLen-=2;
|
|
|
|
++pIn;
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
|
|
|
|
/* 3 bytes sequence:
|
|
|
|
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
if ((byte2 & 0xC0) != 0x80 ||
|
|
|
|
(byte3 & 0xC0) != 0x80 ||
|
|
|
|
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
{
|
|
|
|
register uchar work=byte2;
|
|
|
|
work<<=6;
|
|
|
|
byte3&=0x3F; /* 0b00111111; */
|
|
|
|
byte3|=work;
|
|
|
|
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2>>=2;
|
|
|
|
|
|
|
|
byte1<<=4;
|
|
|
|
in=byte1 | byte2;;
|
|
|
|
in<<=8;
|
|
|
|
in+=byte3;
|
|
|
|
inLen-=3;
|
|
|
|
++pIn;
|
|
|
|
}
|
|
|
|
} else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* (bytes1 & 11111000) == 0x1110000 */
|
|
|
|
((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* minic iconv() behavior */
|
|
|
|
/* 4 bytes sequence
|
|
|
|
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
|
|
|
|
where uuuuu = wwww + 1 */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
register uchar byte4;
|
|
|
|
if (inLen < 4) {
|
|
|
|
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
|
|
|
|
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
|
|
|
|
(cd->toCcsid == 13488) )
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte4=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
|
|
|
|
register uchar work=byte2;
|
|
|
|
if (byte1 == 0xF0 && byte2 < 0x90) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
|
|
|
|
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
|
|
|
|
errno=EINVAL;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
work&=0x30; /* 0b00110000; */
|
|
|
|
work>>=4;
|
|
|
|
byte1&=0x07; /* 0b00000111; */
|
|
|
|
byte1<<=2;
|
|
|
|
byte1+=work; /* uuuuu */
|
|
|
|
--byte1; /* wwww */
|
|
|
|
|
|
|
|
work=byte1 & 0x0F;
|
|
|
|
work>>=2;
|
|
|
|
work+=0xD8; /* 0b11011011; */
|
|
|
|
in=work;
|
|
|
|
in<<=8;
|
|
|
|
|
|
|
|
byte1<<=6;
|
|
|
|
byte2<<=2;
|
|
|
|
byte2&=0x3C; /* 0b00111100; */
|
|
|
|
work=byte3;
|
|
|
|
work>>=4;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=byte1;
|
|
|
|
work|=byte2;
|
|
|
|
in+=work;
|
|
|
|
|
|
|
|
work=byte3;
|
|
|
|
work>>=2;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=0xDC; /* 0b110111xx; */
|
|
|
|
in2=work;
|
|
|
|
in2<<=8;
|
|
|
|
|
|
|
|
byte3<<=6;
|
|
|
|
byte4&=0x3F; /* 0b00111111; */
|
|
|
|
byte4|=byte3;
|
|
|
|
in2+=byte4;
|
|
|
|
inLen-=4;
|
|
|
|
++pIn;
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if ((byte1 & 0xF0) == 0xF0) { /* minic iconv() behavior */
|
|
|
|
if (inLen < 4 ||
|
|
|
|
pIn[1] < 0x80 || 0xBF < pIn[1] ||
|
|
|
|
pIn[2] < 0x80 || 0xBF < pIn[2] ||
|
|
|
|
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
|
|
|
|
if (inLen == 1)
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
} else {
|
|
|
|
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
pIn+=4;
|
|
|
|
inLen-=4;
|
|
|
|
/* UTF-8_IBM-850 0xF0908080 : converted value does not match, iconv=0x00, dmap=0x7F
|
|
|
|
UTF-8_IBM-850 0xF0908081 : converted value does not match, iconv=0x01, dmap=0x7F
|
|
|
|
UTF-8_IBM-850 0xF0908082 : converted value does not match, iconv=0x02, dmap=0x7F
|
|
|
|
UTF-8_IBM-850 0xF0908083 : converted value does not match, iconv=0x03, dmap=0x7F
|
|
|
|
....
|
|
|
|
UTF-8_IBM-850 0xF09081BE : converted value does not match, iconv=0x7E, dmap=0x7F
|
|
|
|
UTF-8_IBM-850 0xF09081BF : converted value does not match, iconv=0x1C, dmap=0x7F
|
|
|
|
UTF-8_IBM-850 0xF09082A0 : converted value does not match, iconv=0xFF, dmap=0x7F
|
|
|
|
UTF-8_IBM-850 0xF09082A1 : converted value does not match, iconv=0xAD, dmap=0x7F
|
|
|
|
....
|
|
|
|
*/
|
|
|
|
continue;
|
|
|
|
/* iconv() returns 0 with strange 1 byte converted values */
|
|
|
|
}
|
|
|
|
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* end of UTF-8 to UCS-2 */
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
} else {
|
|
|
|
if ((*pOut=dmapU2S[in]) == 0x00) {
|
|
|
|
*pOut=subS;
|
|
|
|
++numS;
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
} else if (*pOut == subS) {
|
|
|
|
if (in != cd->srcSubS) {
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_D2U) {
|
|
|
|
/* use uchar * instead of UniChar to avoid memcpy */
|
|
|
|
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
|
|
|
|
register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register int offset;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else {
|
|
|
|
offset=*pIn;
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapD12U[offset] == 0x00 &&
|
|
|
|
dmapD12U[offset+1] == 0x00) { /* DBCS */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if (*pIn == 0x80 || *pIn == 0xFF ||
|
|
|
|
(cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
|
|
|
|
(cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
|
|
|
|
(cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset-=0x100;
|
|
|
|
++pIn;
|
|
|
|
offset<<=8;
|
|
|
|
offset+=(*pIn * 2);
|
|
|
|
if (dmapD22U[offset] == 0x00 &&
|
|
|
|
dmapD22U[offset+1] == 0x00) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
*pOut=dmapD22U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapD22U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapD22U[offset] == 0xFF &&
|
|
|
|
dmapD22U[offset+1] == 0xFD) {
|
|
|
|
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
} else { /* SBCS */
|
|
|
|
*pOut=dmapD12U[offset];
|
|
|
|
++pOut;
|
|
|
|
*pOut=dmapD12U[offset+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapD12U[offset] == 0x00 &&
|
|
|
|
dmapD12U[offset+1] == 0x1A) {
|
|
|
|
if (*pIn != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_D28) {
|
|
|
|
/* use uchar * instead of UniChar to avoid memcpy */
|
|
|
|
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
|
|
|
|
register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register int offset;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register size_t numS=0;
|
|
|
|
register UniChar in; /* copy part of U28 */
|
|
|
|
register UniChar ucs2;
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else {
|
|
|
|
offset=*pIn;
|
|
|
|
offset<<=1;
|
|
|
|
if (dmapD12U[offset] == 0x00 &&
|
|
|
|
dmapD12U[offset+1] == 0x00) { /* DBCS */
|
|
|
|
if (inLen < 2) {
|
|
|
|
if (*pIn == 0x80 || *pIn == 0xFF ||
|
|
|
|
(cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
|
|
|
|
(cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
|
|
|
|
(cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
offset-=0x100;
|
|
|
|
++pIn;
|
|
|
|
offset<<=8;
|
|
|
|
offset+=(*pIn * 2);
|
|
|
|
if (dmapD22U[offset] == 0x00 &&
|
|
|
|
dmapD22U[offset+1] == 0x00) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
in=dmapD22U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapD22U[offset+1];
|
|
|
|
ucs2=in;
|
|
|
|
if (dmapD22U[offset] == 0xFF &&
|
|
|
|
dmapD22U[offset+1] == 0xFD) {
|
|
|
|
if (in != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
inLen-=2;
|
|
|
|
} else { /* SBCS */
|
|
|
|
in=dmapD12U[offset];
|
|
|
|
in<<=8;
|
|
|
|
in+=dmapD12U[offset+1];
|
|
|
|
ucs2=in;
|
|
|
|
if (dmapD12U[offset] == 0x00 &&
|
|
|
|
dmapD12U[offset+1] == 0x1A) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
}
|
|
|
|
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
|
|
|
|
register uchar byte;
|
|
|
|
in>>=6;
|
|
|
|
in&=0x001F; /* 0b0000000000011111 */
|
|
|
|
in|=0x00C0; /* 0b0000000011000000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
byte=ucs2; /* dmapD12U[offset+1]; */
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xFC00) == 0xD800) { /* There should not be no surrogate character in SBCS. */
|
|
|
|
*pOut=0xEF;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0xBF;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0xBD;
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
register uchar byte;
|
|
|
|
register uchar work;
|
|
|
|
byte=(ucs2>>8); /* dmapD12U[offset]; */
|
|
|
|
byte>>=4;
|
|
|
|
byte|=0xE0; /* 0b11100000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=(ucs2>>8); /* dmapD12U[offset]; */
|
|
|
|
byte<<=2;
|
|
|
|
work=ucs2; /* dmapD12U[offset+1]; */
|
|
|
|
work>>=6;
|
|
|
|
byte|=work;
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=ucs2; /* dmapD12U[offset+1]; */
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
/* end of U28 */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_U2D) {
|
|
|
|
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register char * pSubD=(char *) &(cd->subD);
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
in<<=1;
|
|
|
|
*pOut=dmapU2D[in];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2D[in+1] == 0x00) { /* SBCS */
|
|
|
|
if (*pOut == subS) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2D[in+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2D[in] == pSubD[0] &&
|
|
|
|
dmapU2D[in+1] == pSubD[1]) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return numS; /* to minic iconv() behavior */
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_T2D) {
|
|
|
|
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register char * pSubD=(char *) &(cd->subD);
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
++numS;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else if (0xD800 <= in && in <= 0xDBFF) { /* first byte of surrogate */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-2;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn+2;
|
|
|
|
++numS;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else if (0xDC00 <= in && in <= 0xDFFF) { /* second byte of surrogate */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
++numS;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
in<<=1;
|
|
|
|
*pOut=dmapU2D[in];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2D[in+1] == 0x00) { /* SBCS */
|
|
|
|
if (*pOut == subS) {
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2D[in+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2D[in] == pSubD[0] &&
|
|
|
|
dmapU2D[in+1] == pSubD[1]) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0; /* to minic iconv() behavior */
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_82D) {
|
|
|
|
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register char subS=cd->subS;
|
|
|
|
register char * pSubD=(char *) &(cd->subD);
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
uint32_t in2;
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
/* convert from UTF-8 to UCS-2 */
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
in=0x0000;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else { /* 82U: */
|
|
|
|
register uchar byte1=*pIn;
|
|
|
|
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
|
|
|
|
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
|
|
|
|
in=byte1;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
|
|
|
|
if (inLen < 2) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
/* 2 bytes sequence:
|
|
|
|
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
|
|
|
|
register uchar work=byte1;
|
|
|
|
work<<=6;
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2|=work;
|
|
|
|
|
|
|
|
byte1&=0x1F; /* 0b00011111; */
|
|
|
|
byte1>>=2;
|
|
|
|
in=byte1;
|
|
|
|
in<<=8;
|
|
|
|
in+=byte2;
|
|
|
|
inLen-=2;
|
|
|
|
++pIn;
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
|
|
|
|
/* 3 bytes sequence:
|
|
|
|
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
if ((byte2 & 0xC0) != 0x80 ||
|
|
|
|
(byte3 & 0xC0) != 0x80 ||
|
|
|
|
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
{
|
|
|
|
register uchar work=byte2;
|
|
|
|
work<<=6;
|
|
|
|
byte3&=0x3F; /* 0b00111111; */
|
|
|
|
byte3|=work;
|
|
|
|
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2>>=2;
|
|
|
|
|
|
|
|
byte1<<=4;
|
|
|
|
in=byte1 | byte2;;
|
|
|
|
in<<=8;
|
|
|
|
in+=byte3;
|
|
|
|
inLen-=3;
|
|
|
|
++pIn;
|
|
|
|
}
|
|
|
|
} else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* (bytes1 & 11111000) == 0x1110000 */
|
|
|
|
/* 4 bytes sequence
|
|
|
|
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
|
|
|
|
where uuuuu = wwww + 1 */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
register uchar byte4;
|
|
|
|
if (inLen < 4) {
|
|
|
|
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
|
|
|
|
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
|
|
|
|
(cd->toCcsid == 13488) )
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte4=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
|
|
|
|
register uchar work=byte2;
|
|
|
|
if (byte1 == 0xF0 && byte2 < 0x90) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
|
|
|
|
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
|
|
|
|
errno=EINVAL;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
work&=0x30; /* 0b00110000; */
|
|
|
|
work>>=4;
|
|
|
|
byte1&=0x07; /* 0b00000111; */
|
|
|
|
byte1<<=2;
|
|
|
|
byte1+=work; /* uuuuu */
|
|
|
|
--byte1; /* wwww */
|
|
|
|
|
|
|
|
work=byte1 & 0x0F;
|
|
|
|
work>>=2;
|
|
|
|
work+=0xD8; /* 0b11011011; */
|
|
|
|
in=work;
|
|
|
|
in<<=8;
|
|
|
|
|
|
|
|
byte1<<=6;
|
|
|
|
byte2<<=2;
|
|
|
|
byte2&=0x3C; /* 0b00111100; */
|
|
|
|
work=byte3;
|
|
|
|
work>>=4;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=byte1;
|
|
|
|
work|=byte2;
|
|
|
|
in+=work;
|
|
|
|
|
|
|
|
work=byte3;
|
|
|
|
work>>=2;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=0xDC; /* 0b110111xx; */
|
|
|
|
in2=work;
|
|
|
|
in2<<=8;
|
|
|
|
|
|
|
|
byte3<<=6;
|
|
|
|
byte4&=0x3F; /* 0b00111111; */
|
|
|
|
byte4|=byte3;
|
|
|
|
in2+=byte4;
|
|
|
|
inLen-=4;
|
|
|
|
++pIn;
|
|
|
|
#ifdef match_with_GBK
|
|
|
|
if ((0xD800 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD840 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD880 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD8C0 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD900 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD940 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD980 == in && in2 < 0xDC80) ||
|
|
|
|
(0xD9C0 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDA00 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDA40 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDA80 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDAC0 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDB00 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDB40 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDB80 == in && in2 < 0xDC80) ||
|
|
|
|
(0xDBC0 == in && in2 < 0xDC80)) {
|
|
|
|
#else
|
|
|
|
if ((0xD800 <= in && in <= 0xDBFF) &&
|
|
|
|
(0xDC00 <= in2 && in2 <= 0xDFFF)) {
|
|
|
|
#endif
|
|
|
|
*pOut=subS;
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* minic iconv() behavior */
|
|
|
|
if (inLen < 4 ||
|
|
|
|
(inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
|
|
|
|
pIn[1] < 0x80 || 0xBF < pIn[1] ||
|
|
|
|
pIn[2] < 0x80 || 0xBF < pIn[2] ||
|
|
|
|
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
|
|
|
|
if (inLen == 1)
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
} else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
|
|
|
|
pIn[2] < 0x82) {
|
|
|
|
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
pIn+=4;
|
|
|
|
inLen-=4;
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
*pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
|
|
|
|
++pOut;
|
|
|
|
*pOut=pSubD[1];
|
|
|
|
++pOut;
|
|
|
|
++numS;
|
|
|
|
pIn+=4;
|
|
|
|
inLen-=4;
|
|
|
|
continue;
|
|
|
|
/* iconv() returns 0 with strange 1 byte converted values */
|
|
|
|
}
|
|
|
|
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* end of UTF-8 to UCS-2 */
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
in<<=1;
|
|
|
|
*pOut=dmapU2D[in];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2D[in+1] == 0x00) { /* SBCS */
|
|
|
|
if (dmapU2D[in] == subS) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubS)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
*pOut=dmapU2D[in+1];
|
|
|
|
++pOut;
|
|
|
|
if (dmapU2D[in] == pSubD[0] &&
|
|
|
|
dmapU2D[in+1] == pSubD[1]) {
|
|
|
|
in>>=1;
|
|
|
|
if (in != cd->srcSubD)
|
|
|
|
++numS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_82U) {
|
|
|
|
/* See http://unicode.org/versions/corrigendum1.html */
|
|
|
|
/* convert from UTF-8 to UTF-16 can cover all conversion from UTF-8 to UCS-2 */
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
if (*pIn == 0x00) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else { /* 82U: */
|
|
|
|
register uchar byte1=*pIn;
|
|
|
|
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
|
|
|
|
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
*pOut=byte1;
|
|
|
|
++pOut;
|
|
|
|
++pIn;
|
|
|
|
--inLen;
|
|
|
|
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
|
|
|
|
if (inLen < 2) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
/* 2 bytes sequence:
|
|
|
|
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
|
|
|
|
register uchar work=byte1;
|
|
|
|
work<<=6;
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2|=work;
|
|
|
|
|
|
|
|
byte1&=0x1F; /* 0b00011111; */
|
|
|
|
byte1>>=2;
|
|
|
|
*pOut=byte1;
|
|
|
|
++pOut;
|
|
|
|
*pOut=byte2;
|
|
|
|
++pOut;
|
|
|
|
inLen-=2;
|
|
|
|
++pIn;
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-1;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
|
|
|
|
/* 3 bytes sequence:
|
|
|
|
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
if (inLen < 3) {
|
|
|
|
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
if ((byte2 & 0xC0) != 0x80 ||
|
|
|
|
(byte3 & 0xC0) != 0x80 ||
|
|
|
|
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-2;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
{
|
|
|
|
register uchar work=byte2;
|
|
|
|
work<<=6;
|
|
|
|
byte3&=0x3F; /* 0b00111111; */
|
|
|
|
byte3|=work;
|
|
|
|
|
|
|
|
byte2&=0x3F; /* 0b00111111; */
|
|
|
|
byte2>>=2;
|
|
|
|
|
|
|
|
byte1<<=4;
|
|
|
|
*pOut=byte1 | byte2;;
|
|
|
|
++pOut;
|
|
|
|
*pOut=byte3;
|
|
|
|
++pOut;
|
|
|
|
inLen-=3;
|
|
|
|
++pIn;
|
|
|
|
}
|
|
|
|
} else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* (bytes1 & 11111000) == 0x1110000 */
|
|
|
|
((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* minic iconv() behavior */
|
|
|
|
/* 4 bytes sequence
|
|
|
|
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
|
|
|
|
where uuuuu = wwww + 1 */
|
|
|
|
register uchar byte2;
|
|
|
|
register uchar byte3;
|
|
|
|
register uchar byte4;
|
|
|
|
if (inLen < 4 || cd->toCcsid == 13488) {
|
|
|
|
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
|
|
|
|
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
|
|
|
|
(cd->toCcsid == 13488) )
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
++pIn;
|
|
|
|
byte2=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte3=*pIn;
|
|
|
|
++pIn;
|
|
|
|
byte4=*pIn;
|
|
|
|
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
|
|
|
|
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
|
|
|
|
register uchar work=byte2;
|
|
|
|
if (byte1 == 0xF0 && byte2 < 0x90) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
work&=0x30; /* 0b00110000; */
|
|
|
|
work>>=4;
|
|
|
|
byte1&=0x07; /* 0b00000111; */
|
|
|
|
byte1<<=2;
|
|
|
|
byte1+=work; /* uuuuu */
|
|
|
|
--byte1; /* wwww */
|
|
|
|
|
|
|
|
work=byte1 & 0x0F;
|
|
|
|
work>>=2;
|
|
|
|
work+=0xD8; /* 0b11011011; */
|
|
|
|
*pOut=work;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte1<<=6;
|
|
|
|
byte2<<=2;
|
|
|
|
byte2&=0x3C; /* 0b00111100; */
|
|
|
|
work=byte3;
|
|
|
|
work>>=4;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=byte1;
|
|
|
|
work|=byte2;
|
|
|
|
*pOut=work;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
work=byte3;
|
|
|
|
work>>=2;
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work|=0xDC; /* 0b110111xx; */
|
|
|
|
*pOut=work;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte3<<=6;
|
|
|
|
byte4&=0x3F; /* 0b00111111; */
|
|
|
|
byte4|=byte3;
|
|
|
|
*pOut=byte4;
|
|
|
|
++pOut;
|
|
|
|
inLen-=4;
|
|
|
|
++pIn;
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn-3;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
} else if ((byte1 & 0xF0) == 0xF0) {
|
|
|
|
if (cd->toCcsid == 13488) {
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
} else {
|
|
|
|
if (inLen == 1)
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
else
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else { /* invalid sequence */
|
|
|
|
errno=EILSEQ; /* 116 */
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
*numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_U28) {
|
|
|
|
/* See http://unicode.org/versions/corrigendum1.html */
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
// register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
|
|
|
|
register uchar byte;
|
|
|
|
in>>=6;
|
|
|
|
in&=0x001F; /* 0b0000000000011111 */
|
|
|
|
in|=0x00C0; /* 0b0000000011000000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
byte=pIn[1];
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
} else {
|
|
|
|
register uchar byte;
|
|
|
|
register uchar work;
|
|
|
|
byte=pIn[0];
|
|
|
|
byte>>=4;
|
|
|
|
byte|=0xE0; /* 0b11100000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=pIn[0];
|
|
|
|
byte<<=2;
|
|
|
|
work=pIn[1];
|
|
|
|
work>>=6;
|
|
|
|
byte|=work;
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=pIn[1];
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
// *numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_T28) { /* UTF-16_UTF-8 */
|
|
|
|
/* See http://unicode.org/versions/corrigendum1.html */
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register char * pOut=*outBuf;
|
|
|
|
register char * pIn=*inBuf;
|
|
|
|
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
|
|
|
|
// register size_t numS=0;
|
|
|
|
while (0 < inLen) {
|
|
|
|
register uint32_t in;
|
|
|
|
if (inLen == 1) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=0;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (pLastOutBuf < pOut)
|
|
|
|
break;
|
|
|
|
in=pIn[0];
|
|
|
|
in<<=8;
|
|
|
|
in+=pIn[1];
|
|
|
|
if (in == 0x0000) {
|
|
|
|
*pOut=0x00;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
|
|
|
|
register uchar byte;
|
|
|
|
in>>=6;
|
|
|
|
in&=0x001F; /* 0b0000000000011111 */
|
|
|
|
in|=0x00C0; /* 0b0000000011000000 */
|
|
|
|
*pOut=in;
|
|
|
|
++pOut;
|
|
|
|
byte=pIn[1];
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
} else if ((in & 0xFC00) == 0xD800) { /* in & 0b1111110000000000 == 0b1101100000000000, first surrogate character */
|
|
|
|
if (0xDC00 <= in ) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else if (inLen < 4) {
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-2;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn+2;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else if ((pIn[2] & 0xFC) != 0xDC) { /* pIn[2] & 0b11111100 == 0b11011100, second surrogate character */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-2;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn+2;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
register uchar byte;
|
|
|
|
register uchar work;
|
|
|
|
in>>=6;
|
|
|
|
in&=0x000F; /* 0b0000000000001111 */
|
|
|
|
byte=in; /* wwww */
|
|
|
|
++byte; /* uuuuu */
|
|
|
|
work=byte; /* save uuuuu */
|
|
|
|
byte>>=2;
|
|
|
|
byte|=0xF0; /* 0b11110000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=work;
|
|
|
|
byte&=0x03; /* 0b00000011; */
|
|
|
|
byte<<=4;
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
work=pIn[1];
|
|
|
|
work&=0x3C; /* 0b00111100; */
|
|
|
|
work>>=2;
|
|
|
|
byte|=work;
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=pIn[1];
|
|
|
|
byte&=0x03; /* 0b00000011; */
|
|
|
|
byte<<=4;
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
work=pIn[2];
|
|
|
|
work&=0x03; /* 0b00000011; */
|
|
|
|
work<<=2;
|
|
|
|
byte|=work;
|
|
|
|
work=pIn[3];
|
|
|
|
work>>=6;
|
|
|
|
byte|=work;
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=pIn[3];
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
} else if ((in & 0xFC00) == 0xDC00) { /* in & 0b11111100 == 0b11011100, second surrogate character */
|
|
|
|
errno=EINVAL; /* 22 */
|
|
|
|
*inBytesLeft=inLen-1;
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
register uchar byte;
|
|
|
|
register uchar work;
|
|
|
|
byte=pIn[0];
|
|
|
|
byte>>=4;
|
|
|
|
byte|=0xE0; /* 0b11100000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=pIn[0];
|
|
|
|
byte<<=2;
|
|
|
|
work=pIn[1];
|
|
|
|
work>>=6;
|
|
|
|
byte|=work;
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
|
|
|
|
byte=pIn[1];
|
|
|
|
byte&=0x3F; /* 0b00111111; */
|
|
|
|
byte|=0x80; /* 0b10000000; */
|
|
|
|
*pOut=byte;
|
|
|
|
++pOut;
|
|
|
|
}
|
|
|
|
pIn+=2;
|
|
|
|
inLen-=2;
|
|
|
|
}
|
|
|
|
*outBytesLeft-=(pOut-*outBuf);
|
|
|
|
*inBytesLeft=inLen;
|
|
|
|
*outBuf=pOut;
|
|
|
|
*inBuf=pIn;
|
|
|
|
// *numSub+=numS;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
} else if (cd->cnv_dmap->codingSchema == DMAP_U2U) { /* UTF-16_UCS-2 */
|
|
|
|
register int inLen=*inBytesLeft;
|
|
|
|
register int outLen=*outBytesLeft;
|
|
|
|
if (inLen <= outLen) {
|
|
|
|
memcpy(*outBuf, *inBuf, inLen);
|
|
|
|
(*outBytesLeft)-=inLen;
|
|
|
|
(*inBuf)+=inLen;
|
|
|
|
(*outBuf)+=inLen;
|
|
|
|
*inBytesLeft=0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
memcpy(*outBuf, *inBuf, outLen);
|
|
|
|
(*outBytesLeft)=0;
|
|
|
|
(*inBuf)+=outLen;
|
|
|
|
(*outBuf)+=outLen;
|
|
|
|
*inBytesLeft-=outLen;
|
|
|
|
return (*inBytesLeft);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
inline size_t myconv(myconv_t cd ,
|
|
|
|
char** inBuf,
|
|
|
|
size_t* inBytesLeft,
|
|
|
|
char** outBuf,
|
|
|
|
size_t* outBytesLeft,
|
|
|
|
size_t* numSub)
|
|
|
|
{
|
|
|
|
if (cd->converterType == CONVERTER_ICONV) {
|
|
|
|
return myconv_iconv(cd,inBuf,inBytesLeft,outBuf,outBytesLeft,numSub);
|
|
|
|
} else if (cd->converterType == CONVERTER_DMAP) {
|
|
|
|
return myconv_dmap(cd,inBuf,inBytesLeft,outBuf,outBytesLeft,numSub);
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline char * converterName(int32_t type)
|
|
|
|
{
|
|
|
|
if (type == CONVERTER_ICONV)
|
|
|
|
return "iconv";
|
|
|
|
else if (type == CONVERTER_DMAP)
|
|
|
|
return "dmap";
|
|
|
|
|
|
|
|
return "?????";
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
#define myconv(a,b,c,d,e,f) \
|
|
|
|
(((a)->converterType == CONVERTER_ICONV)? myconv_iconv((a),(b),(c),(d),(e),(f)): (((a)->converterType == CONVERTER_DMAP)? myconv_dmap((a),(b),(c),(d),(e),(f)): -1))
|
|
|
|
|
|
|
|
|
|
|
|
#define converterName(a) \
|
|
|
|
(((a) == CONVERTER_ICONV)? "iconv": ((a) == CONVERTER_DMAP)? "dmap": "?????")
|
|
|
|
#endif
|
|
|
|
|
|
|
|
void initMyconv();
|
|
|
|
void cleanupMyconv();
|
|
|
|
|
|
|
|
#endif
|