mariadb/storage/ibmdb2i/db2i_myconv.h
V Narayanan 0c66f4a64a Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
Some collations were causing IBMDB2I to report
inaccurate key range estimations to the optimizer
for LIKE clauses that select substrings. This can
be seen by running EXPLAIN. This problem primarily
affects multi-byte and unicode character sets.

This patch involves substantial changes to several
modules. There are a number of problems with the
character set and collation handling. These problems
have been or are being fixed,  and a comprehensive
test has been included which should provide much
better coverage than there was before. This test
is enabled only for IBM i 6.1, because that version
has support for the greatest number of collations.

mysql-test/suite/ibmdb2i/r/ibmdb2i_collations.result:
  Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
  
  result file for test case.
mysql-test/suite/ibmdb2i/t/ibmdb2i_collations.test:
  Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
  
  Tests for character sets and collations. This test
  is enabled only for IBM i 6.1, because that version
  has support for the greatest number of collations.
storage/ibmdb2i/db2i_conversion.cc:
  Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
  
  - Added support in convertFieldChars to enable records_in_range
    to determine how many substitute characters were inserted and
    to suppress conversion warnings.
  
  - Fixed bug which was causing all multi-byte and Unicode fields
    to be created as UTF16 (CCSID 1200) fields in DB2. The corrected
    code will now create UCS2 fields as UCS2 (CCSID 13488), UTF8
    fields (except for utf8_general_ci) as UTF8 (CCSID 1208), and
    all other multi-byte or Unicode fields as UTF16.  This will only
    affect tables that are newly created through the IBMDB2I storage
    engine. Existing IBMDB2I tables will retain the original CCSID
    until recreated. The existing behavior is believed to be
    functionally correct, but it may negatively impact performance
    by causing unnecessary character conversion. Additionally, users
    accessing IBMDB2I tables through DB2 should be aware that mixing 
    tables created before and after this change may require extra type
    casts or other workarounds.  For this reason, users who have
    existing IBMDB2I tables using a Unicode collation other than
    utf8_general_ci are encouraged to recreate their tables (e.g.
    ALTER TABLE t1 ENGINE=IBMDB2I) in order to get the updated CCSIDs
    associated with their DB2 tables.
  
  - Improved error reporting for unsupported character sets by forcing
    a check for the iconv conversion table at table creation time,
    rather than at data access time.
storage/ibmdb2i/db2i_myconv.h:
  Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
  
  Fix to set errno when iconv fails.
storage/ibmdb2i/db2i_rir.cc:
  Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
  
  Significant improvements were made to the records_in_range code
  that handles partial length string data in keys for optimizer plan
  estimation. Previously, to obtain an estimate for a partial key
  value, the implementation would perform any necessary character
  conversion and then attempt to determine the unpadded length of
  the partial key by searching for the minimum or maximum sort
  character. While this algorithm was sufficient for most single-byte
  character sets, it did not treat Unicode and multi-byte strings
  correctly. Furthermore, due to an operating system limitation,
  partial keys having UTF8 collations (ICU sort sequences in DB2)
  could not be estimated with this method.
  
  With this patch, the code no longer attempts to explicitly determine
  the unpadded length of the key. Instead, the entire key is converted
  (if necessary), including padding, and then passed to the operating
  system for estimation. Depending on the source and target character
  sets and collations, additional logic is required to correctly
  handle cases in which MySQL uses unconvertible or differently
  -weighted values to pad the key. The bulk of the patch exists
  to implement this additional logic.
storage/ibmdb2i/ha_ibmdb2i.h:
  Bug#45803 Inaccurate estimates for partial key values with IBMDB2I
  
  The convertFieldChars declaration was updated to support additional 
  optional behaviors.
2009-07-06 14:19:32 +05:30

3201 lines
99 KiB
C

/*
Licensed Materials - Property of IBM
DB2 Storage Engine Enablement
Copyright IBM Corporation 2007,2008
All rights reserved
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
(a) Redistributions of source code must retain this list of conditions, the
copyright notice in section {d} below, and the disclaimer following this
list of conditions.
(b) Redistributions in binary form must reproduce this list of conditions, the
copyright notice in section (d) below, and the disclaimer following this
list of conditions, in the documentation and/or other materials provided
with the distribution.
(c) The name of IBM may not be used to endorse or promote products derived from
this software without specific prior written permission.
(d) The text of the required copyright notice is:
Licensed Materials - Property of IBM
DB2 Storage Engine Enablement
Copyright IBM Corporation 2007,2008
All rights reserved
THIS SOFTWARE IS PROVIDED BY IBM CORPORATION "AS IS" AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL IBM CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.
*/
/**
@file
@brief A direct map optimization of iconv and related functions
This was shown to significantly reduce character conversion cost
for short strings when compared to calling iconv system code.
*/
#ifndef DB2I_MYCONV_H
#define DB2I_MYCONV_H
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <wchar.h>
#include <errno.h>
#include <iconv.h>
#include <ctype.h>
#include <time.h>
#include <stdarg.h>
#include <string.h>
/* Boolean constants, defined only when no earlier header has supplied them. */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
/*
 * Linkage helpers used by the declarations and converters in this header.
 * INTERN marks functions whose bodies are written in this header: it expands
 * to "inline" when compiled as C++ and to "static" when compiled as C.
 * EXTERN marks symbols that are only declared here: under C++ it forces
 * C linkage, under C it is a plain "extern".
 */
#ifdef __cplusplus
#define INTERN inline
#define EXTERN extern "C"
#else
#define INTERN static
#define EXTERN extern
#endif
/*
 * ANSI fixed-width integer data types.
 * Only the __OS400_TGTVRM__ branch supplies typedefs here; the PASE and
 * __64BIT__ branches are empty, so those builds presumably obtain the same
 * names from system headers -- TODO confirm.
 */
#if defined(__OS400_TGTVRM__)
/* for DTAMDL(*P128), datamodel(P128): int/long/pointer=4/4/16 bytes */
/* NOTE(review): the original author was unsure whether LLP64 (4/4/8) is used for teraspace */
typedef short int16_t;
typedef unsigned short uint16_t;
typedef int int32_t;
typedef unsigned int uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
#elif defined(PASE)
/* PASE uses ILP32: int/long/pointer=4/4/4 + long long */
#elif defined(__64BIT__)
/* AIX 64 bit uses LP64: int/long/pointer=4/8/8 */
#endif
/*
 * Converter back ends. struct __myconv_rec holds either an iconv_t or a
 * dmap_t in a union; these values presumably go in its converterType field
 * to say which one is active -- confirm against myconv_open().
 */
#define CONVERTER_ICONV 1
#define CONVERTER_DMAP 2
/*
 * Direct-map coding schemas, stored in __dmap_rec.codingSchema and tested by
 * myconv_dmap() to select a conversion routine.
 * The names read as DMAP_<source>2<target>. Letter meanings, as far as they
 * can be inferred from the table comments and the myconv_dmap() branches:
 *   S = SBCS, D = DBCS, E = EUC, U = UCS-2, 8 = UTF-8,
 *   T = UTF-16 (the T2E branch rejects surrogate code units; the U2E branch
 *       does not) -- presumably the same for the other T* schemas.
 */
/* SBCS / DBCS / EUC source, single-byte or 2-byte Unicode target */
#define DMAP_S2S 10
#define DMAP_S2U 20
#define DMAP_D2U 30
#define DMAP_E2U 40
/* 2-byte Unicode source, SBCS / DBCS / EUC target */
#define DMAP_U2S 120
#define DMAP_T2S 125
#define DMAP_U2D 130
#define DMAP_T2D 135
#define DMAP_U2E 140
#define DMAP_T2E 145
/* SBCS / DBCS / EUC source, UTF-8 target */
#define DMAP_S28 220
#define DMAP_D28 230
#define DMAP_E28 240
/* UTF-8 source, SBCS / DBCS / EUC target */
#define DMAP_82S 310
#define DMAP_82D 320
#define DMAP_82E 330
/* Unicode to Unicode */
#define DMAP_U28 410
#define DMAP_82U 420
#define DMAP_T28 425
#define DMAP_U2U 510
/* Handle to a set of direct-map conversion tables. */
typedef struct __dmap_rec *dmap_t;
/*
 * Lookup tables for one direct-map converter.
 * codingSchema selects which of the table pointers are meaningful; the
 * others are not used by that converter.
 */
struct __dmap_rec
{
uint32_t codingSchema; /* one of the DMAP_* constants; myconv_dmap() branches on it */
unsigned char * dmapS2S; /* SBCS -> SBCS, indexed by the input byte */
/*
 * The tables below map to UCS-2/UTF-16. For a UTF-8 target the result must
 * then be converted from UCS-2/UTF-16 to UTF-8.
 */
UniChar * dmapD12U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
UniChar * dmapD22U; /* DBCS(non-EUC) -> UCS-2/UTF-16 */
UniChar * dmapE02U; /* EUC/SS0 -> UCS-2/UTF-16; myconv_dmap() treats a zero entry as "not a single-byte character" */
UniChar * dmapE12U; /* EUC/SS1 -> UCS-2/UTF-16 */
UniChar * dmapE22U; /* EUC/0x8E + SS2 -> UCS-2/UTF-16 */
UniChar * dmapE32U; /* EUC/0x8F + SS3 -> UCS-2/UTF-16 */
uchar * dmapU2D; /* UCS-2 -> DBCS */
uchar * dmapU2S; /* UCS-2 -> EUC SS0; consulted only for code units below 0x100 */
uchar * dmapU2M2; /* UCS-2 -> EUC SS1; 2 bytes per entry, myconv_dmap() biases the base by -0x80 * 2 */
uchar * dmapU2M3; /* UCS-2 -> EUC SS2/SS3; 3 bytes per entry, myconv_dmap() biases the base by -0x80 * 3 */
/* Not all of these pointers/tables are used at the same time.
* You may be able to save some space by consolidating them.
*/
uchar * dmapS28; /* SBCS -> UTF-8 */
uchar * dmapD28; /* DBCS -> UTF-8 */
};
/* Conversion descriptor handle passed to myconv_iconv() and myconv_dmap(). */
typedef struct __myconv_rec *myconv_t;
/*
 * State of one open converter: which back end is in use, the CCSIDs and
 * codepage names involved, and the substitution characters used to detect
 * and count substituted output.
 */
struct __myconv_rec
{
uint32_t converterType; /* presumably CONVERTER_ICONV or CONVERTER_DMAP -- confirm against myconv_open() */
uint32_t index; /* for close */
/* Back-end handle: myconv_iconv() reads cnv_iconv, myconv_dmap() reads cnv_dmap. */
union {
iconv_t cnv_iconv;
dmap_t cnv_dmap;
};
int32_t allocatedSize;
int32_t fromCcsid; /* source CCSID; myconv_dmap() uses it to pick EINVAL vs. EILSEQ for EUC input */
int32_t toCcsid; /* target CCSID */
UniChar subD; /* DBCS substitution char (target codepage) */
char subS; /* SBCS substitution char (target codepage) */
UniChar srcSubD; /* DBCS substitution char of src codepage; input equal to it is not counted as a substitution */
char srcSubS; /* SBCS substitution char of src codepage; input equal to it is not counted as a substitution */
char from [41+1]; /* codepage name is up to 41 bytes */
char to [41+1]; /* codepage name is up to 41 bytes */
/* Padding; the original note says it aligns the record to 128 (units not stated). */
#ifdef __64BIT__
char reserved[10]; /* align 128 */
#else
char reserved[14]; /* align 128 */
#endif
};
/*
 * Symbols declared here without a definition in this header.
 * NOTE(review): the parameters are unnamed; the const char * arguments
 * are presumably codepage names -- confirm against the definitions.
 */
EXTERN int32_t myconvDebug; /* presumably a debug switch/level -- confirm */
EXTERN int myconvGetES(CCSID); /* takes a CCSID; "ES" presumably means encoding scheme -- confirm */
/* Classification predicates on a codepage. */
EXTERN int myconvIsEBCDIC(const char *);
EXTERN int myconvIsASCII(const char *);
EXTERN int myconvIsUnicode(const char *); /* UTF-8, UTF-16, or UCS-2 */
EXTERN int myconvIsUnicode2(const char *); /* 2 byte Unicode */
EXTERN int myconvIsUCS2(const char *);
EXTERN int myconvIsUTF16(const char *);
EXTERN int myconvIsUTF8(const char *);
EXTERN int myconvIsEUC(const char *);
EXTERN int myconvIsISO(const char *);
EXTERN int myconvIsSBCS(const char *);
EXTERN int myconvIsDBCS(const char *);
/* Substitution characters (single-byte and double-byte) for a codepage. */
EXTERN char myconvGetSubS(const char *);
EXTERN UniChar myconvGetSubD(const char *);
/*
 * Open and close a conversion descriptor.
 * NOTE(review): the order and meaning of myconv_open()'s two strings and its
 * int32_t argument are not visible in this header -- check the definition.
 */
EXTERN myconv_t myconv_open(const char*, const char*, int32_t);
EXTERN int myconv_close(myconv_t);
/**
  Convert a buffer with the system iconv() descriptor held in @p cd.

  All buffer arguments are forwarded to iconv() unchanged, so they are
  advanced and decremented exactly as iconv() does.

  @param cd            conversion descriptor; only cd->cnv_iconv is read
  @param inBuf         address of the input cursor
  @param inBytesLeft   address of the remaining input byte count
  @param outBuf        address of the output cursor
  @param outBytesLeft  address of the remaining output byte count
  @param numSub        accepted for signature parity with myconv_dmap();
                       neither read nor written here

  @return whatever iconv() returns for the call
*/
INTERN size_t myconv_iconv(myconv_t cd ,
                           char** inBuf,
                           size_t* inBytesLeft,
                           char** outBuf,
                           size_t* outBytesLeft,
                           size_t* numSub)
{
  size_t result;

  (void) numSub; /* the iconv back end does not report substitutions */
  result = iconv(cd->cnv_iconv, inBuf, inBytesLeft, outBuf, outBytesLeft);
  return result;
}
INTERN size_t myconv_dmap(myconv_t cd,
char** inBuf,
size_t* inBytesLeft,
char** outBuf,
size_t* outBytesLeft,
size_t* numSub)
{
if (cd->cnv_dmap->codingSchema == DMAP_S2S) {
register unsigned char * dmapS2S=cd->cnv_dmap->dmapS2S;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register size_t numS=0;
while (0 < inLen) {
if (pLastOutBuf < pOut)
break;
if (*pIn == 0x00) {
*pOut=0x00;
} else {
*pOut=dmapS2S[*pIn];
if (*pOut == 0x00) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(*inBytesLeft-inLen);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
if (*pOut == subS) {
if ((*pOut=dmapS2S[*pIn]) == subS) {
if (*pIn != cd->srcSubS)
++numS;
}
}
}
++pIn;
--inLen;
++pOut;
}
*outBytesLeft-=(*inBytesLeft-inLen);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_E2U) {
/* use uchar * instead of UniChar to avoid memcpy */
register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register int offset;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register size_t numS=0;
while (0 < inLen) {
if (pLastOutBuf < pOut)
break;
if (*pIn == 0x00) {
*pOut=0x00;
++pOut;
*pOut=0x00;
++pOut;
++pIn;
--inLen;
} else {
if (*pIn == 0x8E) { /* SS2 */
if (inLen < 2) {
if (cd->fromCcsid == 33722 || /* IBM-eucJP */
cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
++pIn;
if (*pIn < 0xA0) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0);
offset<<=1;
if (dmapE22U[offset] == 0x00 &&
dmapE22U[offset+1] == 0x00) { /* 2 bytes */
if (inLen < 3) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0) * 0x60 + 0x60;
++pIn;
if (*pIn < 0xA0) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
offset+=(*pIn - 0xA0);
offset<<=1;
if (dmapE22U[offset] == 0x00 &&
dmapE22U[offset+1] == 0x00) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
*pOut=dmapE22U[offset];
++pOut;
*pOut=dmapE22U[offset+1];
++pOut;
if (dmapE22U[offset] == 0xFF &&
dmapE22U[offset+1] == 0xFD) {
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
++numS;
}
++pIn;
inLen-=3;
} else { /* 1 bytes */
*pOut=dmapE22U[offset];
++pOut;
*pOut=dmapE22U[offset+1];
++pOut;
++pIn;
inLen-=2;
}
} else if (*pIn == 0x8F) { /* SS3 */
if (inLen < 2) {
if (cd->fromCcsid == 33722) /* IBM-eucJP */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
++pIn;
if (*pIn < 0xA0) {
if (cd->fromCcsid == 970 || /* IBM-eucKR */
cd->fromCcsid == 964 || /* IBM-eucTW */
cd->fromCcsid == 1383 || /* IBM-eucCN */
(cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0);
offset<<=1;
if (dmapE32U[offset] == 0x00 &&
dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
if (inLen < 3) {
if (cd->fromCcsid == 33722)
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0) * 0x60 + 0x60;
++pIn;
if (*pIn < 0xA0) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
offset+=(*pIn - 0xA0);
offset<<=1;
if (dmapE32U[offset] == 0x00 &&
dmapE32U[offset+1] == 0x00) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
*pOut=dmapE32U[offset];
++pOut;
*pOut=dmapE32U[offset+1];
++pOut;
if (dmapE32U[offset] == 0xFF &&
dmapE32U[offset+1] == 0xFD) {
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
++numS;
}
++pIn;
inLen-=3;
} else { /* 0x8F + 1 bytes */
*pOut=dmapE32U[offset];
++pOut;
*pOut=dmapE32U[offset+1];
++pOut;
++pIn;
inLen-=2;
}
} else {
offset=*pIn;
offset<<=1;
if (dmapE02U[offset] == 0x00 &&
dmapE02U[offset+1] == 0x00) { /* SS1 */
if (inLen < 2) {
if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
(cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
(cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
(cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
if (*pIn < 0xA0) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
offset=(*pIn - 0xA0) * 0x60;
++pIn;
if (*pIn < 0xA0) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset+=(*pIn - 0xA0);
offset<<=1;
if (dmapE12U[offset] == 0x00 &&
dmapE12U[offset+1] == 0x00) { /* undefined mapping */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
*pOut=dmapE12U[offset];
++pOut;
*pOut=dmapE12U[offset+1];
++pOut;
if (dmapE12U[offset] == 0xFF &&
dmapE12U[offset+1] == 0xFD) {
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
++numS;
}
++pIn;
inLen-=2;
} else {
*pOut=dmapE02U[offset];
++pOut;
*pOut=dmapE02U[offset+1];
++pOut;
if (dmapE02U[offset] == 0x00 &&
dmapE02U[offset+1] == 0x1A) {
if (*pIn != cd->srcSubS)
++numS;
}
++pIn;
--inLen;
}
}
}
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_E28) {
/* use uchar * instead of UniChar to avoid memcpy */
register uchar * dmapE02U=(uchar *) (cd->cnv_dmap->dmapE02U);
register uchar * dmapE12U=(uchar *) (cd->cnv_dmap->dmapE12U);
register uchar * dmapE22U=(uchar *) (cd->cnv_dmap->dmapE22U);
register uchar * dmapE32U=(uchar *) (cd->cnv_dmap->dmapE32U);
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register int offset;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register size_t numS=0;
register UniChar in; /* copy part of U28 */
register UniChar ucs2;
while (0 < inLen) {
if (pLastOutBuf < pOut)
break;
if (*pIn == 0x00) {
*pOut=0x00;
++pOut;
++pIn;
--inLen;
} else {
if (*pIn == 0x8E) { /* SS2 */
if (inLen < 2) {
if (cd->fromCcsid == 33722 || /* IBM-eucJP */
cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
++pIn;
if (*pIn < 0xA0) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0);
offset<<=1;
if (dmapE22U[offset] == 0x00 &&
dmapE22U[offset+1] == 0x00) { /* 2 bytes */
if (inLen < 3) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0) * 0x60 + 0x60;
++pIn;
if (*pIn < 0xA0) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
offset+=(*pIn - 0xA0);
offset<<=1;
if (dmapE22U[offset] == 0x00 &&
dmapE22U[offset+1] == 0x00) {
if (cd->fromCcsid == 964) /* IBM-eucTW */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
in=dmapE22U[offset];
in<<=8;
in+=dmapE22U[offset+1];
if (dmapE22U[offset] == 0xFF &&
dmapE22U[offset+1] == 0xFD) {
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
++numS;
}
++pIn;
inLen-=3;
} else { /* 1 bytes */
in=dmapE22U[offset];
in<<=8;
in+=dmapE22U[offset+1];
++pIn;
inLen-=2;
}
} else if (*pIn == 0x8F) { /* SS3 */
if (inLen < 2) {
if (cd->fromCcsid == 33722) /* IBM-eucJP */
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
++pIn;
if (*pIn < 0xA0) {
if (cd->fromCcsid == 970 || /* IBM-eucKR */
cd->fromCcsid == 964 || /* IBM-eucTW */
cd->fromCcsid == 1383 || /* IBM-eucCN */
(cd->fromCcsid == 33722 && 3 <= inLen)) /* IBM-eucJP */
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0);
offset<<=1;
if (dmapE32U[offset] == 0x00 &&
dmapE32U[offset+1] == 0x00) { /* 0x8F + 2 bytes */
if (inLen < 3) {
if (cd->fromCcsid == 33722)
errno=EINVAL; /* 22 */
else
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset=(*pIn - 0xA0) * 0x60 + 0x60;
++pIn;
if (*pIn < 0xA0) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
offset+=(*pIn - 0xA0);
offset<<=1;
if (dmapE32U[offset] == 0x00 &&
dmapE32U[offset+1] == 0x00) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
return -1;
}
in=dmapE32U[offset];
in<<=8;
in+=dmapE32U[offset+1];
if (dmapE32U[offset] == 0xFF &&
dmapE32U[offset+1] == 0xFD) {
if (pIn[-2] * 0x100 + pIn[-1] != cd->srcSubD)
++numS;
}
++pIn;
inLen-=3;
} else { /* 0x8F + 1 bytes */
in=dmapE32U[offset];
in<<=8;
in+=dmapE32U[offset+1];
++pIn;
inLen-=2;
}
} else {
offset=*pIn;
offset<<=1;
if (dmapE02U[offset] == 0x00 &&
dmapE02U[offset+1] == 0x00) { /* SS1 */
if (inLen < 2) {
if ((cd->fromCcsid == 33722 && (*pIn == 0xA0 || (0xA9 <= *pIn && *pIn <= 0xAF) || *pIn == 0xFF)) ||
(cd->fromCcsid == 970 && (*pIn == 0xA0 || *pIn == 0xAD || *pIn == 0xAE || *pIn == 0xAF || *pIn == 0xFF)) ||
(cd->fromCcsid == 964 && (*pIn == 0xA0 || (0xAA <= *pIn && *pIn <= 0xC1) || *pIn == 0xC3 || *pIn == 0xFE || *pIn == 0xFF)) ||
(cd->fromCcsid == 1383 && (*pIn == 0xA0 || *pIn == 0xFF)))
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
if (*pIn < 0xA0) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
offset=(*pIn - 0xA0) * 0x60;
++pIn;
if (*pIn < 0xA0) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
offset+=(*pIn - 0xA0);
offset<<=1;
if (dmapE12U[offset] == 0x00 &&
dmapE12U[offset+1] == 0x00) { /* undefined mapping */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
return -1;
}
in=dmapE12U[offset];
in<<=8;
in+=dmapE12U[offset+1];
if (dmapE12U[offset] == 0xFF &&
dmapE12U[offset+1] == 0xFD) {
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
++numS;
}
++pIn;
inLen-=2;
} else {
in=dmapE02U[offset];
in<<=8;
in+=dmapE02U[offset+1];
if (dmapE02U[offset] == 0x00 &&
dmapE02U[offset+1] == 0x1A) {
if (*pIn != cd->srcSubS)
++numS;
}
++pIn;
--inLen;
}
}
ucs2=in;
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
*pOut=in;
++pOut;
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
register uchar byte;
in>>=6;
in&=0x001F; /* 0b0000000000011111 */
in|=0x00C0; /* 0b0000000011000000 */
*pOut=in;
++pOut;
byte=ucs2; /* dmapD12U[offset+1]; */
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
} else if ((in & 0xFC00) == 0xD800) {
*pOut=0xEF;
++pOut;
*pOut=0xBF;
++pOut;
*pOut=0xBD;
++pOut;
} else {
register uchar byte;
register uchar work;
byte=(ucs2>>8); /* dmapD12U[offset]; */
byte>>=4;
byte|=0xE0; /* 0b11100000; */
*pOut=byte;
++pOut;
byte=(ucs2>>8); /* dmapD12U[offset]; */
byte<<=2;
work=ucs2; /* dmapD12U[offset+1]; */
work>>=6;
byte|=work;
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
byte=ucs2; /* dmapD12U[offset+1]; */
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
}
/* end of U28 */
}
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_U2E) {
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register char * pSubD=(char *) &(cd->subD);
register size_t numS=0;
register size_t rc=0;
while (0 < inLen) {
register uint32_t in;
if (inLen == 1) {
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
if (pLastOutBuf < pOut)
break;
in=pIn[0];
in<<=8;
in+=pIn[1];
if (in == 0x0000) {
*pOut=0x00;
++pOut;
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
if ((*pOut=dmapU2S[in]) == subS) {
if (in != cd->srcSubS)
++numS;
}
++pOut;
} else {
in<<=1;
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
in*=1.5;
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
*pOut=pSubD[0];
++pOut;
*pOut=pSubD[1];
++pOut;
++numS;
++rc;
} else {
*pOut=dmapU2M3[in];
++pOut;
*pOut=dmapU2M3[1+in];
++pOut;
*pOut=dmapU2M3[2+in];
++pOut;
}
} else {
*pOut=dmapU2M2[in];
++pOut;
if (dmapU2M2[1+in] == 0x00) {
if (*pOut == subS) {
in>>=1;
if (in != cd->srcSubS)
++numS;
}
} else {
*pOut=dmapU2M2[1+in];
++pOut;
if (memcmp(pOut-2, pSubD, 2) == 0) {
in>>=1;
if (in != cd->srcSubD) {
++numS;
++rc;
}
}
}
}
}
pIn+=2;
inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return rc; /* compatibility to iconv() */
} else if (cd->cnv_dmap->codingSchema == DMAP_T2E) {
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register char * pSubD=(char *) &(cd->subD);
register size_t numS=0;
register size_t rc=0;
while (0 < inLen) {
register uint32_t in;
if (inLen == 1) {
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen-1;
*outBuf=pOut;
*inBuf=pIn;
++numS;
*numSub+=numS;
return 0;
}
if (pLastOutBuf < pOut)
break;
in=pIn[0];
in<<=8;
in+=pIn[1];
if (in == 0x0000) {
*pOut=0x00;
++pOut;
} else if (0xD800 <= in && in <= 0xDBFF) { /* first byte of surrogate */
errno=EINVAL; /* 22 */
*inBytesLeft=inLen-2;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn+2;
++numS;
*numSub+=numS;
return -1;
} else if (0xDC00 <= in && in <= 0xDFFF) { /* second byte of surrogate */
errno=EINVAL; /* 22 */
*inBytesLeft=inLen-1;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn;
++numS;
*numSub+=numS;
return -1;
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
if ((*pOut=dmapU2S[in]) == subS) {
if (in != cd->srcSubS)
++numS;
}
++pOut;
} else {
in<<=1;
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
in*=1.5;
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
*pOut=pSubD[0];
++pOut;
*pOut=pSubD[1];
++pOut;
++numS;
++rc;
} else {
*pOut=dmapU2M3[in];
++pOut;
*pOut=dmapU2M3[1+in];
++pOut;
*pOut=dmapU2M3[2+in];
++pOut;
}
} else {
*pOut=dmapU2M2[in];
++pOut;
if (dmapU2M2[1+in] == 0x00) {
if (*pOut == subS) {
in>>=1;
if (in != cd->srcSubS)
++numS;
}
} else {
*pOut=dmapU2M2[1+in];
++pOut;
if (memcmp(pOut-2, pSubD, 2) == 0) {
in>>=1;
if (in != cd->srcSubD) {
++numS;
++rc;
}
}
}
}
}
pIn+=2;
inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_82E) {
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
register uchar * dmapU2M2=cd->cnv_dmap->dmapU2M2 - 0x80 * 2;
register uchar * dmapU2M3=cd->cnv_dmap->dmapU2M3 - 0x80 * 3;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register char * pSubD=(char *) &(cd->subD);
register size_t numS=0;
register size_t rc=0;
while (0 < inLen) {
register uint32_t in;
uint32_t in2;
if (pLastOutBuf < pOut)
break;
/* convert from UTF-8 to UCS-2 */
if (*pIn == 0x00) {
in=0x0000;
++pIn;
--inLen;
} else { /* 82U: */
register uchar byte1=*pIn;
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
in=byte1;
++pIn;
--inLen;
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
if (inLen < 2) {
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
/* 2 bytes sequence:
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
register uchar byte2;
++pIn;
byte2=*pIn;
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
register uchar work=byte1;
work<<=6;
byte2&=0x3F; /* 0b00111111; */
byte2|=work;
byte1&=0x1F; /* 0b00011111; */
byte1>>=2;
in=byte1;
in<<=8;
in+=byte2;
inLen-=2;
++pIn;
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1;
*numSub+=numS;
return -1;
}
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
/* 3 bytes sequence:
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
register uchar byte2;
register uchar byte3;
if (inLen < 3) {
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
if ((byte2 & 0xC0) != 0x80 ||
(byte3 & 0xC0) != 0x80 ||
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2;
*numSub+=numS;
return -1;
}
{
register uchar work=byte2;
work<<=6;
byte3&=0x3F; /* 0b00111111; */
byte3|=work;
byte2&=0x3F; /* 0b00111111; */
byte2>>=2;
byte1<<=4;
in=byte1 | byte2;;
in<<=8;
in+=byte3;
inLen-=3;
++pIn;
}
} else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* (bytes1 & 11111000) == 0x1110000 */
/* 4 bytes sequence
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
where uuuuu = wwww + 1 */
register uchar byte2;
register uchar byte3;
register uchar byte4;
if (inLen < 4) {
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
(cd->toCcsid == 13488) )
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
++pIn;
byte4=*pIn;
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
register uchar work=byte2;
if (byte1 == 0xF0 && byte2 < 0x90) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
errno=EINVAL;
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
*/
}
work&=0x30; /* 0b00110000; */
work>>=4;
byte1&=0x07; /* 0b00000111; */
byte1<<=2;
byte1+=work; /* uuuuu */
--byte1; /* wwww */
work=byte1 & 0x0F;
work>>=2;
work+=0xD8; /* 0b11011011; */
in=work;
in<<=8;
byte1<<=6;
byte2<<=2;
byte2&=0x3C; /* 0b00111100; */
work=byte3;
work>>=4;
work&=0x03; /* 0b00000011; */
work|=byte1;
work|=byte2;
in+=work;
work=byte3;
work>>=2;
work&=0x03; /* 0b00000011; */
work|=0xDC; /* 0b110111xx; */
in2=work;
in2<<=8;
byte3<<=6;
byte4&=0x3F; /* 0b00111111; */
byte4|=byte3;
in2+=byte4;
inLen-=4;
++pIn;
#ifdef match_with_GBK
if ((0xD800 == in && in2 < 0xDC80) ||
(0xD840 == in && in2 < 0xDC80) ||
(0xD880 == in && in2 < 0xDC80) ||
(0xD8C0 == in && in2 < 0xDC80) ||
(0xD900 == in && in2 < 0xDC80) ||
(0xD940 == in && in2 < 0xDC80) ||
(0xD980 == in && in2 < 0xDC80) ||
(0xD9C0 == in && in2 < 0xDC80) ||
(0xDA00 == in && in2 < 0xDC80) ||
(0xDA40 == in && in2 < 0xDC80) ||
(0xDA80 == in && in2 < 0xDC80) ||
(0xDAC0 == in && in2 < 0xDC80) ||
(0xDB00 == in && in2 < 0xDC80) ||
(0xDB40 == in && in2 < 0xDC80) ||
(0xDB80 == in && in2 < 0xDC80) ||
(0xDBC0 == in && in2 < 0xDC80)) {
#else
if ((0xD800 <= in && in <= 0xDBFF) &&
(0xDC00 <= in2 && in2 <= 0xDFFF)) {
#endif
*pOut=subS;
++pOut;
++numS;
continue;
}
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
}
} else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* minic iconv() behavior */
if (inLen < 4 ||
(inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
pIn[1] < 0x80 || 0xBF < pIn[1] ||
pIn[2] < 0x80 || 0xBF < pIn[2] ||
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
if (inLen == 1)
errno=EINVAL; /* 22 */
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
} else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
pIn[2] < 0x82) {
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
++pOut;
++numS;
pIn+=4;
inLen-=4;
continue;
} else {
*pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
++pOut;
*pOut=pSubD[1];
++pOut;
++numS;
pIn+=4;
inLen-=4;
continue;
/* iconv() returns 0 with strange 1 byte converted values */
}
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
}
/* end of UTF-8 to UCS-2 */
if (in == 0x0000) {
*pOut=0x00;
++pOut;
} else if (in < 0x100 && dmapU2S[in] != 0x0000) {
if ((*pOut=dmapU2S[in]) == subS) {
if (in != cd->srcSubS)
++numS;
}
++pOut;
} else {
in<<=1;
if (dmapU2M2[in] == 0x00) { /* not found in dmapU2M2 */
in*=1.5;
if (dmapU2M3[in] == 0x00) { /* not found in dmapU2M3*/
*pOut=pSubD[0];
++pOut;
*pOut=pSubD[1];
++pOut;
++numS;
++rc;
} else {
*pOut=dmapU2M3[in];
++pOut;
*pOut=dmapU2M3[1+in];
++pOut;
*pOut=dmapU2M3[2+in];
++pOut;
}
} else {
*pOut=dmapU2M2[in];
++pOut;
if (dmapU2M2[1+in] == 0x00) {
if (*pOut == subS) {
in>>=1;
if (in != cd->srcSubS)
++numS;
}
} else {
*pOut=dmapU2M2[1+in];
++pOut;
if (memcmp(pOut-2, pSubD, 2) == 0) {
in>>=1;
if (in != cd->srcSubD) {
++numS;
++rc;
}
}
}
}
}
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_S2U) {
/* DMAP_S2U: single-byte source -> UCS-2.
   Every input byte selects a two-byte entry in dmapD12U, which is copied to
   the output unchanged.  The table is addressed as uchar * instead of
   UniChar * to avoid memcpy. */
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register int offset;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register size_t numS=0;
while (0 < inLen) {
  if (pLastOutBuf < pOut)       /* no output space left */
    break;
  if (*pIn == 0x00) {
    /* A zero byte is emitted as 0x0000 without consulting the table. */
    *pOut=0x00;
    ++pOut;
    *pOut=0x00;
    ++pOut;
  } else {
    offset=*pIn;
    offset<<=1;                 /* two table bytes per source byte */
    *pOut=dmapD12U[offset];
    ++pOut;
    *pOut=dmapD12U[offset+1];
    ++pOut;
    if (dmapD12U[offset] == 0x00) {
      if (dmapD12U[offset+1] == 0x1A) {
        /* Table produced 0x001A: count it as a substitution unless the
           source byte was itself the source substitution character. */
        if (*pIn != cd->srcSubS)
          ++numS;
      } else if (dmapD12U[offset+1] == 0x00) {
        /* Table entry 0x0000 for a non-zero byte: no mapping.  Discard the
           two bytes just written and fail.  errno is set so that the caller
           does not read a stale value; EILSEQ matches what the DMAP_S28
           branch reports for the same condition. */
        pOut-=2;
        errno=EILSEQ; /* 116 */
        *outBytesLeft-=(pOut-*outBuf);
        *inBytesLeft=inLen;
        *outBuf=pOut;
        *inBuf=pIn;
        *numSub+=numS;
        return -1;
      }
    }
  }
  ++pIn;
  --inLen;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_S28) {
/* DMAP_S28: single-byte source -> UTF-8.
   Each input byte selects a two-byte value in dmapD12U; that value is then
   written as a 1-, 2- or 3-byte UTF-8 sequence. */
/* use uchar * instead of UniChar to avoid memcpy */
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register int offset;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register size_t numS=0;
register UniChar in; /* copy part of U28 */
while (0 < inLen) {
/* The guard only checks that at least one output byte is free, although the
   branches below write up to three bytes. */
if (pLastOutBuf < pOut)
break;
if (*pIn == 0x00) {
/* A zero byte is copied through as a single 0x00. */
*pOut=0x00;
++pOut;
++pIn;
--inLen;
} else {
/* Assemble the 16-bit table value, high byte first. */
offset=*pIn;
offset<<=1;
in=dmapD12U[offset];
in<<=8;
in+=dmapD12U[offset+1];
if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
/* 0x0000..0x007F: one output byte.  A table value of 0 for a non-zero
   source byte means "no mapping" and is reported as EILSEQ. */
if (in == 0x000) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
*pOut=in;
++pOut;
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
/* 0x0080..0x07FF: two output bytes, 110yyyyy 10xxxxxx. */
register uchar byte;
in>>=6;
in&=0x001F; /* 0b0000000000011111 */
in|=0x00C0; /* 0b0000000011000000 */
*pOut=in;
++pOut;
byte=dmapD12U[offset+1];
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
} else if ((in & 0xFC00) == 0xD800) { /* There should be no surrogate character in SBCS. */
/* 0xD800..0xDBFF: emit EF BF BD, the UTF-8 form of U+FFFD. */
*pOut=0xEF;
++pOut;
*pOut=0xBF;
++pOut;
*pOut=0xBD;
++pOut;
} else {
/* Everything else: three output bytes, 1110zzzz 10yyyyyy 10xxxxxx. */
register uchar byte;
register uchar work;
byte=dmapD12U[offset];
byte>>=4;
byte|=0xE0; /* 0b11100000; */
*pOut=byte;
++pOut;
byte=dmapD12U[offset];
byte<<=2;
work=dmapD12U[offset+1];
work>>=6;
byte|=work;
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
byte=dmapD12U[offset+1];
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
}
/* end of U28 */
/* A table value of 0x001A counts as a substitution unless the source byte
   was itself cd->srcSubS. */
if (dmapD12U[offset] == 0x00) {
if (dmapD12U[offset+1] == 0x1A) {
if (*pIn != cd->srcSubS)
++numS;
}
}
++pIn;
--inLen;
}
}
/* Publish progress: bytes written, bytes left, and the substitution count. */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_U2S) {
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register size_t numS=0;
while (0 < inLen) {
register uint32_t in;
if (inLen == 1) {
errno=EINVAL; /* 22 */
*inBytesLeft=inLen;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
if (pLastOutBuf < pOut)
break;
in=pIn[0];
in<<=8;
in+=pIn[1];
if (in == 0x0000) {
*pOut=0x00;
} else {
if ((*pOut=dmapU2S[in]) == 0x00) {
*pOut=subS;
++numS;
errno=EINVAL; /* 22 */
} else if (*pOut == subS) {
if (in != cd->srcSubS)
++numS;
}
}
++pOut;
pIn+=2;
inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return numS;
} else if (cd->cnv_dmap->codingSchema == DMAP_T2S) {
/* DMAP_T2S: UTF-16 -> single-byte target.
   Works like the UCS-2 variant, but recognises surrogates: a well-formed
   surrogate pair is replaced by one substitution character, a lone or
   malformed surrogate fails with EINVAL. */
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register size_t numS=0;
while (0 < inLen) {
  register uint32_t in;
  if (inLen == 1) {
    /* A dangling odd byte is reported as consumed, counted as one
       substitution, and the call still returns 0. */
    errno=EINVAL; /* 22 */
    *inBytesLeft=inLen-1;
    *outBytesLeft-=(pOut-*outBuf);
    *outBuf=pOut;
    *inBuf=pIn;
    ++numS;
    *numSub+=numS;
    return 0;
  }
  if (pLastOutBuf < pOut)
    break;
  /* Assemble the 16-bit unit, high byte first. */
  in=pIn[0];
  in<<=8;
  in+=pIn[1];
  if (in == 0x0000) {
    *pOut=0x00;
  } else if (0xD800 <= in && in <= 0xDFFF) { /* 0xD800-0xDFFF, surrogate first and second values */
    if (0xDC00 <= in ) {
      /* Second surrogate without a preceding first surrogate. */
      errno=EINVAL; /* 22 */
      *inBytesLeft=inLen-1;
      *outBytesLeft-=(pOut-*outBuf);
      *outBuf=pOut;
      *inBuf=pIn;
      return -1;
    } else if (inLen < 4) {
      /* First surrogate, but the input ends before the second one. */
      errno=EINVAL; /* 22 */
      *inBytesLeft=inLen-2;
      *outBytesLeft-=(pOut-*outBuf);
      *outBuf=pOut;
      *inBuf=pIn+2;
      return -1;
    } else {
      register uint32_t in2;
      in2=pIn[2];
      in2<<=8;
      in2+=pIn[3];
      if (0xDC00 <= in2 && in2 <= 0xDFFF) { /* second surrogate character =0xDC00 - 0xDFFF*/
        *pOut=subS;
        ++numS;
        /* Consume the first half of the pair here; the common tail of the
           loop consumes the second half.  Previously pIn was advanced by 4
           here and by 2 more below while inLen only dropped by 2, so the
           pointer and the remaining length went out of step and the loop
           read past the end of the input. */
        pIn+=2;
        inLen-=2;
      } else {
        /* First surrogate not followed by a second surrogate. */
        errno=EINVAL; /* 22 */
        *inBytesLeft=inLen-1;
        *outBytesLeft-=(pOut-*outBuf);
        *outBuf=pOut;
        *inBuf=pIn;
        return -1;
      }
    }
  } else {
    if ((*pOut=dmapU2S[in]) == 0x00) {
      /* No mapping: emit the substitution character and flag it. */
      *pOut=subS;
      ++numS;
      errno=EINVAL; /* 22 */
    } else if (*pOut == subS) {
      if (in != cd->srcSubS)
        ++numS;
    }
  }
  ++pOut;
  pIn+=2;
  inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_82S) {
/* DMAP_82S: UTF-8 -> single-byte target.
   Each UTF-8 sequence is first decoded into the 16-bit value `in`; that value
   is then mapped through dmapU2S to one output byte.  Malformed input fails
   with EILSEQ, truncated input with EINVAL; on those paths *inBuf is left
   pointing at the lead byte of the offending sequence. */
register uchar * dmapU2S=cd->cnv_dmap->dmapU2S;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register size_t numS=0;
while (0 < inLen) {
register uint32_t in;
uint32_t in2; /* The second surrogate value; assigned below but not read in this branch */
if (pLastOutBuf < pOut)
break;
/* convert from UTF-8 to UCS-2 */
if (*pIn == 0x00) {
in=0x0000;
++pIn;
--inLen;
} else { /* 82U: */
register uchar byte1=*pIn;
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
in=byte1;
++pIn;
--inLen;
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
if (inLen < 2) {
/* Lead byte without its continuation byte: incomplete input. */
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
/* 0xC0 and 0xC1 could only start an overlong encoding of a value below 0x80. */
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
/* 2 bytes sequence:
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
register uchar byte2;
++pIn;
byte2=*pIn;
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
register uchar work=byte1;
work<<=6;
byte2&=0x3F; /* 0b00111111; */
byte2|=work;
byte1&=0x1F; /* 0b00011111; */
byte1>>=2;
in=byte1;
in<<=8;
in+=byte2;
inLen-=2;
++pIn;
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
/* 3 bytes sequence:
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
register uchar byte2;
register uchar byte3;
if (inLen < 3) {
/* Truncated: EILSEQ if the one byte present is not a continuation byte,
   otherwise EINVAL. */
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
if ((byte2 & 0xC0) != 0x80 ||
(byte3 & 0xC0) != 0x80 ||
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
{
register uchar work=byte2;
work<<=6;
byte3&=0x3F; /* 0b00111111; */
byte3|=work;
byte2&=0x3F; /* 0b00111111; */
byte2>>=2;
byte1<<=4;
in=byte1 | byte2;;
in<<=8;
in+=byte3;
inLen-=3;
++pIn;
}
} else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* lead byte 0xF0..0xF4 (11110uuu) */
((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* mimic iconv() behavior: bit 3 is cleared, so 0xF8..0xFC are handled like 0xF0..0xF4; byte1 stays modified even when this test fails */
/* 4 bytes sequence
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
where uuuuu = wwww + 1 */
register uchar byte2;
register uchar byte3;
register uchar byte4;
if (inLen < 4) {
/* Truncated: EILSEQ if a byte that is present is not a continuation byte,
   or if the target CCSID is 13488; otherwise EINVAL. */
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
(cd->toCcsid == 13488) )
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
++pIn;
byte4=*pIn;
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
register uchar work=byte2;
/* After lead byte 0xF0 the second byte must be at least 0x90. */
if (byte1 == 0xF0 && byte2 < 0x90) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
errno=EINVAL;
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
*/
}
/* Build the first surrogate value in `in` ... */
work&=0x30; /* 0b00110000; */
work>>=4;
byte1&=0x07; /* 0b00000111; */
byte1<<=2;
byte1+=work; /* uuuuu */
--byte1; /* wwww */
work=byte1 & 0x0F;
work>>=2;
work+=0xD8; /* 0b11011000; */
in=work;
in<<=8;
byte1<<=6;
byte2<<=2;
byte2&=0x3C; /* 0b00111100; */
work=byte3;
work>>=4;
work&=0x03; /* 0b00000011; */
work|=byte1;
work|=byte2;
in+=work;
/* ... and the second surrogate value in `in2`. */
work=byte3;
work>>=2;
work&=0x03; /* 0b00000011; */
work|=0xDC; /* 0b110111xx; */
in2=work;
in2<<=8;
byte3<<=6;
byte4&=0x3F; /* 0b00111111; */
byte4|=byte3;
in2+=byte4;
inLen-=4;
++pIn;
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
} else if ((byte1 & 0xF0) == 0xF0) { /* mimic iconv() behavior */
/* Remaining lead bytes with the high nibble set (byte1 may already have had
   bit 3 cleared by the test above). */
if (inLen < 4 ||
pIn[1] < 0x80 || 0xBF < pIn[1] ||
pIn[2] < 0x80 || 0xBF < pIn[2] ||
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
if (inLen == 1)
errno=EINVAL; /* 22 */
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
} else {
/* Lead byte followed by three continuation bytes: consume all four bytes
   and emit one substitution character. */
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
++pOut;
++numS;
pIn+=4;
inLen-=4;
/* UTF-8_IBM-850 0xF0908080 : converted value does not match, iconv=0x00, dmap=0x7F
UTF-8_IBM-850 0xF0908081 : converted value does not match, iconv=0x01, dmap=0x7F
UTF-8_IBM-850 0xF0908082 : converted value does not match, iconv=0x02, dmap=0x7F
UTF-8_IBM-850 0xF0908083 : converted value does not match, iconv=0x03, dmap=0x7F
....
UTF-8_IBM-850 0xF09081BE : converted value does not match, iconv=0x7E, dmap=0x7F
UTF-8_IBM-850 0xF09081BF : converted value does not match, iconv=0x1C, dmap=0x7F
UTF-8_IBM-850 0xF09082A0 : converted value does not match, iconv=0xFF, dmap=0x7F
UTF-8_IBM-850 0xF09082A1 : converted value does not match, iconv=0xAD, dmap=0x7F
....
*/
continue;
/* iconv() returns 0 with strange 1 byte converted values */
}
} else { /* invalid sequence */
/* Any other lead byte, e.g. a stray continuation byte 0x80..0xBF. */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
}
/* end of UTF-8 to UCS-2 */
/* Map the decoded value to one output byte.  After a 4-byte sequence `in`
   holds the first surrogate value, which is looked up like any other value. */
if (in == 0x0000) {
*pOut=0x00;
} else {
if ((*pOut=dmapU2S[in]) == 0x00) {
/* No mapping: emit the substitution character and flag it. */
*pOut=subS;
++numS;
errno=EINVAL; /* 22 */
} else if (*pOut == subS) {
if (in != cd->srcSubS) {
++numS;
}
}
}
++pOut;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_D2U) {
/* DMAP_D2U: mixed single-/double-byte source -> UCS-2.
   A byte whose dmapD12U entry is 0x0000 is treated as the lead byte of a
   double-byte character and resolved through dmapD22U; every other byte is
   a single-byte character resolved through dmapD12U. */
/* NOTE(review): comparisons such as *pIn == 0x80 only match if plain char is
   unsigned for this build - confirm for the target compiler. */
/* use uchar * instead of UniChar to avoid memcpy */
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register int offset;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register size_t numS=0;
while (0 < inLen) {
/* The guard only checks for one free output byte; two are written per character. */
if (pLastOutBuf < pOut)
break;
if (*pIn == 0x00) {
/* A zero byte is emitted as 0x0000. */
*pOut=0x00;
++pOut;
*pOut=0x00;
++pOut;
++pIn;
--inLen;
} else {
offset=*pIn;
offset<<=1;
if (dmapD12U[offset] == 0x00 &&
dmapD12U[offset+1] == 0x00) { /* DBCS */
if (inLen < 2) {
/* Lead byte at the very end of the input.  The lead bytes listed here
   (some for every CCSID, some only for CCSID 943, 932 or 1381) are reported
   as EILSEQ; any other lead byte is reported as EINVAL (incomplete input).
   Note that *numSub is not updated on this path. */
if (*pIn == 0x80 || *pIn == 0xFF ||
(cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
(cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
(cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
/* Index into dmapD22U: ((lead*2 - 0x100) << 8) + trail*2. */
offset-=0x100;
++pIn;
offset<<=8;
offset+=(*pIn * 2);
if (dmapD22U[offset] == 0x00 &&
dmapD22U[offset+1] == 0x00) {
/* No mapping for this byte pair. */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1; /* rewind to the lead byte */
return -1;
}
*pOut=dmapD22U[offset];
++pOut;
*pOut=dmapD22U[offset+1];
++pOut;
/* A result of 0xFFFD counts as a substitution unless the source pair was
   itself cd->srcSubD. */
if (dmapD22U[offset] == 0xFF &&
dmapD22U[offset+1] == 0xFD) {
if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
++numS;
}
++pIn;
inLen-=2;
} else { /* SBCS */
*pOut=dmapD12U[offset];
++pOut;
*pOut=dmapD12U[offset+1];
++pOut;
/* A result of 0x001A counts as a substitution unless the source byte was
   itself cd->srcSubS. */
if (dmapD12U[offset] == 0x00 &&
dmapD12U[offset+1] == 0x1A) {
if (*pIn != cd->srcSubS)
++numS;
}
++pIn;
--inLen;
}
}
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_D28) {
/* DMAP_D28: mixed single-/double-byte source -> UTF-8.
   A byte whose dmapD12U entry is 0x0000 is treated as the lead byte of a
   double-byte character and resolved through dmapD22U; every other byte is
   resolved through dmapD12U.  The resulting 16-bit value is then written as
   a 1-, 2- or 3-byte UTF-8 sequence.
   The tables are addressed as uchar * instead of UniChar * to avoid memcpy. */
register uchar * dmapD12U=(uchar *) (cd->cnv_dmap->dmapD12U);
register uchar * dmapD22U=(uchar *) (cd->cnv_dmap->dmapD22U);
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register int offset;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register size_t numS=0;
register UniChar in; /* copy part of U28 */
register UniChar ucs2;
while (0 < inLen) {
  if (pLastOutBuf < pOut)
    break;
  if (*pIn == 0x00) {
    /* A zero byte is copied through as a single 0x00. */
    *pOut=0x00;
    ++pOut;
    ++pIn;
    --inLen;
  } else {
    offset=*pIn;
    offset<<=1;
    if (dmapD12U[offset] == 0x00 &&
        dmapD12U[offset+1] == 0x00) { /* DBCS */
      if (inLen < 2) {
        /* Lead byte at the very end of the input: the lead bytes listed
           here are reported as EILSEQ, any other one as EINVAL. */
        if (*pIn == 0x80 || *pIn == 0xFF ||
            (cd->fromCcsid == 943 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0xA0 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xEF || *pIn == 0xFD || *pIn == 0xFE)) ||
            (cd->fromCcsid == 932 && (*pIn == 0x85 || *pIn == 0x86 || *pIn == 0x87 || *pIn == 0xEB || *pIn == 0xEC || *pIn == 0xED || *pIn == 0xEE || *pIn == 0xEF)) ||
            (cd->fromCcsid == 1381 && ((0x85 <= *pIn && *pIn <= 0x8B) || (0xAA <= *pIn && *pIn <= 0xAF) || (0xF8 <= *pIn && *pIn <= 0xFE))))
          errno=EILSEQ; /* 116 */
        else
          errno=EINVAL; /* 22 */
        *outBytesLeft-=(pOut-*outBuf);
        *inBytesLeft=inLen;
        *outBuf=pOut;
        *inBuf=pIn;
        return -1;
      }
      /* Index into dmapD22U: ((lead*2 - 0x100) << 8) + trail*2. */
      offset-=0x100;
      ++pIn;
      offset<<=8;
      offset+=(*pIn * 2);
      if (dmapD22U[offset] == 0x00 &&
          dmapD22U[offset+1] == 0x00) {
        /* No mapping for this byte pair. */
        errno=EILSEQ; /* 116 */
        *outBytesLeft-=(pOut-*outBuf);
        *inBytesLeft=inLen;
        *outBuf=pOut;
        *inBuf=pIn-1; /* rewind to the lead byte */
        return -1;
      }
      in=dmapD22U[offset];
      in<<=8;
      in+=dmapD22U[offset+1];
      ucs2=in;
      if (dmapD22U[offset] == 0xFF &&
          dmapD22U[offset+1] == 0xFD) {
        /* Result is 0xFFFD: count a substitution unless the SOURCE pair
           was itself the source substitution character.  The converted
           value `in` is always 0xFFFD here, so comparing it with
           cd->srcSubD (as was done before) did not test the source;
           compare the source bytes, as the DMAP_D2U branch does. */
        if (pIn[-1] * 0x100 + pIn[0] != cd->srcSubD)
          ++numS;
      }
      ++pIn;
      inLen-=2;
    } else { /* SBCS */
      in=dmapD12U[offset];
      in<<=8;
      in+=dmapD12U[offset+1];
      ucs2=in;
      if (dmapD12U[offset] == 0x00 &&
          dmapD12U[offset+1] == 0x1A) {
        /* Result is 0x001A: count a substitution unless the SOURCE byte
           was itself the source substitution character (same test as in
           the DMAP_D2U and DMAP_S28 branches). */
        if (*pIn != cd->srcSubS)
          ++numS;
      }
      ++pIn;
      --inLen;
    }
    if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
      /* 0x0000..0x007F: one output byte. */
      *pOut=in;
      ++pOut;
    } else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
      /* 0x0080..0x07FF: two output bytes, 110yyyyy 10xxxxxx. */
      register uchar byte;
      in>>=6;
      in&=0x001F; /* 0b0000000000011111 */
      in|=0x00C0; /* 0b0000000011000000 */
      *pOut=in;
      ++pOut;
      byte=ucs2; /* low byte of the table value */
      byte&=0x3F; /* 0b00111111; */
      byte|=0x80; /* 0b10000000; */
      *pOut=byte;
      ++pOut;
    } else if ((in & 0xFC00) == 0xD800) { /* There should be no surrogate character here. */
      /* 0xD800..0xDBFF: emit EF BF BD, the UTF-8 form of U+FFFD. */
      *pOut=0xEF;
      ++pOut;
      *pOut=0xBF;
      ++pOut;
      *pOut=0xBD;
      ++pOut;
    } else {
      /* Everything else: three output bytes, 1110zzzz 10yyyyyy 10xxxxxx. */
      register uchar byte;
      register uchar work;
      byte=(ucs2>>8); /* high byte of the table value */
      byte>>=4;
      byte|=0xE0; /* 0b11100000; */
      *pOut=byte;
      ++pOut;
      byte=(ucs2>>8); /* high byte of the table value */
      byte<<=2;
      work=ucs2; /* low byte of the table value */
      work>>=6;
      byte|=work;
      byte&=0x3F; /* 0b00111111; */
      byte|=0x80; /* 0b10000000; */
      *pOut=byte;
      ++pOut;
      byte=ucs2; /* low byte of the table value */
      byte&=0x3F; /* 0b00111111; */
      byte|=0x80; /* 0b10000000; */
      *pOut=byte;
      ++pOut;
    }
    /* end of U28 */
  }
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_U2D) {
/* DMAP_U2D: UCS-2 -> mixed single-/double-byte target.
   Each 16-bit input unit selects a two-byte entry in dmapU2D.  If the second
   table byte is 0x00 the result is a single byte, otherwise both bytes are
   written.  Returns the number of substitutions made during this call. */
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register char * pSubD=(char *) &(cd->subD);
register size_t numS=0;
while (0 < inLen) {
  register uint32_t in;
  if (inLen == 1) {
    /* Half a UCS-2 unit is left over: incomplete input. */
    errno=EINVAL; /* 22 */
    *inBytesLeft=inLen;
    *outBytesLeft-=(pOut-*outBuf);
    *outBuf=pOut;
    *inBuf=pIn;
    return -1;
  }
  if (pLastOutBuf < pOut)
    break;
  /* Assemble the unit, high byte first. */
  in=pIn[0];
  in<<=8;
  in+=pIn[1];
  if (in == 0x0000) {
    *pOut=0x00;
    ++pOut;
  } else {
    in<<=1;                     /* two table bytes per code unit */
    *pOut=dmapU2D[in];
    ++pOut;
    if (dmapU2D[in+1] == 0x00) { /* SBCS */
      /* Test the byte that was just written.  The previous code tested
         *pOut after the increment, i.e. a byte that had not been written
         yet (and possibly one past the end of the buffer), and compared the
         doubled table index with srcSubS.  The DMAP_82D branch already does
         both steps this way. */
      if (pOut[-1] == subS) {
        in>>=1;                 /* back to the code unit value */
        if (in != cd->srcSubS)
          ++numS;
      }
    } else {
      *pOut=dmapU2D[in+1];
      ++pOut;
      /* Double-byte substitution character produced: count it unless the
         source unit was itself the source substitution character. */
      if (dmapU2D[in] == pSubD[0] &&
          dmapU2D[in+1] == pSubD[1]) {
        in>>=1;
        if (in != cd->srcSubD)
          ++numS;
      }
    }
  }
  pIn+=2;
  inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return numS; /* to mimic iconv() behavior */
} else if (cd->cnv_dmap->codingSchema == DMAP_T2D) {
/* DMAP_T2D: UTF-16 -> mixed single-/double-byte target.
   Works like the UCS-2 variant, except that any surrogate code unit stops
   the conversion with EINVAL and a dangling odd byte is swallowed. */
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register char * pSubD=(char *) &(cd->subD);
register size_t numS=0;
while (0 < inLen) {
  register uint32_t in;
  if (inLen == 1) {
    /* A dangling odd byte is reported as consumed, counted as one
       substitution, and the call still returns 0. */
    errno=EINVAL; /* 22 */
    *inBytesLeft=inLen-1;
    *outBytesLeft-=(pOut-*outBuf);
    *outBuf=pOut;
    *inBuf=pIn;
    ++numS;
    *numSub+=numS;
    return 0;
  }
  if (pLastOutBuf < pOut)
    break;
  /* Assemble the 16-bit unit, high byte first. */
  in=pIn[0];
  in<<=8;
  in+=pIn[1];
  if (in == 0x0000) {
    *pOut=0x00;
    ++pOut;
  } else if (0xD800 <= in && in <= 0xDBFF) { /* first surrogate of a pair */
    /* Reported with the first surrogate already consumed. */
    errno=EINVAL; /* 22 */
    *inBytesLeft=inLen-2;
    *outBytesLeft-=(pOut-*outBuf);
    *outBuf=pOut;
    *inBuf=pIn+2;
    ++numS;
    *numSub+=numS;
    return -1;
  } else if (0xDC00 <= in && in <= 0xDFFF) { /* second surrogate of a pair */
    errno=EINVAL; /* 22 */
    *inBytesLeft=inLen-1;
    *outBytesLeft-=(pOut-*outBuf);
    *outBuf=pOut;
    *inBuf=pIn;
    ++numS;
    *numSub+=numS;
    return -1;
  } else {
    in<<=1;                     /* two table bytes per code unit */
    *pOut=dmapU2D[in];
    ++pOut;
    if (dmapU2D[in+1] == 0x00) { /* SBCS */
      /* Test the byte that was just written.  The previous code tested
         *pOut after the increment, i.e. a byte that had not been written
         yet (and possibly one past the end of the buffer), and compared the
         doubled table index with srcSubS.  The DMAP_82D branch already does
         both steps this way. */
      if (pOut[-1] == subS) {
        in>>=1;                 /* back to the code unit value */
        if (in != cd->srcSubS)
          ++numS;
      }
    } else {
      *pOut=dmapU2D[in+1];
      ++pOut;
      /* Double-byte substitution character produced: count it unless the
         source unit was itself the source substitution character. */
      if (dmapU2D[in] == pSubD[0] &&
          dmapU2D[in+1] == pSubD[1]) {
        in>>=1;
        if (in != cd->srcSubD)
          ++numS;
      }
    }
  }
  pIn+=2;
  inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0; /* to mimic iconv() behavior */
} else if (cd->cnv_dmap->codingSchema == DMAP_82D) {
/* DMAP_82D: UTF-8 -> mixed single-/double-byte target.
   Each UTF-8 sequence is first decoded into the 16-bit value `in`; that value
   then selects a two-byte entry in dmapU2D, written as one or two output
   bytes.  Malformed input fails with EILSEQ, truncated input with EINVAL; on
   those paths *inBuf is left pointing at the lead byte of the offending
   sequence. */
register uchar * dmapU2D=cd->cnv_dmap->dmapU2D;
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register char subS=cd->subS;
register char * pSubD=(char *) &(cd->subD);
register size_t numS=0;
while (0 < inLen) {
register uint32_t in;
uint32_t in2; /* second surrogate value of a decoded 4-byte sequence */
if (pLastOutBuf < pOut)
break;
/* convert from UTF-8 to UCS-2 */
if (*pIn == 0x00) {
in=0x0000;
++pIn;
--inLen;
} else { /* 82U: */
register uchar byte1=*pIn;
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
in=byte1;
++pIn;
--inLen;
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
if (inLen < 2) {
/* Lead byte without its continuation byte: incomplete input. */
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
/* 0xC0 and 0xC1 could only start an overlong encoding of a value below 0x80. */
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
/* 2 bytes sequence:
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
register uchar byte2;
++pIn;
byte2=*pIn;
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
register uchar work=byte1;
work<<=6;
byte2&=0x3F; /* 0b00111111; */
byte2|=work;
byte1&=0x1F; /* 0b00011111; */
byte1>>=2;
in=byte1;
in<<=8;
in+=byte2;
inLen-=2;
++pIn;
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
/* 3 bytes sequence:
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
register uchar byte2;
register uchar byte3;
if (inLen < 3) {
/* Truncated: EILSEQ if the one byte present is not a continuation byte,
   otherwise EINVAL. */
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
if ((byte2 & 0xC0) != 0x80 ||
(byte3 & 0xC0) != 0x80 ||
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
{
register uchar work=byte2;
work<<=6;
byte3&=0x3F; /* 0b00111111; */
byte3|=work;
byte2&=0x3F; /* 0b00111111; */
byte2>>=2;
byte1<<=4;
in=byte1 | byte2;;
in<<=8;
in+=byte3;
inLen-=3;
++pIn;
}
} else if ((0xF0 <= byte1 && byte1 <= 0xF4)) { /* lead byte 0xF0..0xF4 (11110uuu) */
/* 4 bytes sequence
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
where uuuuu = wwww + 1 */
register uchar byte2;
register uchar byte3;
register uchar byte4;
if (inLen < 4) {
/* Truncated: EILSEQ if a byte that is present is not a continuation byte,
   or if the target CCSID is 13488; otherwise EINVAL. */
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
(cd->toCcsid == 13488) )
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
++pIn;
byte4=*pIn;
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
register uchar work=byte2;
/* After lead byte 0xF0 the second byte must be at least 0x90. */
if (byte1 == 0xF0 && byte2 < 0x90) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
/* iconv() returns 0 for 0xF4908080 and convert to 0x00
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
errno=EINVAL;
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
*/
}
/* Build the first surrogate value in `in` ... */
work&=0x30; /* 0b00110000; */
work>>=4;
byte1&=0x07; /* 0b00000111; */
byte1<<=2;
byte1+=work; /* uuuuu */
--byte1; /* wwww */
work=byte1 & 0x0F;
work>>=2;
work+=0xD8; /* 0b11011000; */
in=work;
in<<=8;
byte1<<=6;
byte2<<=2;
byte2&=0x3C; /* 0b00111100; */
work=byte3;
work>>=4;
work&=0x03; /* 0b00000011; */
work|=byte1;
work|=byte2;
in+=work;
/* ... and the second surrogate value in `in2`. */
work=byte3;
work>>=2;
work&=0x03; /* 0b00000011; */
work|=0xDC; /* 0b110111xx; */
in2=work;
in2<<=8;
byte3<<=6;
byte4&=0x3F; /* 0b00111111; */
byte4|=byte3;
in2+=byte4;
inLen-=4;
++pIn;
/* A decoded surrogate pair is written as a single-byte substitution
   character.  With match_with_GBK defined only the listed first-surrogate
   values combined with a second surrogate below 0xDC80 are treated that way. */
#ifdef match_with_GBK
if ((0xD800 == in && in2 < 0xDC80) ||
(0xD840 == in && in2 < 0xDC80) ||
(0xD880 == in && in2 < 0xDC80) ||
(0xD8C0 == in && in2 < 0xDC80) ||
(0xD900 == in && in2 < 0xDC80) ||
(0xD940 == in && in2 < 0xDC80) ||
(0xD980 == in && in2 < 0xDC80) ||
(0xD9C0 == in && in2 < 0xDC80) ||
(0xDA00 == in && in2 < 0xDC80) ||
(0xDA40 == in && in2 < 0xDC80) ||
(0xDA80 == in && in2 < 0xDC80) ||
(0xDAC0 == in && in2 < 0xDC80) ||
(0xDB00 == in && in2 < 0xDC80) ||
(0xDB40 == in && in2 < 0xDC80) ||
(0xDB80 == in && in2 < 0xDC80) ||
(0xDBC0 == in && in2 < 0xDC80)) {
#else
if ((0xD800 <= in && in <= 0xDBFF) &&
(0xDC00 <= in2 && in2 <= 0xDFFF)) {
#endif
*pOut=subS;
++pOut;
++numS;
continue;
}
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
} else if (0xF5 <= byte1 && byte1 <= 0xFF) { /* mimic iconv() behavior */
/* Lead bytes 0xF5..0xFF: reject unless three continuation bytes follow
   (and, for 0xF8, the second byte is at least 0x90). */
if (inLen < 4 ||
(inLen >= 4 && byte1 == 0xF8 && pIn[1] < 0x90) ||
pIn[1] < 0x80 || 0xBF < pIn[1] ||
pIn[2] < 0x80 || 0xBF < pIn[2] ||
pIn[3] < 0x80 || 0xBF < pIn[3] ) {
if (inLen == 1)
errno=EINVAL; /* 22 */
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else if (inLen >= 4 && (byte1 == 0xF8 || (pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
} else if ((pIn[1] == 0x80 || pIn[1] == 0x90 || pIn[1] == 0xA0 || pIn[1] == 0xB0) &&
pIn[2] < 0x82) {
/* These byte patterns get the single-byte substitution character ... */
*pOut=subS; /* Though returns replacement character, which iconv() does not return. */
++pOut;
++numS;
pIn+=4;
inLen-=4;
continue;
} else {
/* ... all others get the double-byte substitution character. */
*pOut=pSubD[0]; /* Though returns replacement character, which iconv() does not return. */
++pOut;
*pOut=pSubD[1];
++pOut;
++numS;
pIn+=4;
inLen-=4;
continue;
/* iconv() returns 0 with strange 1 byte converted values */
}
} else { /* invalid sequence */
/* Any other lead byte, e.g. a stray continuation byte 0x80..0xBF. */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
}
/* end of UTF-8 to UCS-2 */
/* Map the decoded value through dmapU2D (two table bytes per value). */
if (in == 0x0000) {
*pOut=0x00;
++pOut;
} else {
in<<=1;
*pOut=dmapU2D[in];
++pOut;
if (dmapU2D[in+1] == 0x00) { /* SBCS */
/* Single-byte result equal to subS: count it unless the decoded value
   was itself cd->srcSubS. */
if (dmapU2D[in] == subS) {
in>>=1;
if (in != cd->srcSubS)
++numS;
}
} else {
*pOut=dmapU2D[in+1];
++pOut;
/* Double-byte result equal to subD: count it unless the decoded value
   was itself cd->srcSubD. */
if (dmapU2D[in] == pSubD[0] &&
dmapU2D[in+1] == pSubD[1]) {
in>>=1;
if (in != cd->srcSubD)
++numS;
}
}
}
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_82U) {
/* DMAP_82U: UTF-8 -> UCS-2 / UTF-16, without any mapping table.
   1- to 3-byte sequences produce one 16-bit unit; 4-byte sequences produce a
   surrogate pair, or are rejected with EILSEQ when cd->toCcsid is 13488.
   Output units are written high byte first.  numS is never incremented in
   this branch, so *numSub is left unchanged. */
/* See http://unicode.org/versions/corrigendum1.html */
/* convert from UTF-8 to UTF-16 can cover all conversion from UTF-8 to UCS-2 */
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
register size_t numS=0;
while (0 < inLen) {
/* The guard only checks for one free output byte; two or four are written. */
if (pLastOutBuf < pOut)
break;
if (*pIn == 0x00) {
*pOut=0x00;
++pOut;
*pOut=0x00;
++pOut;
++pIn;
--inLen;
} else { /* 82U: */
register uchar byte1=*pIn;
if ((byte1 & 0x80) == 0x00) { /* if (byte1 & 0b10000000 == 0b00000000) { */
/* 1 bytes sequence: 0xxxxxxx => 00000000 0xxxxxxx*/
*pOut=0x00;
++pOut;
*pOut=byte1;
++pOut;
++pIn;
--inLen;
} else if ((byte1 & 0xE0) == 0xC0) { /* (byte1 & 0b11100000 == 0b11000000) { */
if (inLen < 2) {
/* Lead byte without its continuation byte: incomplete input. */
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
/* 0xC0 and 0xC1 could only start an overlong encoding of a value below 0x80. */
if (byte1 == 0xC0 || byte1 == 0xC1) { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
/* 2 bytes sequence:
110yyyyy 10xxxxxx => 00000yyy yyxxxxxx */
register uchar byte2;
++pIn;
byte2=*pIn;
if ((byte2 & 0xC0) == 0x80) { /* byte2 & 0b11000000 == 0b10000000) { */
register uchar work=byte1;
work<<=6;
byte2&=0x3F; /* 0b00111111; */
byte2|=work;
byte1&=0x1F; /* 0b00011111; */
byte1>>=2;
*pOut=byte1;
++pOut;
*pOut=byte2;
++pOut;
inLen-=2;
++pIn;
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-1; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
} else if ((byte1 & 0xF0) == 0xE0) { /* byte1 & 0b11110000 == 0b11100000 */
/* 3 bytes sequence:
1110zzzz 10yyyyyy 10xxxxxx => zzzzyyyy yyxxxxxx */
register uchar byte2;
register uchar byte3;
if (inLen < 3) {
/* Truncated: EILSEQ if the one byte present is not a continuation byte,
   otherwise EINVAL. */
if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
if ((byte2 & 0xC0) != 0x80 ||
(byte3 & 0xC0) != 0x80 ||
(byte1 == 0xE0 && byte2 < 0xA0)) { /* invalid sequence, only 0xA0-0xBF allowed after 0xE0 */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-2; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
{
register uchar work=byte2;
work<<=6;
byte3&=0x3F; /* 0b00111111; */
byte3|=work;
byte2&=0x3F; /* 0b00111111; */
byte2>>=2;
byte1<<=4;
*pOut=byte1 | byte2;;
++pOut;
*pOut=byte3;
++pOut;
inLen-=3;
++pIn;
}
} else if ((0xF0 <= byte1 && byte1 <= 0xF4) || /* lead byte 0xF0..0xF4 (11110uuu) */
((byte1&=0xF7) && 0xF0 <= byte1 && byte1 <= 0xF4)) { /* mimic iconv() behavior: bit 3 is cleared, so 0xF8..0xFC are handled like 0xF0..0xF4; byte1 stays modified even when this test fails */
/* 4 bytes sequence
11110uuu 10uuzzzz 10yyyyyy 10xxxxxx => 110110ww wwzzzzyy 110111yy yyxxxxxx
where uuuuu = wwww + 1 */
register uchar byte2;
register uchar byte3;
register uchar byte4;
/* A 4-byte sequence is always rejected when the target CCSID is 13488;
   otherwise only when it is truncated. */
if (inLen < 4 || cd->toCcsid == 13488) {
if ((inLen >= 2 && (pIn[1] & 0xC0) != 0x80) ||
(inLen >= 3 && (pIn[2] & 0xC0) != 0x80) ||
(cd->toCcsid == 13488) )
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
++pIn;
byte2=*pIn;
++pIn;
byte3=*pIn;
++pIn;
byte4=*pIn;
if ((byte2 & 0xC0) == 0x80 && /* byte2 & 0b11000000 == 0b10000000 */
(byte3 & 0xC0) == 0x80 && /* byte3 & 0b11000000 == 0b10000000 */
(byte4 & 0xC0) == 0x80) { /* byte4 & 0b11000000 == 0b10000000 */
register uchar work=byte2;
/* After lead byte 0xF0 the second byte must be at least 0x90 ... */
if (byte1 == 0xF0 && byte2 < 0x90) {
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
/* ... and after lead byte 0xF4 it must not exceed 0x8F. */
} else if (byte1 == 0xF4 && byte2 > 0x8F) {
errno=EINVAL; /* 22 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3;
*numSub+=numS;
return -1;
}
/* First surrogate, high byte then low byte. */
work&=0x30; /* 0b00110000; */
work>>=4;
byte1&=0x07; /* 0b00000111; */
byte1<<=2;
byte1+=work; /* uuuuu */
--byte1; /* wwww */
work=byte1 & 0x0F;
work>>=2;
work+=0xD8; /* 0b11011000; */
*pOut=work;
++pOut;
byte1<<=6;
byte2<<=2;
byte2&=0x3C; /* 0b00111100; */
work=byte3;
work>>=4;
work&=0x03; /* 0b00000011; */
work|=byte1;
work|=byte2;
*pOut=work;
++pOut;
/* Second surrogate, high byte then low byte. */
work=byte3;
work>>=2;
work&=0x03; /* 0b00000011; */
work|=0xDC; /* 0b110111xx; */
*pOut=work;
++pOut;
byte3<<=6;
byte4&=0x3F; /* 0b00111111; */
byte4|=byte3;
*pOut=byte4;
++pOut;
inLen-=4;
++pIn;
} else { /* invalid sequence */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn-3; /* rewind to the lead byte */
*numSub+=numS;
return -1;
}
} else if ((byte1 & 0xF0) == 0xF0) {
/* Remaining lead bytes with the high nibble set always fail; only the
   errno value depends on the target CCSID and on the bytes that follow. */
if (cd->toCcsid == 13488) {
errno=EILSEQ; /* 116 */
} else {
if (inLen == 1)
errno=EINVAL; /* 22 */
else if (inLen == 2 && (pIn[1] & 0xC0) != 0x80)
errno=EILSEQ; /* 116 */
else if (inLen == 3 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else if (inLen >= 4 && ((pIn[1] & 0xC0) != 0x80 || (pIn[2] & 0xC0) != 0x80 || (pIn[3] & 0xC0) != 0x80))
errno=EILSEQ; /* 116 */
else
errno=EINVAL; /* 22 */
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
} else { /* invalid sequence */
/* Any other lead byte, e.g. a stray continuation byte 0x80..0xBF. */
errno=EILSEQ; /* 116 */
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return -1;
}
}
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
*numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_U28) {
/* See http://unicode.org/versions/corrigendum1.html */
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
// register size_t numS=0;
while (0 < inLen) {
register uint32_t in;
if (inLen == 1) {
errno=EINVAL; /* 22 */
*inBytesLeft=inLen;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn;
return -1;
}
if (pLastOutBuf < pOut)
break;
in=pIn[0];
in<<=8;
in+=pIn[1];
if (in == 0x0000) {
*pOut=0x00;
++pOut;
} else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
*pOut=in;
++pOut;
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
register uchar byte;
in>>=6;
in&=0x001F; /* 0b0000000000011111 */
in|=0x00C0; /* 0b0000000011000000 */
*pOut=in;
++pOut;
byte=pIn[1];
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
} else {
register uchar byte;
register uchar work;
byte=pIn[0];
byte>>=4;
byte|=0xE0; /* 0b11100000; */
*pOut=byte;
++pOut;
byte=pIn[0];
byte<<=2;
work=pIn[1];
work>>=6;
byte|=work;
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
byte=pIn[1];
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
}
pIn+=2;
inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
// *numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_T28) { /* UTF-16_UTF-8 */
/* See http://unicode.org/versions/corrigendum1.html */
register int inLen=*inBytesLeft;
register char * pOut=*outBuf;
register char * pIn=*inBuf;
register char * pLastOutBuf = *outBuf + *outBytesLeft - 1;
// register size_t numS=0;
while (0 < inLen) {
register uint32_t in;
if (inLen == 1) {
errno=EINVAL; /* 22 */
*inBytesLeft=0;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn;
return 0;
}
if (pLastOutBuf < pOut)
break;
in=pIn[0];
in<<=8;
in+=pIn[1];
if (in == 0x0000) {
*pOut=0x00;
++pOut;
} else if ((in & 0xFF80) == 0x0000) { /* U28: in & 0b1111111110000000 == 0x0000 */
*pOut=in;
++pOut;
} else if ((in & 0xF800) == 0x0000) { /* in & 0b1111100000000000 == 0x0000 */
register uchar byte;
in>>=6;
in&=0x001F; /* 0b0000000000011111 */
in|=0x00C0; /* 0b0000000011000000 */
*pOut=in;
++pOut;
byte=pIn[1];
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
} else if ((in & 0xFC00) == 0xD800) { /* in & 0b1111110000000000 == 0b1101100000000000, first surrogate character */
if (0xDC00 <= in ) {
errno=EINVAL; /* 22 */
*inBytesLeft=inLen-1;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn;
return -1;
} else if (inLen < 4) {
errno=EINVAL; /* 22 */
*inBytesLeft=inLen-2;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn+2;
return -1;
} else if ((pIn[2] & 0xFC) != 0xDC) { /* pIn[2] & 0b11111100 == 0b11011100, second surrogate character */
errno=EINVAL; /* 22 */
*inBytesLeft=inLen-2;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn+2;
return -1;
} else {
register uchar byte;
register uchar work;
in>>=6;
in&=0x000F; /* 0b0000000000001111 */
byte=in; /* wwww */
++byte; /* uuuuu */
work=byte; /* save uuuuu */
byte>>=2;
byte|=0xF0; /* 0b11110000; */
*pOut=byte;
++pOut;
byte=work;
byte&=0x03; /* 0b00000011; */
byte<<=4;
byte|=0x80; /* 0b10000000; */
work=pIn[1];
work&=0x3C; /* 0b00111100; */
work>>=2;
byte|=work;
*pOut=byte;
++pOut;
byte=pIn[1];
byte&=0x03; /* 0b00000011; */
byte<<=4;
byte|=0x80; /* 0b10000000; */
work=pIn[2];
work&=0x03; /* 0b00000011; */
work<<=2;
byte|=work;
work=pIn[3];
work>>=6;
byte|=work;
*pOut=byte;
++pOut;
byte=pIn[3];
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
pIn+=2;
inLen-=2;
}
} else if ((in & 0xFC00) == 0xDC00) { /* in & 0b11111100 == 0b11011100, second surrogate character */
errno=EINVAL; /* 22 */
*inBytesLeft=inLen-1;
*outBytesLeft-=(pOut-*outBuf);
*outBuf=pOut;
*inBuf=pIn;
return -1;
} else {
register uchar byte;
register uchar work;
byte=pIn[0];
byte>>=4;
byte|=0xE0; /* 0b11100000; */
*pOut=byte;
++pOut;
byte=pIn[0];
byte<<=2;
work=pIn[1];
work>>=6;
byte|=work;
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
byte=pIn[1];
byte&=0x3F; /* 0b00111111; */
byte|=0x80; /* 0b10000000; */
*pOut=byte;
++pOut;
}
pIn+=2;
inLen-=2;
}
*outBytesLeft-=(pOut-*outBuf);
*inBytesLeft=inLen;
*outBuf=pOut;
*inBuf=pIn;
// *numSub+=numS;
return 0;
} else if (cd->cnv_dmap->codingSchema == DMAP_U2U) { /* UTF-16_UCS-2 */
register int inLen=*inBytesLeft;
register int outLen=*outBytesLeft;
if (inLen <= outLen) {
memcpy(*outBuf, *inBuf, inLen);
(*outBytesLeft)-=inLen;
(*inBuf)+=inLen;
(*outBuf)+=inLen;
*inBytesLeft=0;
return 0;
}
memcpy(*outBuf, *inBuf, outLen);
(*outBytesLeft)=0;
(*inBuf)+=outLen;
(*outBuf)+=outLen;
*inBytesLeft-=outLen;
return (*inBytesLeft);
} else {
return -1;
}
return 0;
}
#ifdef DEBUG
/**
  Debug-build entry point for a conversion request.

  Looks at cd->converterType and hands the call, with every argument
  forwarded untouched, to the matching implementation.  When DEBUG is not
  defined a macro of the same name performs the same dispatch instead.

  @param cd            conversion descriptor; only converterType is read here
  @param inBuf         forwarded unchanged to the selected implementation
  @param inBytesLeft   forwarded unchanged to the selected implementation
  @param outBuf        forwarded unchanged to the selected implementation
  @param outBytesLeft  forwarded unchanged to the selected implementation
  @param numSub        forwarded unchanged to the selected implementation

  @return the value returned by myconv_iconv() or myconv_dmap(), or
          (size_t)-1 when cd->converterType matches neither
          CONVERTER_ICONV nor CONVERTER_DMAP.
*/
inline size_t myconv(myconv_t cd,
                     char **inBuf,
                     size_t *inBytesLeft,
                     char **outBuf,
                     size_t *outBytesLeft,
                     size_t *numSub)
{
  if (cd->converterType == CONVERTER_ICONV)
    return myconv_iconv(cd, inBuf, inBytesLeft, outBuf, outBytesLeft, numSub);

  if (cd->converterType == CONVERTER_DMAP)
    return myconv_dmap(cd, inBuf, inBytesLeft, outBuf, outBytesLeft, numSub);

  /* Unknown converter type: nothing was converted. */
  return (size_t) -1;
}
/**
  Debug-build helper that maps a converter type code to a printable name.

  @param type  converter type code, compared against CONVERTER_ICONV and
               CONVERTER_DMAP

  @return "iconv" for CONVERTER_ICONV, "dmap" for CONVERTER_DMAP and
          "?????" for any other value.  The result points at a string
          literal and must not be modified or freed.
*/
inline char * converterName(int32_t type)
{
  char *label = "?????";   /* fallback for unrecognised type codes */

  if (type == CONVERTER_ICONV) {
    label = "iconv";
  } else if (type == CONVERTER_DMAP) {
    label = "dmap";
  }
  return label;
}
#else
/*
  Non-DEBUG form of myconv(): picks myconv_iconv() or myconv_dmap() from
  (cd)->converterType and evaluates to that call's result, or to -1 when
  the type matches neither CONVERTER_ICONV nor CONVERTER_DMAP.

  The cd argument is expanded more than once, so it must be an expression
  without side effects.  The remaining arguments are expanded in both
  calls, but only the call that is selected is evaluated.
*/
#define myconv(cd, inBuf, inBytesLeft, outBuf, outBytesLeft, numSub)                         \
  (((cd)->converterType == CONVERTER_ICONV)                                                  \
     ? myconv_iconv((cd), (inBuf), (inBytesLeft), (outBuf), (outBytesLeft), (numSub))        \
     : (((cd)->converterType == CONVERTER_DMAP)                                              \
          ? myconv_dmap((cd), (inBuf), (inBytesLeft), (outBuf), (outBytesLeft), (numSub))    \
          : -1))
/*
  Non-DEBUG form of converterName(): evaluates to the string literal
  "iconv" for CONVERTER_ICONV, "dmap" for CONVERTER_DMAP and "?????" for
  any other value.  The argument may be expanded twice, so it must be an
  expression without side effects.
*/
#define converterName(type)              \
  (((type) == CONVERTER_ICONV)           \
     ? "iconv"                           \
     : (((type) == CONVERTER_DMAP)       \
          ? "dmap"                       \
          : "?????"))
#endif
/*
  Declarations only; neither function takes arguments or returns a value,
  and their bodies are not part of this section of the header.
  NOTE(review): judging by the names, these set up and release whatever
  process-wide state the converters rely on -- confirm against the
  implementation, including whether a required call order exists.
*/
void initMyconv();
void cleanupMyconv();
#endif