mariadb/extra/yassl/taocrypt/src/des.cpp
Harin Vadodaria b9f2b1c135 Bug#19370676 : YASSL PRE-AUTH BUFFER OVERFLOW WHEN CLIENT
LIES ABOUT SUITE_LEN_
               and
Bug#19355577 : YASSL PRE-AUTH BUFFER OVERFLOW WHEN CLIENT
               LIES ABOUT COMP_LEN_

Description : Updating yaSSL to version 2.3.4.
2014-08-23 08:59:03 +05:30

778 lines
25 KiB
C++

/*
Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1301 USA.
*/
/* C++ part based on Wei Dai's des.cpp from CryptoPP */
/* x86 asm is original */
#if defined(TAOCRYPT_KERNEL_MODE)
#define DO_TAOCRYPT_KERNEL_MODE
#endif // only some modules now support this
#include "runtime.hpp"
#include "des.hpp"
#ifdef USE_SYS_STL
#include <algorithm>
#else
#include "algorithm.hpp"
#endif
namespace STL = STL_NAMESPACE;
namespace TaoCrypt {
/* permuted choice table (key)
   56 entries, each a 1-based bit position inside the 8-byte key.
   BasicDES::SetKey subtracts 1 from an entry, then uses (pos >> 3) as the
   key byte index and (pos & 07) as the index into bytebit[]. */
static const byte pc1[] = {
57, 49, 41, 33, 25, 17, 9,
1, 58, 50, 42, 34, 26, 18,
10, 2, 59, 51, 43, 35, 27,
19, 11, 3, 60, 52, 44, 36,
63, 55, 47, 39, 31, 23, 15,
7, 62, 54, 46, 38, 30, 22,
14, 6, 61, 53, 45, 37, 29,
21, 13, 5, 28, 20, 12, 4
};
/* number left rotations of pc1
   One entry per key-schedule iteration (16 in total). The values are
   cumulative: BasicDES::SetKey adds totrot[i] to a bit index of the
   pc1-selected key and wraps the sum inside its 28-entry half. */
static const byte totrot[] = {
1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28
};
/* permuted choice key (table)
   48 entries, each a 1-based index into the 56-entry rotated bit array
   (pcr in BasicDES::SetKey). Entry j decides whether output bit j of the
   current round key is set; six consecutive entries fill one ks[] byte. */
static const byte pc2[] = {
14, 17, 11, 24, 1, 5,
3, 28, 15, 6, 21, 10,
23, 19, 12, 4, 26, 8,
16, 7, 27, 20, 13, 2,
41, 52, 31, 37, 47, 55,
30, 40, 51, 45, 33, 48,
44, 49, 39, 56, 34, 53,
46, 42, 50, 36, 29, 32
};
/* End of DES-defined tables */
/* bit 0 is left-most in byte
   Single-bit masks written in octal: bytebit[0] == 0x80 ... bytebit[7] == 0x01.
   BasicDES::SetKey uses them both to test a key bit and, shifted right by 2,
   to set one of the six low bits of a round-key byte. */
static const int bytebit[] = {
0200,0100,040,020,010,04,02,01
};
/* Round-function lookup tables: 8 tables of 64 word32 entries each.
   RawProcessBlock indexes table n with a 6-bit value ((work >> k) & 0x3f)
   and XORs the selected entries into the half-block being updated.
   The x86 asm path receives this array as its 'box' argument and addresses
   table n at byte offset n*256, i.e. it relies on 4-byte entries.
   NOTE(review): presumably these are the DES S-boxes pre-combined with the
   P permutation (hence the name) -- not verifiable from this file alone. */
const word32 Spbox[8][64] = {
{
0x01010400,0x00000000,0x00010000,0x01010404,
0x01010004,0x00010404,0x00000004,0x00010000,
0x00000400,0x01010400,0x01010404,0x00000400,
0x01000404,0x01010004,0x01000000,0x00000004,
0x00000404,0x01000400,0x01000400,0x00010400,
0x00010400,0x01010000,0x01010000,0x01000404,
0x00010004,0x01000004,0x01000004,0x00010004,
0x00000000,0x00000404,0x00010404,0x01000000,
0x00010000,0x01010404,0x00000004,0x01010000,
0x01010400,0x01000000,0x01000000,0x00000400,
0x01010004,0x00010000,0x00010400,0x01000004,
0x00000400,0x00000004,0x01000404,0x00010404,
0x01010404,0x00010004,0x01010000,0x01000404,
0x01000004,0x00000404,0x00010404,0x01010400,
0x00000404,0x01000400,0x01000400,0x00000000,
0x00010004,0x00010400,0x00000000,0x01010004},
{
0x80108020,0x80008000,0x00008000,0x00108020,
0x00100000,0x00000020,0x80100020,0x80008020,
0x80000020,0x80108020,0x80108000,0x80000000,
0x80008000,0x00100000,0x00000020,0x80100020,
0x00108000,0x00100020,0x80008020,0x00000000,
0x80000000,0x00008000,0x00108020,0x80100000,
0x00100020,0x80000020,0x00000000,0x00108000,
0x00008020,0x80108000,0x80100000,0x00008020,
0x00000000,0x00108020,0x80100020,0x00100000,
0x80008020,0x80100000,0x80108000,0x00008000,
0x80100000,0x80008000,0x00000020,0x80108020,
0x00108020,0x00000020,0x00008000,0x80000000,
0x00008020,0x80108000,0x00100000,0x80000020,
0x00100020,0x80008020,0x80000020,0x00100020,
0x00108000,0x00000000,0x80008000,0x00008020,
0x80000000,0x80100020,0x80108020,0x00108000},
{
0x00000208,0x08020200,0x00000000,0x08020008,
0x08000200,0x00000000,0x00020208,0x08000200,
0x00020008,0x08000008,0x08000008,0x00020000,
0x08020208,0x00020008,0x08020000,0x00000208,
0x08000000,0x00000008,0x08020200,0x00000200,
0x00020200,0x08020000,0x08020008,0x00020208,
0x08000208,0x00020200,0x00020000,0x08000208,
0x00000008,0x08020208,0x00000200,0x08000000,
0x08020200,0x08000000,0x00020008,0x00000208,
0x00020000,0x08020200,0x08000200,0x00000000,
0x00000200,0x00020008,0x08020208,0x08000200,
0x08000008,0x00000200,0x00000000,0x08020008,
0x08000208,0x00020000,0x08000000,0x08020208,
0x00000008,0x00020208,0x00020200,0x08000008,
0x08020000,0x08000208,0x00000208,0x08020000,
0x00020208,0x00000008,0x08020008,0x00020200},
{
0x00802001,0x00002081,0x00002081,0x00000080,
0x00802080,0x00800081,0x00800001,0x00002001,
0x00000000,0x00802000,0x00802000,0x00802081,
0x00000081,0x00000000,0x00800080,0x00800001,
0x00000001,0x00002000,0x00800000,0x00802001,
0x00000080,0x00800000,0x00002001,0x00002080,
0x00800081,0x00000001,0x00002080,0x00800080,
0x00002000,0x00802080,0x00802081,0x00000081,
0x00800080,0x00800001,0x00802000,0x00802081,
0x00000081,0x00000000,0x00000000,0x00802000,
0x00002080,0x00800080,0x00800081,0x00000001,
0x00802001,0x00002081,0x00002081,0x00000080,
0x00802081,0x00000081,0x00000001,0x00002000,
0x00800001,0x00002001,0x00802080,0x00800081,
0x00002001,0x00002080,0x00800000,0x00802001,
0x00000080,0x00800000,0x00002000,0x00802080},
{
0x00000100,0x02080100,0x02080000,0x42000100,
0x00080000,0x00000100,0x40000000,0x02080000,
0x40080100,0x00080000,0x02000100,0x40080100,
0x42000100,0x42080000,0x00080100,0x40000000,
0x02000000,0x40080000,0x40080000,0x00000000,
0x40000100,0x42080100,0x42080100,0x02000100,
0x42080000,0x40000100,0x00000000,0x42000000,
0x02080100,0x02000000,0x42000000,0x00080100,
0x00080000,0x42000100,0x00000100,0x02000000,
0x40000000,0x02080000,0x42000100,0x40080100,
0x02000100,0x40000000,0x42080000,0x02080100,
0x40080100,0x00000100,0x02000000,0x42080000,
0x42080100,0x00080100,0x42000000,0x42080100,
0x02080000,0x00000000,0x40080000,0x42000000,
0x00080100,0x02000100,0x40000100,0x00080000,
0x00000000,0x40080000,0x02080100,0x40000100},
{
0x20000010,0x20400000,0x00004000,0x20404010,
0x20400000,0x00000010,0x20404010,0x00400000,
0x20004000,0x00404010,0x00400000,0x20000010,
0x00400010,0x20004000,0x20000000,0x00004010,
0x00000000,0x00400010,0x20004010,0x00004000,
0x00404000,0x20004010,0x00000010,0x20400010,
0x20400010,0x00000000,0x00404010,0x20404000,
0x00004010,0x00404000,0x20404000,0x20000000,
0x20004000,0x00000010,0x20400010,0x00404000,
0x20404010,0x00400000,0x00004010,0x20000010,
0x00400000,0x20004000,0x20000000,0x00004010,
0x20000010,0x20404010,0x00404000,0x20400000,
0x00404010,0x20404000,0x00000000,0x20400010,
0x00000010,0x00004000,0x20400000,0x00404010,
0x00004000,0x00400010,0x20004010,0x00000000,
0x20404000,0x20000000,0x00400010,0x20004010},
{
0x00200000,0x04200002,0x04000802,0x00000000,
0x00000800,0x04000802,0x00200802,0x04200800,
0x04200802,0x00200000,0x00000000,0x04000002,
0x00000002,0x04000000,0x04200002,0x00000802,
0x04000800,0x00200802,0x00200002,0x04000800,
0x04000002,0x04200000,0x04200800,0x00200002,
0x04200000,0x00000800,0x00000802,0x04200802,
0x00200800,0x00000002,0x04000000,0x00200800,
0x04000000,0x00200800,0x00200000,0x04000802,
0x04000802,0x04200002,0x04200002,0x00000002,
0x00200002,0x04000000,0x04000800,0x00200000,
0x04200800,0x00000802,0x00200802,0x04200800,
0x00000802,0x04000002,0x04200802,0x04200000,
0x00200800,0x00000000,0x00000002,0x04200802,
0x00000000,0x00200802,0x04200000,0x00000800,
0x04000002,0x04000800,0x00000800,0x00200002},
{
0x10001040,0x00001000,0x00040000,0x10041040,
0x10000000,0x10001040,0x00000040,0x10000000,
0x00040040,0x10040000,0x10041040,0x00041000,
0x10041000,0x00041040,0x00001000,0x00000040,
0x10040000,0x10000040,0x10001000,0x00001040,
0x00041000,0x00040040,0x10040040,0x10041000,
0x00001040,0x00000000,0x00000000,0x10040040,
0x10000040,0x10001000,0x00041040,0x00040000,
0x00041040,0x00040000,0x10041000,0x00001000,
0x00000040,0x10040040,0x00001000,0x00041040,
0x10001000,0x00000040,0x10000040,0x10040000,
0x10040040,0x10000000,0x00040000,0x10001040,
0x00000000,0x10041040,0x00040040,0x10000040,
0x10040000,0x10001000,0x10001040,0x00000000,
0x10041040,0x00041000,0x00041000,0x00001040,
0x00001040,0x00040040,0x10000000,0x10041000}
};
/*
   Expand an 8-byte DES key into the 32-word round-key schedule k_.

   key     pointer to the key; bytes 0..7 are read (pc1 holds bit positions
           1..63, so the highest byte index touched is 7)
   length  ignored -- no size validation is performed here
   dir     when DECRYPTION, the 16 round-key pairs are stored in reverse
           order so the same round loop can be used for both directions

   The 'register' storage class used by earlier revisions was dropped: it is
   deprecated since C++11 and ill-formed from C++17 on.
*/
void BasicDES::SetKey(const byte* key, word32 /*length*/, CipherDir dir)
{
    byte buffer[56+56+8];
    byte *const pc1m = buffer;      /* key bits selected by pc1, one per byte */
    byte *const pcr  = pc1m + 56;   /* pc1m rotated for the current iteration */
    byte *const ks   = pcr + 56;    /* 8 round-key bytes, 6 bits used in each */
    int i, j;

    /* convert pc1 to bits of key */
    for (j = 0; j < 56; j++) {
        const int bit = pc1[j] - 1;                 /* 0-based bit location */
        pc1m[j] = (key[bit >> 3] & bytebit[bit & 07]) ? 1 : 0;
    }

    /* key chunk for each iteration */
    for (i = 0; i < 16; i++) {
        memset(ks, 0, 8);                           /* Clear key schedule */

        /* rotate left and right halves independently */
        for (j = 0; j < 56; j++) {
            const int halfEnd = (j < 28) ? 28 : 56;
            int src = j + totrot[i];
            if (src >= halfEnd)
                src -= 28;                          /* wrap inside the half */
            pcr[j] = pc1m[src];
        }

        /* select bits individually: six pc2 entries fill one ks byte */
        for (j = 0; j < 48; j++) {
            if (pcr[pc2[j] - 1])
                ks[j / 6] |= bytebit[j % 6] >> 2;
        }

        /* Now convert to odd/even interleaved form for use in F */
        k_[2*i]     = ((word32)ks[0] << 24)
                    | ((word32)ks[2] << 16)
                    | ((word32)ks[4] << 8)
                    | ((word32)ks[6]);
        k_[2*i + 1] = ((word32)ks[1] << 24)
                    | ((word32)ks[3] << 16)
                    | ((word32)ks[5] << 8)
                    | ((word32)ks[7]);
    }

    /* reverse key schedule order, keeping each (even, odd) pair together */
    if (dir == DECRYPTION) {
        for (i = 0; i < 16; i += 2) {
            STL::swap(k_[i],   k_[32 - 2 - i]);
            STL::swap(k_[i+1], k_[32 - 1 - i]);
        }
    }
}
/* Permutation applied to the two half-blocks before the rounds.
   Implemented as five rotate / masked-swap stages: each stage rotates one
   half, then exchanges the bits selected by a mask between the halves. */
static inline void IPERM(word32& left, word32& right)
{
    word32 swapBits;

    right    = rotlFixed(right, 4U);
    swapBits = (left ^ right) & 0xf0f0f0f0;
    left    ^= swapBits;
    right   ^= swapBits;

    right    = rotrFixed(right, 20U);
    swapBits = (left ^ right) & 0xffff0000;
    left    ^= swapBits;
    right   ^= swapBits;

    right    = rotrFixed(right, 18U);
    swapBits = (left ^ right) & 0x33333333;
    left    ^= swapBits;
    right   ^= swapBits;

    right    = rotrFixed(right, 6U);
    swapBits = (left ^ right) & 0x00ff00ff;
    left    ^= swapBits;
    right   ^= swapBits;

    right    = rotlFixed(right, 9U);
    swapBits = (left ^ right) & 0xaaaaaaaa;
    left    ^= swapBits;
    right   ^= swapBits;
    left     = rotlFixed(left, 1U);
}
/* Permutation applied to the two half-blocks after the rounds.
   Uses the same masks as IPERM in the opposite order, with the rotations
   reversed and applied to the other half. */
static inline void FPERM(word32& left, word32& right)
{
    word32 swapBits;

    right    = rotrFixed(right, 1U);
    swapBits = (left ^ right) & 0xaaaaaaaa;
    right   ^= swapBits;
    left    ^= swapBits;

    left     = rotrFixed(left, 9U);
    swapBits = (left ^ right) & 0x00ff00ff;
    right   ^= swapBits;
    left    ^= swapBits;

    left     = rotlFixed(left, 6U);
    swapBits = (left ^ right) & 0x33333333;
    right   ^= swapBits;
    left    ^= swapBits;

    left     = rotlFixed(left, 18U);
    swapBits = (left ^ right) & 0xffff0000;
    right   ^= swapBits;
    left    ^= swapBits;

    left     = rotlFixed(left, 20U);
    swapBits = (left ^ right) & 0xf0f0f0f0;
    right   ^= swapBits;
    left    ^= swapBits;

    left     = rotrFixed(left, 4U);
}
/* Run the round loop over one block, without the initial/final permutation.
   lIn / rIn hold the two half-blocks and are overwritten with the result.
   The loop runs 8 times; each pass consumes four words of k_ and updates
   the left half (two key words) and then the right half (two key words). */
void BasicDES::RawProcessBlock(word32& lIn, word32& rIn) const
{
    word32 left  = lIn;
    word32 right = rIn;
    const word32* subKey = k_;

    for (unsigned pass = 0; pass < 8; ++pass, subKey += 4) {
        word32 t;

        /* update the left half from the right half */
        t = rotrFixed(right, 4U) ^ subKey[0];
        left ^= Spbox[6][t & 0x3f]
              ^ Spbox[4][(t >> 8) & 0x3f]
              ^ Spbox[2][(t >> 16) & 0x3f]
              ^ Spbox[0][(t >> 24) & 0x3f];
        t = right ^ subKey[1];
        left ^= Spbox[7][t & 0x3f]
              ^ Spbox[5][(t >> 8) & 0x3f]
              ^ Spbox[3][(t >> 16) & 0x3f]
              ^ Spbox[1][(t >> 24) & 0x3f];

        /* update the right half from the new left half */
        t = rotrFixed(left, 4U) ^ subKey[2];
        right ^= Spbox[6][t & 0x3f]
               ^ Spbox[4][(t >> 8) & 0x3f]
               ^ Spbox[2][(t >> 16) & 0x3f]
               ^ Spbox[0][(t >> 24) & 0x3f];
        t = left ^ subKey[3];
        right ^= Spbox[7][t & 0x3f]
               ^ Spbox[5][(t >> 8) & 0x3f]
               ^ Spbox[3][(t >> 16) & 0x3f]
               ^ Spbox[1][(t >> 24) & 0x3f];
    }

    lIn = left;
    rIn = right;
}
typedef BlockGetAndPut<word32, BigEndian> Block;
void DES::ProcessAndXorBlock(const byte* in, const byte* xOr, byte* out) const
{
word32 l,r;
Block::Get(in)(l)(r);
IPERM(l,r);
RawProcessBlock(l, r);
FPERM(l,r);
Block::Put(xOr, out)(r)(l);
}
void DES_EDE2::SetKey(const byte* key, word32 sz, CipherDir dir)
{
des1_.SetKey(key, sz, dir);
des2_.SetKey(key + 8, sz, ReverseDir(dir));
}
void DES_EDE2::ProcessAndXorBlock(const byte* in, const byte* xOr,
byte* out) const
{
word32 l,r;
Block::Get(in)(l)(r);
IPERM(l,r);
des1_.RawProcessBlock(l, r);
des2_.RawProcessBlock(r, l);
des1_.RawProcessBlock(l, r);
FPERM(l,r);
Block::Put(xOr, out)(r)(l);
}
void DES_EDE3::SetKey(const byte* key, word32 sz, CipherDir dir)
{
des1_.SetKey(key+(dir==ENCRYPTION?0:2*8), sz, dir);
des2_.SetKey(key+8, sz, ReverseDir(dir));
des3_.SetKey(key+(dir==DECRYPTION?0:2*8), sz, dir);
}
#if defined(DO_DES_ASM)
// ia32 optimized version
/* Process sz bytes with the assembly block routine when MMX is available;
   otherwise defer to Mode_BASE::Process.

   out  destination buffer
   in   source buffer
   sz   byte count; only whole DES_BLOCK_SIZE blocks are processed
        (sz / DES_BLOCK_SIZE), any remainder is ignored

   r_ holds the CBC chaining value between blocks and between calls.

   CBC decryption saves the ciphertext block before it is decrypted, so the
   next chaining value is correct even when out aliases in (in-place use).
   Previously the chaining value was copied from 'in' after 'out' had been
   written, which picked up plaintext whenever in == out. */
void DES_EDE3::Process(byte* out, const byte* in, word32 sz)
{
    if (!isMMX) {
        Mode_BASE::Process(out, in, sz);
        return;
    }

    word32 blocks = sz / DES_BLOCK_SIZE;

    if (mode_ == CBC) {
        if (dir_ == ENCRYPTION) {
            while (blocks--) {
                /* chain: XOR the plaintext into the previous ciphertext */
                r_[0] ^= *(word32*)in;
                r_[1] ^= *(word32*)(in + 4);

                /* encrypt r_ in place, then emit it */
                AsmProcess((byte*)r_, (byte*)r_, (void*)Spbox);
                memcpy(out, r_, DES_BLOCK_SIZE);

                in  += DES_BLOCK_SIZE;
                out += DES_BLOCK_SIZE;
            }
        }
        else {
            while (blocks--) {
                /* keep the ciphertext: writing out may overwrite in */
                byte cipher[DES_BLOCK_SIZE];
                memcpy(cipher, in, DES_BLOCK_SIZE);

                AsmProcess(in, out, (void*)Spbox);

                /* un-chain with the previous ciphertext block */
                *(word32*)out       ^= r_[0];
                *(word32*)(out + 4) ^= r_[1];

                /* this ciphertext block chains into the next one */
                memcpy(r_, cipher, DES_BLOCK_SIZE);

                out += DES_BLOCK_SIZE;
                in  += DES_BLOCK_SIZE;
            }
        }
    }
    else {
        /* no chaining: each block is processed independently */
        while (blocks--) {
            AsmProcess(in, out, (void*)Spbox);
            out += DES_BLOCK_SIZE;
            in  += DES_BLOCK_SIZE;
        }
    }
}
#endif // DO_DES_ASM
void DES_EDE3::ProcessAndXorBlock(const byte* in, const byte* xOr,
byte* out) const
{
word32 l,r;
Block::Get(in)(l)(r);
IPERM(l,r);
des1_.RawProcessBlock(l, r);
des2_.RawProcessBlock(r, l);
des3_.RawProcessBlock(l, r);
FPERM(l,r);
Block::Put(xOr, out)(r)(l);
}
#if defined(DO_DES_ASM)
/* Uses IPERM algorithm from above
left is in eax
right is in ebx
uses ecx

Assembly counterpart of the C++ IPERM(): five stages, each made of a
rotate followed by a masked exchange (ecx = (eax ^ ebx) & mask, then ecx is
XORed into both eax and ebx). Masks and rotate counts are the same as in
IPERM(). No comments can be placed inside the macro body because every
line ends in a continuation backslash.
*/
#define AsmIPERM() \
AS2( rol ebx, 4 ) \
AS2( mov ecx, eax ) \
AS2( xor ecx, ebx ) \
AS2( and ecx, 0xf0f0f0f0 ) \
AS2( xor ebx, ecx ) \
AS2( xor eax, ecx ) \
AS2( ror ebx, 20 ) \
AS2( mov ecx, eax ) \
AS2( xor ecx, ebx ) \
AS2( and ecx, 0xffff0000 ) \
AS2( xor ebx, ecx ) \
AS2( xor eax, ecx ) \
AS2( ror ebx, 18 ) \
AS2( mov ecx, eax ) \
AS2( xor ecx, ebx ) \
AS2( and ecx, 0x33333333 ) \
AS2( xor ebx, ecx ) \
AS2( xor eax, ecx ) \
AS2( ror ebx, 6 ) \
AS2( mov ecx, eax ) \
AS2( xor ecx, ebx ) \
AS2( and ecx, 0x00ff00ff ) \
AS2( xor ebx, ecx ) \
AS2( xor eax, ecx ) \
AS2( rol ebx, 9 ) \
AS2( mov ecx, eax ) \
AS2( xor ecx, ebx ) \
AS2( and ecx, 0xaaaaaaaa ) \
AS2( xor eax, ecx ) \
AS2( rol eax, 1 ) \
AS2( xor ebx, ecx )
/* Uses FPERM algorithm from above
left is in eax
right is in ebx
uses ecx

Assembly counterpart of the C++ FPERM(): the same masks as AsmIPERM in the
opposite order, with the rotations applied to eax (left) instead of ebx
after the initial 'ror ebx, 1'. Each stage computes ecx = (eax ^ ebx) & mask
and XORs it into both registers before the next rotate.
*/
#define AsmFPERM() \
AS2( ror ebx, 1 ) \
AS2( mov ecx, eax ) \
AS2( xor ecx, ebx ) \
AS2( and ecx, 0xaaaaaaaa ) \
AS2( xor eax, ecx ) \
AS2( xor ebx, ecx ) \
AS2( ror eax, 9 ) \
AS2( mov ecx, ebx ) \
AS2( xor ecx, eax ) \
AS2( and ecx, 0x00ff00ff ) \
AS2( xor eax, ecx ) \
AS2( xor ebx, ecx ) \
AS2( rol eax, 6 ) \
AS2( mov ecx, ebx ) \
AS2( xor ecx, eax ) \
AS2( and ecx, 0x33333333 ) \
AS2( xor eax, ecx ) \
AS2( xor ebx, ecx ) \
AS2( rol eax, 18 ) \
AS2( mov ecx, ebx ) \
AS2( xor ecx, eax ) \
AS2( and ecx, 0xffff0000 ) \
AS2( xor eax, ecx ) \
AS2( xor ebx, ecx ) \
AS2( rol eax, 20 ) \
AS2( mov ecx, ebx ) \
AS2( xor ecx, eax ) \
AS2( and ecx, 0xf0f0f0f0 ) \
AS2( xor eax, ecx ) \
AS2( xor ebx, ecx ) \
AS2( ror eax, 4 )
/* DesRound implements this algorithm:
word32 work = rotrFixed(r, 4U) ^ key[0];
l ^= Spbox[6][(work) & 0x3f]
^ Spbox[4][(work >> 8) & 0x3f]
^ Spbox[2][(work >> 16) & 0x3f]
^ Spbox[0][(work >> 24) & 0x3f];
work = r ^ key[1];
l ^= Spbox[7][(work) & 0x3f]
^ Spbox[5][(work >> 8) & 0x3f]
^ Spbox[3][(work >> 16) & 0x3f]
^ Spbox[1][(work >> 24) & 0x3f];
work = rotrFixed(l, 4U) ^ key[2];
r ^= Spbox[6][(work) & 0x3f]
^ Spbox[4][(work >> 8) & 0x3f]
^ Spbox[2][(work >> 16) & 0x3f]
^ Spbox[0][(work >> 24) & 0x3f];
work = l ^ key[3];
r ^= Spbox[7][(work) & 0x3f]
^ Spbox[5][(work >> 8) & 0x3f]
^ Spbox[3][(work >> 16) & 0x3f]
^ Spbox[1][(work >> 24) & 0x3f];
left is in eax
right is in ebx
key is in edx
advances key for next round
uses ecx, esi, and edi

Implementation notes:
- ebp must hold the base address of the lookup tables; table n is addressed
  at byte offset n*256 and an entry at index*4 inside it.
- 'and ecx, 0x3f3f3f3f' masks all four 6-bit indices at once; cl and ch
  then yield two of them, and 'shr ecx, 16' exposes the other two.
- One expansion reads four key words ([edx] .. [edx + 12]) and then adds
  16 to edx, so it matches one pass of the loop in RawProcessBlock.
*/
#define DesRound() \
AS2( mov ecx, ebx )\
AS2( mov esi, DWORD PTR [edx] )\
AS2( ror ecx, 4 )\
AS2( xor ecx, esi )\
AS2( and ecx, 0x3f3f3f3f )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor eax, [ebp + esi*4 + 6*256] )\
AS2( shr ecx, 16 )\
AS2( xor eax, [ebp + edi*4 + 4*256] )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor eax, [ebp + esi*4 + 2*256] )\
AS2( mov esi, DWORD PTR [edx + 4] )\
AS2( xor eax, [ebp + edi*4] )\
AS2( mov ecx, ebx )\
AS2( xor ecx, esi )\
AS2( and ecx, 0x3f3f3f3f )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor eax, [ebp + esi*4 + 7*256] )\
AS2( shr ecx, 16 )\
AS2( xor eax, [ebp + edi*4 + 5*256] )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor eax, [ebp + esi*4 + 3*256] )\
AS2( mov esi, DWORD PTR [edx + 8] )\
AS2( xor eax, [ebp + edi*4 + 1*256] )\
AS2( mov ecx, eax )\
AS2( ror ecx, 4 )\
AS2( xor ecx, esi )\
AS2( and ecx, 0x3f3f3f3f )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor ebx, [ebp + esi*4 + 6*256] )\
AS2( shr ecx, 16 )\
AS2( xor ebx, [ebp + edi*4 + 4*256] )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor ebx, [ebp + esi*4 + 2*256] )\
AS2( mov esi, DWORD PTR [edx + 12] )\
AS2( xor ebx, [ebp + edi*4] )\
AS2( mov ecx, eax )\
AS2( xor ecx, esi )\
AS2( and ecx, 0x3f3f3f3f )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor ebx, [ebp + esi*4 + 7*256] )\
AS2( shr ecx, 16 )\
AS2( xor ebx, [ebp + edi*4 + 5*256] )\
AS2( movzx esi, cl )\
AS2( movzx edi, ch )\
AS2( xor ebx, [ebp + esi*4 + 3*256] )\
AS2( add edx, 16 )\
AS2( xor ebx, [ebp + edi*4 + 1*256] )
/* x86 assembly transform of one 8-byte block with the three key schedules.

   in   source block (two 32-bit words, byte-swapped after loading)
   out  destination block; written only after all rounds are done, so the
        caller may pass the same buffer for in and out
   box  base address of the lookup tables (callers pass Spbox)

   Sequence: load + bswap, AsmIPERM, 8 x DesRound, exchange halves,
   8 x DesRound, exchange halves, 8 x DesRound, AsmFPERM, bswap + store.
   The two exchanges mirror the swapped (r, l) arguments of the middle
   RawProcessBlock call in the C++ ProcessAndXorBlock.

   Register use: eax = left, ebx = right, edx = key pointer, ebp = box.
   MMX registers are used as scratch to save general registers, hence the
   'emms' in both epilogs.

   NOTE(review): edx is never reset between the three groups of rounds; it
   simply keeps advancing 16 bytes per DesRound (128 bytes per group). This
   presumably relies on the three key schedules being contiguous in the
   object -- confirm against the class layout in des.hpp.
   NOTE(review): the GCC variant modifies its input registers (eax, ecx,
   edx, esi) but lists only edi, memory and cc as clobbers. */
#ifdef _MSC_VER
__declspec(naked)
#else
__attribute__ ((noinline))
#endif
void DES_EDE3::AsmProcess(const byte* in, byte* out, void* box) const
{
#ifdef __GNUC__
/* GCC: the whole body is one inline-asm string in Intel syntax.
   Inputs arrive as edx = this, esi = in, eax = box, ecx = out.
   PROLOG saves ebx/ebp on the stack, keeps ebp in mm6 and out in mm7,
   then loads ebp with box. */
#define AS1(x) #x ";"
#define AS2(x, y) #x ", " #y ";"
#define PROLOG() \
__asm__ __volatile__ \
( \
".intel_syntax noprefix;" \
"push ebx;" \
"push ebp;" \
"movd mm6, ebp;" \
"movd mm7, ecx;" \
"mov ebp, eax;"
#define EPILOG() \
"pop ebp;" \
"pop ebx;" \
"emms;" \
".att_syntax;" \
: \
: "d" (this), "S" (in), "a" (box), "c" (out) \
: "%edi", "memory", "cc" \
);
#else
/* MSVC: naked function, so the prolog/epilog are written by hand.
   Arguments are read from the stack ([ebp + 8] = in, [ebp + 12] = out,
   [ebp + 16] = box); ecx is copied to edx as the object pointer
   (presumably 'this' under the thiscall convention). edi, ebx, esi and the
   frame pointer are parked in mm3..mm6. 'ret 12' pops the three 4-byte
   arguments. */
#define AS1(x) __asm x
#define AS2(x, y) __asm x, y
#define PROLOG() \
AS1( push ebp ) \
AS2( mov ebp, esp ) \
AS2( movd mm3, edi ) \
AS2( movd mm4, ebx ) \
AS2( movd mm5, esi ) \
AS2( movd mm6, ebp ) \
AS2( mov esi, DWORD PTR [ebp + 8] ) \
AS2( mov edx, ecx ) \
AS2( mov ebp, DWORD PTR [ebp + 16] )
// ebp restored at end
#define EPILOG() \
AS2( movd edi, mm3 ) \
AS2( movd ebx, mm4 ) \
AS2( movd esi, mm5 ) \
AS2( mov esp, ebp ) \
AS1( pop ebp ) \
AS1( emms ) \
AS1( ret 12 )
#endif
PROLOG()
// keep a copy of the object pointer in mm2 (not read again in this function)
AS2( movd mm2, edx )
// hard-coded byte offset from the object start to the first key schedule;
// must stay in sync with the class layout for the compiler in use
#ifdef OLD_GCC_OFFSET
AS2( add edx, 60 ) // des1 = des1 key
#else
AS2( add edx, 56 ) // des1 = des1 key
#endif
// load the block and convert both words with bswap
AS2( mov eax, DWORD PTR [esi] )
AS2( mov ebx, DWORD PTR [esi + 4] )
AS1( bswap eax ) // left
AS1( bswap ebx ) // right
AsmIPERM()
// first group of rounds
DesRound() // 1
DesRound() // 2
DesRound() // 3
DesRound() // 4
DesRound() // 5
DesRound() // 6
DesRound() // 7
DesRound() // 8
// swap left and right
AS2( xchg eax, ebx )
// second group of rounds, operating on the exchanged halves
DesRound() // 1
DesRound() // 2
DesRound() // 3
DesRound() // 4
DesRound() // 5
DesRound() // 6
DesRound() // 7
DesRound() // 8
// swap left and right
AS2( xchg eax, ebx )
// third group of rounds
DesRound() // 1
DesRound() // 2
DesRound() // 3
DesRound() // 4
DesRound() // 5
DesRound() // 6
DesRound() // 7
DesRound() // 8
AsmFPERM()
//end
// recover the ebp value saved in mm6 by PROLOG
AS2( movd ebp, mm6 )
// swap and write out
AS1( bswap ebx )
AS1( bswap eax )
#ifdef __GNUC__
AS2( movd esi, mm7 ) // outBlock
#else
AS2( mov esi, DWORD PTR [ebp + 12] ) // outBlock
#endif
AS2( mov DWORD PTR [esi], ebx ) // right first
AS2( mov DWORD PTR [esi + 4], eax )
EPILOG()
}
#endif // defined(DO_DES_ASM)
} // namespace