mariadb/storage/tokudb/hatoku_cmp.cc

#include "hatoku_cmp.h"

#ifdef WORDS_BIGENDIAN
#error "WORDS_BIGENDIAN not supported"
#endif

//
// Looks up where a variable-length field lives inside the var-field data
// area. The offset table holds one entry per var field; entry i is read as
// the end of field i, and the start of field i is taken from entry i-1
// (or 0 for the very first field).
//
void get_var_field_info(
    uint32_t* field_len, // output: length of the field (end - start)
    uint32_t* start_offset, // output: offset at which the field's data starts
    uint32_t var_field_index, // input: index of the var field we want info on
    const uchar* var_field_offset_ptr, // input: pointer to where offset information for all var fields begins
    uint32_t num_offset_bytes // input: number of bytes used per stored offset (1 or 2)
    )
{
    uint32_t begin = 0;
    uint32_t end = 0;

    switch (num_offset_bytes) {
    case 1:
        end = var_field_offset_ptr[var_field_index];
        if (var_field_index != 0) {
            begin = var_field_offset_ptr[var_field_index - 1];
        }
        break;
    case 2:
        end = uint2korr(var_field_offset_ptr + 2*var_field_index);
        if (var_field_index != 0) {
            begin = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
        }
        break;
    default:
        // only 1- and 2-byte offsets are handled
        assert(false);
        break;
    }

    *start_offset = begin;
    // a field cannot end before it starts
    assert(end >= begin);
    *field_len = end - begin;
}

//
// Computes the offset at which blob data begins, relative to the start of
// the var-field data. When offsets exist (len_of_offsets != 0) the value is
// read from the offset entry stored immediately before var_field_data_ptr;
// when there are no offsets the result is 0.
//
void get_blob_field_info(
    uint32_t* start_offset, // output: offset at which the blobs start
    uint32_t len_of_offsets, // input: nonzero if var-field offsets are present
    const uchar* var_field_data_ptr, // input: pointer just past the stored offsets
    uint32_t num_offset_bytes // input: number of bytes used per stored offset (1 or 2)
    )
{
    //
    // Initialized up front so that *start_offset is never written from an
    // indeterminate value, even if the assert below is compiled out and
    // num_offset_bytes holds an unexpected value.
    //
    uint32_t data_end_offset = 0;

    if (len_of_offsets) {
        switch (num_offset_bytes) {
        case (1):
            // last one-byte entry sits right before var_field_data_ptr
            data_end_offset = (var_field_data_ptr - 1)[0];
            break;
        case (2):
            // last two-byte entry sits right before var_field_data_ptr
            data_end_offset = uint2korr(var_field_data_ptr - 2);
            break;
        default:
            assert(false);
            break;
        }
    }
    *start_offset = data_end_offset;
}


// this function is pattern matched from
// InnoDB's get_innobase_type_from_mysql_type
//
// Maps the real MySQL type of a field to the TOKU_TYPE used in key
// descriptors. Returns toku_type_unknown for any type that is not
// handled by one of the cases below.
//
TOKU_TYPE mysql_to_toku_type (Field* field) {
    switch (field->real_type()) {
    // types that are handled as integers
    case MYSQL_TYPE_LONG:
    case MYSQL_TYPE_LONGLONG:
    case MYSQL_TYPE_TINY:
    case MYSQL_TYPE_SHORT:
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_DATE:
    case MYSQL_TYPE_YEAR:
    case MYSQL_TYPE_NEWDATE:
    case MYSQL_TYPE_ENUM:
    case MYSQL_TYPE_SET:
        return toku_type_int;

    case MYSQL_TYPE_TIME:
    case MYSQL_TYPE_DATETIME:
    case MYSQL_TYPE_TIMESTAMP:
#ifdef MARIADB_BASE_VERSION
        //
        // MariaDB temporal fields with fractional seconds report a
        // binary key type; those are handled as fixed binary
        //
        if (field->key_type() == HA_KEYTYPE_BINARY) {
            return toku_type_fixbinary;
        }
#endif
        return toku_type_int;

    case MYSQL_TYPE_DOUBLE:
        return toku_type_double;

    case MYSQL_TYPE_FLOAT:
        return toku_type_float;

    // types that are handled as fixed-length binary
#if 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
    case MYSQL_TYPE_DATETIME2:
    case MYSQL_TYPE_TIMESTAMP2:
    case MYSQL_TYPE_TIME2:
#endif
    case MYSQL_TYPE_NEWDECIMAL:
    case MYSQL_TYPE_BIT:
        return toku_type_fixbinary;

    // fixed and variable length strings: binary flag picks the flavor
    case MYSQL_TYPE_STRING:
        return field->binary() ? toku_type_fixbinary : toku_type_fixstring;

    case MYSQL_TYPE_VARCHAR:
        return field->binary() ? toku_type_varbinary : toku_type_varstring;

    case MYSQL_TYPE_TINY_BLOB:
    case MYSQL_TYPE_MEDIUM_BLOB:
    case MYSQL_TYPE_BLOB:
    case MYSQL_TYPE_LONG_BLOB:
        return toku_type_blob;

    //
    // Old types that are believed to no longer appear in any 5.1 tables,
    // plus geometry, which is not supported yet. The assert is here in
    // case that belief turns out to be wrong.
    //
    case MYSQL_TYPE_GEOMETRY:
    case MYSQL_TYPE_DECIMAL:
    case MYSQL_TYPE_VAR_STRING:
    case MYSQL_TYPE_NULL:
        assert(false);
        break;
    }
    return toku_type_unknown;
}


//
// Resolves a charset number to its CHARSET_INFO. The default charset and
// latin1 are checked first; anything else goes through get_charset().
// (patternmatched off of InnoDB, due to MySQL bug 42649)
//
static inline CHARSET_INFO* get_charset_from_num (uint32_t charset_number) {
    if (charset_number == default_charset_info->number) {
        return default_charset_info;
    }
    if (charset_number == my_charset_latin1.number) {
        return &my_charset_latin1;
    }
    return get_charset(charset_number, MYF(MY_WME));
}


//
// used to read the length of a variable sized field in a tokudb key (buf).
//
//
// Decodes the length prefix at buf. The first byte is always the low byte;
// when length_bytes is 2 the second byte supplies bits 8..15.
//
static inline uint32_t get_length_from_var_tokudata (uchar* buf, uint32_t length_bytes) {
    const uint32_t low_byte = (uint32_t)buf[0];
    if (length_bytes != 2) {
        return low_byte;
    }
    const uint32_t high_byte = (uint32_t)buf[1];
    return low_byte + (high_byte << 8);
}

//
// used to deduce the number of bytes used to store the length of a varstring/varbinary
// in a key field stored in tokudb
//
//
// Returns how many bytes are needed for a length prefix given the maximum
// number of bytes a value may have: 1 when the maximum fits in one byte
// (<= 255), otherwise 2.
//
static inline uint32_t get_length_bytes_from_max(uint32_t max_num_bytes) {
    if (max_num_bytes > 255) {
        return 2;
    }
    return 1;
}


//
// assuming MySQL in little endian, and we are storing in little endian
//
//
// Copies an integer of num_bytes bytes (1, 2, 3, 4 or 8) from the MySQL
// buffer into the tokudb buffer unchanged, and returns the position in
// to_tokudb right after the copied bytes. Any other width asserts and
// copies nothing.
//
static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
    if ((num_bytes >= 1 && num_bytes <= 4) || num_bytes == 8) {
        memcpy(to_tokudb, from_mysql, num_bytes);
    }
    else {
        assert(false);
    }
    return to_tokudb+num_bytes;
}

//
// assuming MySQL in little endian, and we are unpacking to little endian
//
//
// Copies an integer of num_bytes bytes (1, 2, 3, 4 or 8) from the tokudb
// buffer into the MySQL buffer unchanged, and returns the position in
// from_tokudb right after the bytes that were read. Any other width
// asserts and copies nothing.
//
static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
    if ((num_bytes >= 1 && num_bytes <= 4) || num_bytes == 8) {
        memcpy(to_mysql, from_tokudb, num_bytes);
    }
    else {
        assert(false);
    }
    return from_tokudb+num_bytes;
}

//
// Compares two integers of num_bytes bytes (1, 2, 3, 4 or 8) stored at
// a_buf and b_buf, interpreting them as unsigned or signed according to
// is_unsigned. The result is negative, zero or positive when a is less
// than, equal to or greater than b. Widths of 1-3 bytes return the plain
// difference; 4- and 8-byte widths return -1/0/1.
//
static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, uint32_t num_bytes) {
    if (is_unsigned) {
        uint32_t lhs = 0;
        uint32_t rhs = 0;
        uint64_t lhs_wide = 0;
        uint64_t rhs_wide = 0;
        switch (num_bytes) {
        case 1:
            lhs = *a_buf;
            rhs = *b_buf;
            return lhs - rhs;
        case 2:
            lhs = uint2korr(a_buf);
            rhs = uint2korr(b_buf);
            return lhs - rhs;
        case 3:
            lhs = uint3korr(a_buf);
            rhs = uint3korr(b_buf);
            return lhs - rhs;
        case 4:
            // full 32-bit range: a difference could wrap, so compare
            lhs = uint4korr(a_buf);
            rhs = uint4korr(b_buf);
            return (lhs < rhs) ? -1 : ((lhs > rhs) ? 1 : 0);
        case 8:
            lhs_wide = uint8korr(a_buf);
            rhs_wide = uint8korr(b_buf);
            return (lhs_wide < rhs_wide) ? -1 : ((lhs_wide > rhs_wide) ? 1 : 0);
        default:
            break;
        }
    }
    else {
        int32_t lhs = 0;
        int32_t rhs = 0;
        int64_t lhs_wide = 0;
        int64_t rhs_wide = 0;
        switch (num_bytes) {
        case 1:
            lhs = *(signed char *)a_buf;
            rhs = *(signed char *)b_buf;
            return lhs - rhs;
        case 2:
            lhs = sint2korr(a_buf);
            rhs = sint2korr(b_buf);
            return lhs - rhs;
        case 3:
            lhs = sint3korr(a_buf);
            rhs = sint3korr(b_buf);
            return lhs - rhs;
        case 4:
            // full 32-bit range: a difference could overflow, so compare
            lhs = sint4korr(a_buf);
            rhs = sint4korr(b_buf);
            return (lhs < rhs) ? -1 : ((lhs > rhs) ? 1 : 0);
        case 8:
            lhs_wide = sint8korr(a_buf);
            rhs_wide = sint8korr(b_buf);
            return (lhs_wide < rhs_wide) ? -1 : ((lhs_wide > rhs_wide) ? 1 : 0);
        default:
            break;
        }
    }
    //
    // only reached for an unsupported num_bytes
    //
    assert(false);
    return 0;
}

//
// Copies sizeof(double) bytes from the MySQL buffer into the tokudb buffer
// and returns the position in to_tokudb right after them.
//
static inline uchar* pack_toku_double (uchar* to_tokudb, uchar* from_mysql) {
    uchar* next_pos = to_tokudb + sizeof(double);
    memcpy(to_tokudb, from_mysql, sizeof(double));
    return next_pos;
}


//
// Copies sizeof(double) bytes from the tokudb buffer into the MySQL buffer
// and returns the position in from_tokudb right after them.
//
static inline uchar* unpack_toku_double(uchar* to_mysql, uchar* from_tokudb) {
    uchar* next_pos = from_tokudb + sizeof(double);
    memcpy(to_mysql, from_tokudb, sizeof(double));
    return next_pos;
}

//
// Reads a double from each buffer with doubleget and returns -1, 1 or 0
// when a is less than, greater than, or neither less nor greater than b.
//
static inline int cmp_toku_double(uchar* a_buf, uchar* b_buf) {
    double lhs;
    double rhs;
    doubleget(lhs, a_buf);
    doubleget(rhs, b_buf);
    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}


//
// Copies sizeof(float) bytes from the MySQL buffer into the tokudb buffer
// and returns the position in to_tokudb right after them.
//
static inline uchar* pack_toku_float (uchar* to_tokudb, uchar* from_mysql) {
    uchar* next_pos = to_tokudb + sizeof(float);
    memcpy(to_tokudb, from_mysql, sizeof(float));
    return next_pos;
}


//
// Copies sizeof(float) bytes from the tokudb buffer into the MySQL buffer
// and returns the position in from_tokudb right after them.
//
static inline uchar* unpack_toku_float(uchar* to_mysql, uchar* from_tokudb) {
    uchar* next_pos = from_tokudb + sizeof(float);
    memcpy(to_mysql, from_tokudb, sizeof(float));
    return next_pos;
}

//
// Reads a float from each buffer and returns -1, 1 or 0 when a is less
// than, greater than, or neither less nor greater than b.
//
static inline int cmp_toku_float(uchar* a_buf, uchar* b_buf) {
    float lhs;
    float rhs;
    //
    // memcpy is the way Field_float::cmp gets the floats from the buffers
    //
    memcpy(&lhs, a_buf, sizeof(float));
    memcpy(&rhs, b_buf, sizeof(float));
    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}


//
// Copies num_bytes raw bytes from the MySQL buffer into the tokudb buffer
// and returns the position in to_tokudb right after them.
//
static inline uchar* pack_toku_binary(uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
    uchar* next_pos = to_tokudb + num_bytes;
    memcpy(to_tokudb, from_mysql, num_bytes);
    return next_pos;
}

//
// Copies num_bytes raw bytes from the tokudb buffer into the MySQL buffer
// and returns the position in from_tokudb right after them.
//
static inline uchar* unpack_toku_binary(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
    uchar* next_pos = from_tokudb + num_bytes;
    memcpy(to_mysql, from_tokudb, num_bytes);
    return next_pos;
}


//
// Bytewise comparison of two binary values. The common prefix is compared
// with memcmp; if the prefixes are equal the shorter value sorts first.
// Returns the memcmp result when the prefixes differ, otherwise -1, 0 or 1.
//
static inline int cmp_toku_binary(
    uchar* a_buf,
    uint32_t a_num_bytes,
    uchar* b_buf,
    uint32_t b_num_bytes
    )
{
    uint32_t shared_len = b_num_bytes;
    if (a_num_bytes < b_num_bytes) {
        shared_len = a_num_bytes;
    }

    int prefix_cmp = memcmp(a_buf, b_buf, shared_len);
    if (prefix_cmp != 0) {
        return prefix_cmp;
    }
    if (a_num_bytes == b_num_bytes) {
        // identical length and identical bytes
        return prefix_cmp;
    }
    // equal prefix, different lengths: the shorter one is smaller
    return (a_num_bytes < b_num_bytes) ? -1 : 1;
}

//
// partially copied from below
//
//
// Packs a variable-length binary value taken from from_desc into to_tokudb
// as a little-endian length prefix followed by the data. The data length is
// field_length capped at key_part_length, and the prefix is one or two
// bytes depending on key_part_length. Returns the position in to_tokudb
// right after the packed value.
//
uchar* pack_toku_varbinary_from_desc(
    uchar* to_tokudb,
    const uchar* from_desc,
    uint32_t key_part_length, // upper bound on data bytes; also selects the size of the length prefix
    uint32_t field_length //length of field
    )
{
    // never store more than the key part allows
    uint32_t payload_len = field_length;
    set_if_smaller(payload_len, key_part_length);

    const uint32_t prefix_len = get_length_bytes_from_max(key_part_length);
    uchar* payload = to_tokudb + prefix_len;

    //
    // length prefix, low byte first
    //
    to_tokudb[0] = (uchar)(payload_len & 255);
    if (prefix_len > 1) {
        to_tokudb[1] = (uchar)(payload_len >> 8);
    }

    //
    // the data itself
    //
    memcpy(payload, from_desc, payload_len);
    return payload + payload_len;
}

//
// Packs a variable-length binary value from a MySQL buffer into to_tokudb
// as a little-endian length prefix followed by the data. The source length
// is decoded from the first length_bytes_in_mysql bytes of from_mysql (or
// taken to be max_num_bytes when there are none) and capped at
// max_num_bytes. Returns the position in to_tokudb right after the packed
// value.
//
static inline uchar* pack_toku_varbinary(
    uchar* to_tokudb,
    uchar* from_mysql,
    uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
    uint32_t max_num_bytes
    )
{
    uint32_t payload_len = 0;
    switch (length_bytes_in_mysql) {
    case 0:
        payload_len = max_num_bytes;
        break;
    case 1:
        payload_len = (uint32_t)from_mysql[0];
        break;
    case 2:
        payload_len = uint2korr(from_mysql);
        break;
    case 3:
        payload_len = uint3korr(from_mysql);
        break;
    case 4:
        payload_len = uint4korr(from_mysql);
        break;
    }
    set_if_smaller(payload_len, max_num_bytes);

    //
    // from here on this does the same as pack_toku_varbinary_from_desc
    //
    const uint32_t prefix_len = get_length_bytes_from_max(max_num_bytes);
    uchar* payload = to_tokudb + prefix_len;

    // length prefix, low byte first
    to_tokudb[0] = (uchar)(payload_len & 255);
    if (prefix_len > 1) {
        to_tokudb[1] = (uchar)(payload_len >> 8);
    }

    // data follows the MySQL length bytes in the source
    memcpy(payload, from_mysql + length_bytes_in_mysql, payload_len);
    return payload + payload_len;
}

//
// Unpacks a length-prefixed binary value from from_tokudb into to_mysql:
// the length is rewritten using length_bytes_in_mysql bytes (0 to 4) and
// the data is copied right after it. Returns the position in from_tokudb
// right after the value that was read.
//
static inline uchar* unpack_toku_varbinary(
    uchar* to_mysql,
    uchar* from_tokudb,
    uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
    uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
    )
{
    const uint32_t payload_len = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
    uchar* payload = from_tokudb + length_bytes_in_tokudb;

    //
    // write the length in the width MySQL expects
    //
    switch (length_bytes_in_mysql) {
    case 0:
        // no length stored on the MySQL side
        break;
    case 1:
        to_mysql[0] = (uchar) payload_len;
        break;
    case 2:
        int2store(to_mysql, payload_len);
        break;
    case 3:
        int3store(to_mysql, payload_len);
        break;
    case 4:
        int4store(to_mysql, payload_len);
        break;
    default:
        assert(false);
    }

    //
    // then the data
    //
    memcpy(to_mysql + length_bytes_in_mysql, payload, payload_len);
    return payload + payload_len;
}

//
// Compares two length-prefixed binary values with cmp_toku_binary and
// reports, through a_bytes_read and b_bytes_read, how many bytes each value
// occupies including its length prefix.
//
static inline int cmp_toku_varbinary(
    uchar* a_buf,
    uchar* b_buf,
    uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
    uint32_t* a_bytes_read,
    uint32_t* b_bytes_read
    )
{
    const uint32_t a_payload_len = get_length_from_var_tokudata(a_buf, length_bytes);
    const uint32_t b_payload_len = get_length_from_var_tokudata(b_buf, length_bytes);

    // prefix plus data is what the caller has to skip over
    *a_bytes_read = a_payload_len + length_bytes;
    *b_bytes_read = b_payload_len + length_bytes;

    return cmp_toku_binary(
        a_buf + length_bytes,
        a_payload_len,
        b_buf + length_bytes,
        b_payload_len
        );
}

//
// Packs a blob key part into to_tokudb as a little-endian length prefix
// followed by the data. In from_mysql the blob is represented by
// length_bytes_in_mysql length bytes followed by a pointer to the data.
// The length is capped at max_num_bytes and, when it exceeds the number of
// characters that fit, further limited by the value my_charpos returns.
// Returns the position in to_tokudb right after the packed value.
//
static inline uchar* pack_toku_blob(
    uchar* to_tokudb,
    uchar* from_mysql,
    uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
    uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
    uint32_t max_num_bytes,
#if MYSQL_VERSION_ID >= 50600
    const CHARSET_INFO* charset
#else
    CHARSET_INFO* charset
#endif
    )
{
    uint32_t blob_len = 0;
    switch (length_bytes_in_mysql) {
    case 0:
        blob_len = max_num_bytes;
        break;
    case 1:
        blob_len = (uint32_t)from_mysql[0];
        break;
    case 2:
        blob_len = uint2korr(from_mysql);
        break;
    case 3:
        blob_len = uint3korr(from_mysql);
        break;
    case 4:
        blob_len = uint4korr(from_mysql);
        break;
    }
    set_if_smaller(blob_len, max_num_bytes);

    // the pointer to the blob data is stored right after the length bytes
    uchar* blob_data = NULL;
    memcpy(&blob_data, from_mysql + length_bytes_in_mysql, sizeof(uchar *));

    // number of characters that fit into max_num_bytes
    uint32_t char_limit = max_num_bytes;
    if (charset->mbmaxlen > 1) {
        char_limit = max_num_bytes/charset->mbmaxlen;
    }
    if (blob_len > char_limit) {
        // let the charset decide where character number char_limit falls
        char_limit = my_charpos(
            charset,
            blob_data,
            blob_data + blob_len,
            char_limit
            );
        set_if_smaller(blob_len, char_limit);
    }

    //
    // length prefix, low byte first
    //
    uchar* payload = to_tokudb + length_bytes_in_tokudb;
    to_tokudb[0] = (uchar)(blob_len & 255);
    if (length_bytes_in_tokudb > 1) {
        to_tokudb[1] = (uchar)(blob_len >> 8);
    }

    //
    // then the data
    //
    memcpy(payload, blob_data, blob_len);
    return payload + blob_len;
}


//
// Unpacks a length-prefixed blob from from_tokudb into to_mysql. The length
// is written using length_bytes_in_mysql bytes (0 to 4); after it, a
// pointer to the blob data inside from_tokudb is stored (the data itself is
// not copied). Returns the position in from_tokudb right after the blob.
//
static inline uchar* unpack_toku_blob(
    uchar* to_mysql,
    uchar* from_tokudb,
    uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
    uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
    )
{
    const uint32_t blob_len = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
    uchar* blob_data = from_tokudb + length_bytes_in_tokudb;

    //
    // write the length in the width MySQL expects
    //
    switch (length_bytes_in_mysql) {
    case 0:
        break;
    case 1:
        to_mysql[0] = (uchar) blob_len;
        break;
    case 2:
        int2store(to_mysql, blob_len);
        break;
    case 3:
        int3store(to_mysql, blob_len);
        break;
    case 4:
        int4store(to_mysql, blob_len);
        break;
    default:
        assert(false);
    }

    //
    // store the address of the data, not the data
    //
    memcpy(to_mysql + length_bytes_in_mysql, &blob_data, sizeof(uchar *));
    return blob_data + blob_len;
}


//
// partially copied from below
//
//
// Packs a variable-length string taken from from_desc into to_tokudb as a
// little-endian length prefix followed by the data. The length starts as
// field_length capped at key_part_length and, when it exceeds the number of
// characters that fit, is further limited by the value my_charpos returns.
// Returns the position in to_tokudb right after the packed value.
//
uchar* pack_toku_varstring_from_desc(
    uchar* to_tokudb,
    const uchar* from_desc,
    uint32_t key_part_length, // upper bound on data bytes; also selects the size of the length prefix
    uint32_t field_length, //length of field
    uint32_t charset_num // charset number, resolved through get_charset_from_num
    )
{
    uint32_t copy_len = field_length;
    set_if_smaller(copy_len, key_part_length);

    CHARSET_INFO* cs = get_charset_from_num(charset_num);

    //
    // work out how many bytes of the string may be kept
    //
    uint32_t char_limit = key_part_length;
    if (cs->mbmaxlen > 1) {
        char_limit = key_part_length/cs->mbmaxlen;
    }
    if (copy_len > char_limit) {
        // let the charset decide where character number char_limit falls
        char_limit = my_charpos(
            cs,
            from_desc,
            from_desc + copy_len,
            char_limit
            );
        set_if_smaller(copy_len, char_limit);
    }

    //
    // length prefix, low byte first
    //
    const uint32_t prefix_len = get_length_bytes_from_max(key_part_length);
    uchar* payload = to_tokudb + prefix_len;
    to_tokudb[0] = (uchar)(copy_len & 255);
    if (prefix_len > 1) {
        to_tokudb[1] = (uchar)(copy_len >> 8);
    }

    //
    // then the string
    //
    memcpy(payload, from_desc, copy_len);
    return payload + copy_len;
}

//
// Packs a string from a MySQL buffer into to_tokudb as a little-endian
// length prefix followed by the data. The source length is decoded from
// the first length_bytes_in_mysql bytes of from_mysql (or taken to be
// max_num_bytes when there are none), capped at max_num_bytes and, when it
// exceeds the number of characters that fit, further limited by the value
// my_charpos returns. Returns the position in to_tokudb right after the
// packed value.
//
static inline uchar* pack_toku_varstring(
    uchar* to_tokudb,
    uchar* from_mysql,
    uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
    uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
    uint32_t max_num_bytes,
#if MYSQL_VERSION_ID >= 50600
    const CHARSET_INFO *charset
#else
    CHARSET_INFO* charset
#endif
    )
{
    uint32_t copy_len = 0;
    switch (length_bytes_in_mysql) {
    case 0:
        copy_len = max_num_bytes;
        break;
    case 1:
        copy_len = (uint32_t)from_mysql[0];
        break;
    case 2:
        copy_len = uint2korr(from_mysql);
        break;
    case 3:
        copy_len = uint3korr(from_mysql);
        break;
    case 4:
        copy_len = uint4korr(from_mysql);
        break;
    }
    set_if_smaller(copy_len, max_num_bytes);

    // string data follows the MySQL length bytes
    uchar* src_data = from_mysql + length_bytes_in_mysql;

    // number of characters that fit into max_num_bytes
    uint32_t char_limit = max_num_bytes;
    if (charset->mbmaxlen > 1) {
        char_limit = max_num_bytes/charset->mbmaxlen;
    }
    if (copy_len > char_limit) {
        // let the charset decide where character number char_limit falls
        char_limit = my_charpos(
            charset,
            src_data,
            src_data + copy_len,
            char_limit
            );
        set_if_smaller(copy_len, char_limit);
    }

    //
    // length prefix, low byte first
    //
    uchar* payload = to_tokudb + length_bytes_in_tokudb;
    to_tokudb[0] = (uchar)(copy_len & 255);
    if (length_bytes_in_tokudb > 1) {
        to_tokudb[1] = (uchar)(copy_len >> 8);
    }

    //
    // then the string
    //
    memcpy(payload, src_data, copy_len);
    return payload + copy_len;
}

//
// Compares two strings using the strnncollsp function of the collation
// that belongs to charset_number, and returns its result.
//
static inline int cmp_toku_string(
    uchar* a_buf,
    uint32_t a_num_bytes,
    uchar* b_buf,
    uint32_t b_num_bytes,
    uint32_t charset_number
    )
{
    CHARSET_INFO* collation_cs = get_charset_from_num(charset_number);
    return collation_cs->coll->strnncollsp(
        collation_cs,
        a_buf,
        a_num_bytes,
        b_buf,
        b_num_bytes,
        0
        );
}

//
// Compares two length-prefixed strings with cmp_toku_string and reports,
// through a_bytes_read and b_bytes_read, how many bytes each value occupies
// including its length prefix.
//
static inline int cmp_toku_varstring(
    uchar* a_buf,
    uchar* b_buf,
    uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
    uint32_t charset_num,
    uint32_t* a_bytes_read,
    uint32_t* b_bytes_read
    )
{
    const uint32_t a_payload_len = get_length_from_var_tokudata(a_buf, length_bytes);
    const uint32_t b_payload_len = get_length_from_var_tokudata(b_buf, length_bytes);

    // prefix plus data is what the caller has to skip over
    *a_bytes_read = a_payload_len + length_bytes;
    *b_bytes_read = b_payload_len + length_bytes;

    return cmp_toku_string(
        a_buf + length_bytes,
        a_payload_len,
        b_buf + length_bytes,
        b_payload_len,
        charset_num
        );
}

static inline int tokudb_compare_two_hidden_keys(
    const void* new_key_data,
    const uint32_t new_key_size,
    const void*  saved_key_data,
    const uint32_t saved_key_size
    ) {
    assert( (new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) && (saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) );
    ulonglong a = hpk_char_to_num((uchar *) new_key_data);
    ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
    return a < b ? -1 : (a > b ? 1 : 0);
}

//
// Returns number of bytes used for a given TOKU_TYPE
// in a key descriptor. The number of bytes returned
// here MUST match the number of bytes used for the encoding
// in create_toku_key_descriptor_for_key
// Parameters:
//      [in]    row_desc - buffer that contains portion of descriptor
//              created in create_toku_key_descriptor_for_key. The first
//              byte points to the TOKU_TYPE.
//
//
// Returns how many descriptor bytes one field occupies, starting at the
// TOKU_TYPE byte that row_desc points to: the type byte itself plus the
// type-specific bytes that follow it.
//
uint32_t skip_field_in_descriptor(uchar* row_desc) {
    // the TOKU_TYPE byte is always consumed
    uint32_t consumed = 1;

    switch ((TOKU_TYPE)row_desc[0]) {
    case toku_type_hpk:
    case toku_type_double:
    case toku_type_float:
        // nothing follows the type byte
        break;
    case toku_type_int:
        // two more bytes
        consumed += 2;
        break;
    case toku_type_fixbinary:
    case toku_type_varbinary:
        // one more byte
        consumed += 1;
        break;
    case toku_type_fixstring:
    case toku_type_varstring:
    case toku_type_blob:
        // one byte plus a uint32_t
        consumed += 1;
        consumed += (uint32_t)sizeof(uint32_t);
        break;
    default:
        assert(false);
        break;
    }
    return consumed;
}

//
// outputs a descriptor for key into buf. Returns number of bytes used in buf
// to store the descriptor. Number of bytes used MUST match number of bytes
// we would skip in skip_field_in_descriptor
//
//
// Writes the descriptor for every part of key into buf and returns the
// number of bytes written. For each key part the layout is:
//   byte 0: field->null_bit (0 means no null byte, non-zero means there is one)
//   byte 1: the TOKU_TYPE of the field
//   then type-specific bytes, as described at each case below.
//
int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
    uchar* out = buf;

    for (uint part = 0; part < get_key_parts(key); part++) {
        Field* field = key->key_part[part].field;

        // null indicator
        *out++ = field->null_bit;

        // type tag, must fit in one byte
        TOKU_TYPE type = mysql_to_toku_type(field);
        assert (type < 256);
        *out++ = (uchar)(type & 255);

        switch (type) {
        //
        // ints: one byte with the width of the int (1, 2, 3, 4 or 8),
        // one byte that is 1 for unsigned and 0 for signed
        //
        case toku_type_int: {
            uint32_t width = field->pack_length();
            assert (width < 256);
            *out++ = (uchar)(width & 255);
            *out++ = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
            break;
        }
        //
        // floats and doubles: nothing more
        //
        case toku_type_double:
        case toku_type_float:
            break;
        //
        // fixed binary: one byte with the length of the field, capped at
        // the length of the key part
        //
        case toku_type_fixbinary: {
            uint32_t width = field->pack_length();
            set_if_smaller(width, key->key_part[part].length);
            assert(width < 256);
            *out++ = (uchar)(width & 255);
            break;
        }
        //
        // var binary: one byte with the number of bytes used to encode
        // the length
        //
        case toku_type_varbinary:
            *out++ = (uchar)(get_length_bytes_from_max(key->key_part[part].length) & 255);
            break;
        //
        // strings and blobs: one byte with the number of bytes used to
        // encode the length, then the charset number in four bytes,
        // low byte first
        //
        case toku_type_fixstring:
        case toku_type_varstring:
        case toku_type_blob: {
            *out++ = (uchar)(get_length_bytes_from_max(key->key_part[part].length) & 255);
            uint32_t charset_num = field->charset()->number;
            for (uint32_t byte_idx = 0; byte_idx < 4; byte_idx++) {
                out[byte_idx] = (uchar)((charset_num >> (8*byte_idx)) & 255);
            }
            out += 4;
            break;
        }
        default:
            assert(false);
        }
    }
    return out - buf;
}


//
// Creates a descriptor for a DB. That contains all information necessary
// to do both key comparisons and data comparisons (for dup-sort databases).
//
// There are two types of descriptors we care about:
// 1) Primary key, (in a no-dup database)
// 2) secondary keys, which are a secondary key followed by a primary key,
//      but in a no-dup database.
//
// I realize this may be confusing, but here is how it works.
// All DB's have a key compare.
// The format of the descriptor must be able to handle both.
//
// The first four bytes store an offset into the descriptor to the second piece
// used for data comparisons. So, if in the future we want to append something
// to the descriptor, we can.
//
//
//
// Writes a key descriptor into buf and returns the number of bytes used.
// Layout produced by this function:
//   bytes 0-3: total number of bytes written (including these four),
//              stored low byte first
//   then the first key: either a hidden primary key marker or an infinity
//              byte followed by the descriptor of first_key
//   then, when the first key is not a hidden primary key and a second key
//              exists, the second key (hidden primary key marker or the
//              descriptor of second_key)
//
int create_toku_key_descriptor(
    uchar* buf,
    bool is_first_hpk,
    KEY* first_key,
    bool is_second_hpk,
    KEY* second_key
    )
{
    // leave room for the four-byte header that is filled in at the end
    uchar* pos = buf + 4;
    uint32_t key_desc_len = 0;

    if (is_first_hpk) {
        pos[0] = 0; // there is NO infinity byte
        pos[1] = 0; // field cannot be NULL
        pos[2] = toku_type_hpk;
        pos += 3;
    }
    else {
        //
        // regular first key: infinity byte, then the key's descriptor
        //
        *pos = 1; // there is an infinity byte
        pos++;
        key_desc_len = create_toku_key_descriptor_for_key(first_key, pos);
        pos += key_desc_len;
    }

    //
    // A second key is only written when the first key is not a hidden
    // primary key, and the second key is either a hidden primary key or
    // an actual KEY.
    //
    const bool write_second_key =
        !is_first_hpk && (is_second_hpk || (second_key != NULL));

    if (write_second_key) {
        if (is_second_hpk) {
            pos[0] = 0; // field cannot be NULL
            pos[1] = toku_type_hpk;
            pos += 2;
        }
        else {
            //
            // regular second key: just the key's descriptor
            //
            key_desc_len = create_toku_key_descriptor_for_key(second_key, pos);
            pos += key_desc_len;
        }
    }

    //
    // fill in the header with the number of bytes used, low byte first
    //
    uint32_t total_len = pos - buf;
    for (uint32_t byte_idx = 0; byte_idx < 4; byte_idx++) {
        buf[byte_idx] = (uchar)((total_len >> (8*byte_idx)) & 255);
    }

    return pos - buf;
}


//
// Compares one field of two packed keys.
// Parameters:
//      [in]    a_buf, b_buf - the two packed fields to compare
//      [in]    row_desc - descriptor of the field; the first byte is the
//              TOKU_TYPE, followed by the type-specific bytes
//      [out]   a_bytes_read, b_bytes_read - number of bytes the field
//              occupies in a_buf and b_buf
//      [out]   row_desc_bytes_read - number of descriptor bytes consumed
// Returns:
//      negative, zero or positive when a sorts before, equal to, or
//      after b
//
static inline int compare_toku_field(
    uchar* a_buf,
    uchar* b_buf,
    uchar* row_desc,
    uint32_t* a_bytes_read,
    uint32_t* b_bytes_read,
    uint32_t* row_desc_bytes_read
    )
{
    int ret_val = 0;
    uchar* row_desc_pos = row_desc;
    uint32_t num_bytes = 0;
    uint32_t length_bytes = 0;
    uint32_t charset_num = 0;
    bool is_unsigned = false;

    TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
    row_desc_pos++;

    switch (toku_type) {
    case (toku_type_hpk):
        ret_val = tokudb_compare_two_hidden_keys(
            a_buf,
            TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
            b_buf,
            TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
            );
        *a_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
        *b_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
        break;
    case (toku_type_int):
        // descriptor: width of the int, then the unsigned flag
        num_bytes = row_desc_pos[0];
        is_unsigned = row_desc_pos[1];
        ret_val = cmp_toku_int(
            a_buf,
            b_buf,
            is_unsigned,
            num_bytes
            );
        *a_bytes_read = num_bytes;
        *b_bytes_read = num_bytes;
        row_desc_pos += 2;
        break;
    case (toku_type_double):
        ret_val = cmp_toku_double(a_buf, b_buf);
        *a_bytes_read = sizeof(double);
        *b_bytes_read = sizeof(double);
        break;
    case (toku_type_float):
        ret_val = cmp_toku_float(a_buf, b_buf);
        *a_bytes_read = sizeof(float);
        *b_bytes_read = sizeof(float);
        break;
    case (toku_type_fixbinary):
        // descriptor: length of the field
        num_bytes = row_desc_pos[0];
        ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes);
        *a_bytes_read = num_bytes;
        *b_bytes_read = num_bytes;
        row_desc_pos++;
        break;
    case (toku_type_varbinary):
        // descriptor: number of bytes used to encode the length
        length_bytes = row_desc_pos[0];
        ret_val = cmp_toku_varbinary(
            a_buf,
            b_buf,
            length_bytes,
            a_bytes_read,
            b_bytes_read
            );
        row_desc_pos++;
        break;
    case (toku_type_fixstring):
    case (toku_type_varstring):
    case (toku_type_blob):
        // descriptor: number of length bytes, then the charset number
        length_bytes = row_desc_pos[0];
        row_desc_pos++;
        //
        // The charset number is stored as four bytes, low byte first
        // (see create_toku_key_descriptor_for_key), at a position that
        // is not guaranteed to be 4-byte aligned. Decode it byte by byte
        // rather than dereferencing a casted uint32_t pointer, which is
        // a misaligned, aliasing-violating access.
        //
        charset_num = (uint32_t)row_desc_pos[0]
            | ((uint32_t)row_desc_pos[1] << 8)
            | ((uint32_t)row_desc_pos[2] << 16)
            | ((uint32_t)row_desc_pos[3] << 24);
        row_desc_pos += sizeof(uint32_t);
        ret_val = cmp_toku_varstring(
            a_buf,
            b_buf,
            length_bytes,
            charset_num,
            a_bytes_read,
            b_bytes_read
            );
        break;
    default:
        assert(false);
        break;
    }

    *row_desc_bytes_read = row_desc_pos - row_desc;
    return ret_val;
}

//
// Converts one key column from MySQL's buffer format into TokuDB's key
// format. This is the variant used on the write path (inserts/updates).
//
// Parameters:
//   to_tokudb       - destination buffer for the packed column
//   from_mysql      - source bytes of the column in MySQL format
//   field           - MySQL field for the column; mysql_to_toku_type(field)
//                     selects which pack_toku_* helper is used
//   key_part_length - length of the key part; fixed-size numeric types
//                     assert that it equals the field's pack length
//
// Returns whatever the selected pack_toku_* helper returns (presumably the
// position just past the packed bytes -- confirm against the helpers).
// Returns NULL only if the type is unknown and asserts are compiled out.
//
uchar* pack_toku_key_field(
    uchar* to_tokudb,
    uchar* from_mysql,
    Field* field,
    uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
    )
{
    TOKU_TYPE column_type = mysql_to_toku_type(field);

    switch (column_type) {
    case toku_type_int:
        assert(key_part_length == field->pack_length());
        return pack_toku_int(to_tokudb, from_mysql, field->pack_length());

    case toku_type_double:
        assert(field->pack_length() == sizeof(double));
        assert(key_part_length == sizeof(double));
        return pack_toku_double(to_tokudb, from_mysql);

    case toku_type_float:
        assert(field->pack_length() == sizeof(float));
        assert(key_part_length == sizeof(float));
        return pack_toku_float(to_tokudb, from_mysql);

    case toku_type_fixbinary: {
        // never copy more than the key part covers
        uint32_t copy_len = field->pack_length();
        if (copy_len > key_part_length) {
            copy_len = key_part_length;
        }
        return pack_toku_binary(to_tokudb, from_mysql, copy_len);
    }

    case toku_type_fixstring: {
        // fixed strings go through the varstring packer; the source has no
        // length prefix (0 length bytes on the MySQL side)
        uint32_t copy_len = field->pack_length();
        if (copy_len > key_part_length) {
            copy_len = key_part_length;
        }
        return pack_toku_varstring(
            to_tokudb,
            from_mysql,
            get_length_bytes_from_max(key_part_length),
            0,
            copy_len,
            field->charset()
            );
    }

    case toku_type_varbinary:
        return pack_toku_varbinary(
            to_tokudb,
            from_mysql,
            ((Field_varstring *)field)->length_bytes,
            key_part_length
            );

    case toku_type_varstring:
        return pack_toku_varstring(
            to_tokudb,
            from_mysql,
            get_length_bytes_from_max(key_part_length),
            ((Field_varstring *)field)->length_bytes,
            key_part_length,
            field->charset()
            );

    case toku_type_blob:
        return pack_toku_blob(
            to_tokudb,
            from_mysql,
            get_length_bytes_from_max(key_part_length),
            ((Field_blob *)field)->row_pack_length(), //only calling this because packlength is returned
            key_part_length,
            field->charset()
            );

    default:
        assert(false);
    }

    // only reachable for an unhandled type
    assert(false);
    return NULL;
}

//
// Converts one key column from a MySQL key buffer into TokuDB's key
// format. This is the variant used for queries.
//
// Fixed-size types are delegated to pack_toku_key_field unchanged. For
// variable-size types the only difference from pack_toku_key_field is the
// number of length bytes assumed on the MySQL side: it is always 2 here,
// instead of the field's own length_bytes. So a varchar(4) source value is
// still read with a 2-byte length.
//
// Returns whatever the selected packer returns, or NULL if the type is
// unknown and asserts are compiled out.
//
uchar* pack_key_toku_key_field(
    uchar* to_tokudb,
    uchar* from_mysql,
    Field* field,
    uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
    )
{
    // MySQL-side length prefix size used for every variable-size column
    const uint32_t mysql_key_length_bytes = 2;

    switch (mysql_to_toku_type(field)) {
    case toku_type_int:
    case toku_type_double:
    case toku_type_float:
    case toku_type_fixbinary:
    case toku_type_fixstring:
        return pack_toku_key_field(to_tokudb, from_mysql, field, key_part_length);

    case toku_type_varbinary:
        return pack_toku_varbinary(
            to_tokudb,
            from_mysql,
            mysql_key_length_bytes,
            key_part_length
            );

    case toku_type_varstring:
    case toku_type_blob:
        return pack_toku_varstring(
            to_tokudb,
            from_mysql,
            get_length_bytes_from_max(key_part_length),
            mysql_key_length_bytes,
            key_part_length,
            field->charset()
            );

    default:
        assert(false);
    }

    // only reachable for an unhandled type
    assert(false);
    return NULL;
}


//
// Converts one key column from TokuDB's key format back into MySQL's
// buffer format; the counterpart of pack_toku_key_field.
//
// Parameters:
//   to_mysql        - destination buffer in MySQL format
//   from_tokudb     - packed column bytes in TokuDB key format
//   field           - MySQL field for the column; its TOKU_TYPE selects the
//                     unpack_toku_* helper
//   key_part_length - length of the key part
//
// Returns whatever the selected unpack_toku_* helper returns (presumably
// the position in from_tokudb just past the consumed bytes -- the
// fixstring case below relies on that), or NULL if the type is unknown and
// asserts are compiled out.
//
uchar* unpack_toku_key_field(
    uchar* to_mysql,
    uchar* from_tokudb,
    Field* field,
    uint32_t key_part_length
    )
{
    switch (mysql_to_toku_type(field)) {
    case toku_type_int:
        assert(key_part_length == field->pack_length());
        return unpack_toku_int(to_mysql, from_tokudb, field->pack_length());

    case toku_type_double:
        assert(field->pack_length() == sizeof(double));
        assert(key_part_length == sizeof(double));
        return unpack_toku_double(to_mysql, from_tokudb);

    case toku_type_float:
        assert(field->pack_length() == sizeof(float));
        assert(key_part_length == sizeof(float));
        return unpack_toku_float(to_mysql, from_tokudb);

    case toku_type_fixbinary: {
        uint32_t copy_len = field->pack_length();
        if (copy_len > key_part_length) {
            copy_len = key_part_length;
        }
        return unpack_toku_binary(to_mysql, from_tokudb, copy_len);
    }

    case toku_type_fixstring: {
        // stored with a length prefix; MySQL wants the full fixed width, so
        // the tail is filled with the charset's pad character
        // NOTE(review): filling byte-wise with pad_char presumably assumes a
        // single-byte pad character -- confirm for multi-byte charsets
        uint32_t full_width = field->pack_length();
        uint32_t toku_length_bytes = get_length_bytes_from_max(key_part_length);
        uchar* after = unpack_toku_varbinary(
            to_mysql,
            from_tokudb,
            toku_length_bytes,
            0
            );
        uint32_t payload_len = after - (from_tokudb + toku_length_bytes);
        assert(payload_len <= full_width);
        memset(to_mysql + payload_len, field->charset()->pad_char, full_width - payload_len);
        return after;
    }

    case toku_type_varbinary:
    case toku_type_varstring:
        return unpack_toku_varbinary(
            to_mysql,
            from_tokudb,
            get_length_bytes_from_max(key_part_length),
            ((Field_varstring *)field)->length_bytes
            );

    case toku_type_blob:
        return unpack_toku_blob(
            to_mysql,
            from_tokudb,
            get_length_bytes_from_max(key_part_length),
            ((Field_blob *)field)->row_pack_length() //only calling this because packlength is returned
            );

    default:
        assert(false);
    }

    // only reachable for an unhandled type
    assert(false);
    return NULL;
}


//
// Compares two packed keys field by field, as directed by a row descriptor.
//
// Parameters:
//   new_key_data / new_key_size     - first key and its length in bytes
//   saved_key_data / saved_key_size - second key and its length in bytes
//   row_desc / row_desc_size        - descriptor driving the comparison and
//                                     its length in bytes
//   cmp_prefix - when true, the result is 0 once every field that could be
//                compared has compared equal, regardless of leftover bytes
//                or infinity bytes
//
// Descriptor layout as consumed here:
//   byte 0    : non-zero if each key starts with an "infinity" byte
//   per field : one byte that is non-zero if the field is preceded by a
//               null byte in the keys, followed by the field description
//               consumed by compare_toku_field / skip_field_in_descriptor
//
// Returns 0 when the keys are considered equal; otherwise a non-zero value
// whose sign orders new_key relative to saved_key.
//
int tokudb_compare_two_keys(
    const void* new_key_data,
    const uint32_t new_key_size,
    const void*  saved_key_data,
    const uint32_t saved_key_size,
    const void*  row_desc,
    const uint32_t row_desc_size,
    bool cmp_prefix
    )
{
    int ret_val = 0;
    // default used when the keys carry no infinity byte
    int8_t new_key_inf_val = COL_NEG_INF;
    int8_t saved_key_inf_val = COL_NEG_INF;

    uchar* row_desc_ptr = (uchar *)row_desc;
    uchar *new_key_ptr = (uchar *)new_key_data;
    uchar *saved_key_ptr = (uchar *)saved_key_data;

    uint32_t new_key_bytes_left = new_key_size;
    uint32_t saved_key_bytes_left = saved_key_size;

    //
    // if the keys have an infinity byte, set it
    //
    if (row_desc_ptr[0]) {
        new_key_inf_val = (int8_t)new_key_ptr[0];
        saved_key_inf_val = (int8_t)saved_key_ptr[0];
        new_key_ptr++;
        saved_key_ptr++;
    }
    row_desc_ptr++;

    //
    // walk the fields until either key or the descriptor is exhausted
    //
    while ( (uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
            (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
            (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size
            )
    {
        uint32_t new_key_field_length;
        uint32_t saved_key_field_length;
        uint32_t row_desc_field_length;
        //
        // if there is a null byte at this point in the key
        //
        if (row_desc_ptr[0]) {
            //
            // compare null bytes. If different, return
            //
            if (new_key_ptr[0] != saved_key_ptr[0]) {
                ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
                goto exit;
            }
            saved_key_ptr++;
            //
            // in case we just read the fact that new_key_ptr and saved_key_ptr
            // have NULL as their next field
            // (the null bytes are equal at this point, so testing one key is
            // enough; a zero null byte means the field is NULL and no field
            // data follows in either key)
            //
            if (!*new_key_ptr++) {
                //
                // skip row_desc_ptr[0] read in if clause
                //
                row_desc_ptr++;
                //
                // skip data that describes rest of field
                //
                row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
                continue;
            }
        }
        // step over the "has null byte" flag of this field
        row_desc_ptr++;

        //
        // compare the field data; the callee reports how many bytes it
        // consumed from each key and from the descriptor
        //
        ret_val = compare_toku_field(
            new_key_ptr,
            saved_key_ptr,
            row_desc_ptr,
            &new_key_field_length,
            &saved_key_field_length,
            &row_desc_field_length
            );
        new_key_ptr += new_key_field_length;
        saved_key_ptr += saved_key_field_length;
        row_desc_ptr += row_desc_field_length;
        if (ret_val) {
            goto exit;
        }

        // none of the cursors may have run past its buffer
        assert((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
        assert((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
        assert((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
    }
    //
    // all compared fields were equal; decide based on what is left over
    //
    new_key_bytes_left = new_key_size - ((uint32_t)(new_key_ptr - (uchar *)new_key_data));
    saved_key_bytes_left = saved_key_size - ((uint32_t)(saved_key_ptr - (uchar *)saved_key_data));
    if (cmp_prefix) {
        ret_val = 0;
    }
    //
    // in this case, read both keys to completion, now read infinity byte
    //
    else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
        ret_val = new_key_inf_val - saved_key_inf_val;
    }
    //
    // at this point, one SHOULD be 0
    // the exhausted (shorter) key sorts after the longer one only if its
    // infinity byte is COL_POS_INF, otherwise before it
    //
    else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
        ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
    }
    else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
        ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
    }
    //
    // both keys still have bytes left, i.e. the descriptor ran out first;
    // this is not expected to happen, hence the assert. The assignment
    // below only takes effect when asserts are compiled out.
    //
    else {
        assert(false);
        ret_val = new_key_bytes_left - saved_key_bytes_left;
    }
exit:
    return ret_val;
}

//
// Key comparison function for a dictionary.
//
// If the dictionary's comparison descriptor is empty, the keys are compared
// bytewise (memcmp over the common prefix; on a tie the shorter key sorts
// first). Otherwise the comparison is delegated to tokudb_compare_two_keys.
// The first four bytes of the descriptor hold a length/offset value N, and
// the key descriptor handed to tokudb_compare_two_keys is the N - 4 bytes
// that follow those four bytes.
//
// Returns <0, 0 or >0 in the bytewise case; otherwise the result of
// tokudb_compare_two_keys with cmp_prefix == false.
//
int tokudb_cmp_dbt_key(DB* file, const DBT *keya, const DBT *keyb) {
    int cmp;
    if (file->cmp_descriptor->dbt.size == 0) {
        // keep the length unsigned: an int would turn negative for sizes
        // >= 2^31 and become a huge size_t when passed to memcmp
        size_t num_bytes_cmp = keya->size < keyb->size ?
            keya->size : keyb->size;
        cmp = memcmp(keya->data, keyb->data, num_bytes_cmp);
        if (cmp == 0 && (keya->size != keyb->size)) {
            cmp = keya->size < keyb->size ? -1 : 1;
        }
    }
    else {
        const uchar* desc = (const uchar *)file->cmp_descriptor->dbt.data;
        //
        // read the 4-byte prefix with memcpy rather than through a
        // uint32_t* cast: no alignment or aliasing assumptions about the
        // descriptor buffer (byte order is native, as before)
        //
        uint32_t key_desc_end;
        memcpy(&key_desc_end, desc, sizeof(key_desc_end));
        cmp = tokudb_compare_two_keys(
            keya->data,
            keya->size,
            keyb->data,
            keyb->size,
            desc + 4,
            key_desc_end - 4,
            false
            );
    }
    return cmp;
}

//
// Prefix variant of the key comparison function: delegates to
// tokudb_compare_two_keys with cmp_prefix == true, so keys compare equal
// when all fields that could be compared are equal.
//
// The first four bytes of the comparison descriptor hold a length/offset
// value N; the key descriptor passed on is the N - 4 bytes that follow.
// Unlike tokudb_cmp_dbt_key, an empty descriptor is not handled here.
//
//TODO: QQQ Only do one direction for prefix.
int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) {
    const uchar* desc = (const uchar *)file->cmp_descriptor->dbt.data;
    //
    // read the 4-byte prefix with memcpy rather than through a uint32_t*
    // cast: no alignment or aliasing assumptions about the descriptor
    // buffer (byte order is native, as before)
    //
    uint32_t key_desc_end;
    memcpy(&key_desc_end, desc, sizeof(key_desc_end));
    int cmp = tokudb_compare_two_keys(
        keya->data,
        keya->size,
        keyb->data,
        keyb->size,
        desc + 4,
        key_desc_end - 4,
        true
        );
    return cmp;
}


//
// Writes the key pack descriptor of the main dictionary into buf.
//
// Layout produced (5 bytes):
//   bytes 0-3 : offset at which this descriptor ends, least significant
//               byte first
//   byte 4    : 1, marking this as the main dictionary
//
// Returns the number of bytes written.
//
uint32_t create_toku_main_key_pack_descriptor (
    uchar* buf
    )
{
    // the payload starts after the four-byte offset prefix
    uchar* cursor = buf + 4;

    // single flag byte: this is the main dictionary
    *cursor++ = 1;

    // fill in the prefix now that the end position is known
    uint32_t end_offset = cursor - buf;
    for (uint32_t byte_no = 0; byte_no < 4; byte_no++) {
        buf[byte_no] = (uchar)((end_offset >> (8 * byte_no)) & 255);
    }

    return cursor - buf;
}

//
// Tag bytes used inside the pack descriptors built below.
//

// How a column is stored: written by pack_desc_pk_info (FIX/VAR) and
// pack_desc_offset_info (FIX/VAR/BLOB)
#define COL_FIX_FIELD 0x11
#define COL_VAR_FIELD 0x22
#define COL_BLOB_FIELD 0x33

// Whether charset information follows: written by pack_desc_char_info
#define COL_HAS_NO_CHARSET 0x44
#define COL_HAS_CHARSET 0x55

// How a column is located inside the primary key: written by
// pack_desc_pk_offset_info
#define COL_FIX_PK_OFFSET 0x66
#define COL_VAR_PK_OFFSET 0x77

// Kind of column range in a clustering value descriptor: written by
// create_toku_clustering_val_pack_descriptor and interpreted by
// pack_clustering_val_from_desc
#define CK_FIX_RANGE 0x88
#define CK_VAR_RANGE 0x99

//
// Appends kc_info->cp_info[pk_index][field_index].col_pack_val
// (sizeof(uint32_t) bytes) at pos and advances pos past it.
// Expands in the caller's scope: it relies on local names pos, kc_info,
// pk_index and field_index. The expansion consists of two statements and
// already ends in a semicolon, so it must not be used as the sole body of
// an unbraced if/else/loop.
//
#define COPY_OFFSET_TO_BUF  memcpy ( \
    pos, \
    &kc_info->cp_info[pk_index][field_index].col_pack_val, \
    sizeof(uint32_t) \
    ); \
    pos += sizeof(uint32_t);


//
// Writes the two-byte description of one primary key part into buf:
//   byte 0 : COL_FIX_FIELD or COL_VAR_FIELD
//   byte 1 : for fixed fields, the number of bytes the part occupies
//            (must be < 256); for variable fields, the number of length
//            bytes (2 if the key part length exceeds 255, otherwise 1)
//
// Returns the number of bytes written (2, or 0 for an unknown type when
// asserts are compiled out).
//
uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
    const uint16 col = key_part->field->field_index;
    Field* col_field = table_share->field[col];
    uchar* out = buf;

    switch (mysql_to_toku_type(col_field)) {
    case toku_type_int:
    case toku_type_double:
    case toku_type_float:
        *out++ = COL_FIX_FIELD;
        assert(kc_info->field_lengths[col] < 256);
        *out++ = kc_info->field_lengths[col];
        break;

    case toku_type_fixbinary: {
        *out++ = COL_FIX_FIELD;
        // the part occupies at most the field's pack length
        uint32_t stored_len = key_part->length;
        uint32_t pack_len = col_field->pack_length();
        if (stored_len > pack_len) {
            stored_len = pack_len;
        }
        assert(stored_len < 256);
        *out++ = (uchar)stored_len;
        break;
    }

    case toku_type_fixstring:
    case toku_type_varbinary:
    case toku_type_varstring:
    case toku_type_blob: {
        *out++ = COL_VAR_FIELD;
        uint32_t part_len = key_part->length;
        uchar length_bytes = (part_len > 255) ? 2 : 1;
        *out++ = length_bytes;
        break;
    }

    default:
        assert(false);
    }

    return out - buf;
}

//
// Writes, into buf, how to locate the column of key_part inside the
// primary key.
//
// Parameters:
//   buf         - destination
//   kc_info     - unused here
//   table_share - unused here
//   key_part    - key part whose column must be found in the primary key
//   prim_key    - the primary key
//   pk_info     - two bytes per primary key part, in the format produced by
//                 pack_desc_pk_info (tag byte, then size byte)
//
// Output (5 bytes):
//   COL_FIX_PK_OFFSET + uint32 byte offset, when neither the column itself
//       nor any primary key part before it is COL_VAR_FIELD; the offset is
//       the sum of the size bytes of the preceding parts
//   COL_VAR_PK_OFFSET + uint32 index of the part within the primary key,
//       otherwise
//
// The column is expected to be part of the primary key (asserted).
// Returns the number of bytes written.
//
uint32_t pack_desc_pk_offset_info(
    uchar* buf,
    KEY_AND_COL_INFO* kc_info,
    TABLE_SHARE* table_share,
    KEY_PART_INFO* key_part,
    KEY* prim_key,
    uchar* pk_info
    )
{
    uchar* pos = buf;
    const uint16 field_index = key_part->field->field_index;
    bool found_col_in_pk = false;
    //
    // initialised so that a build without asserts never copies an
    // indeterminate value into the descriptor if the column is missing
    //
    uint32_t index_in_pk = 0;

    bool is_constant_offset = true;
    uint32_t offset = 0;
    const uint num_pk_parts = get_key_parts(prim_key);
    for (uint i = 0; i < num_pk_parts; i++) {
        // look at the key part in place instead of copying the struct
        const KEY_PART_INFO* curr = &prim_key->key_part[i];
        uint16 curr_field_index = curr->field->field_index;

        // a variable-size part at or before the column makes its byte
        // offset row-dependent
        if (pk_info[2*i] == COL_VAR_FIELD) {
            is_constant_offset = false;
        }

        if (curr_field_index == field_index) {
            found_col_in_pk = true;
            index_in_pk = i;
            break;
        }
        // only meaningful while all parts so far are fixed: the second
        // byte of a fixed part is its size in bytes
        offset += pk_info[2*i + 1];
    }
    assert(found_col_in_pk);
    if (is_constant_offset) {
        pos[0] = COL_FIX_PK_OFFSET;
        pos++;

        memcpy (pos, &offset, sizeof(offset));
        pos += sizeof(offset);
    }
    else {
        pos[0] = COL_VAR_PK_OFFSET;
        pos++;

        memcpy(pos, &index_in_pk, sizeof(index_in_pk));
        pos += sizeof(index_in_pk);
    }
    return pos - buf;
}

//
// Writes, into buf, where the column of key_part is found inside a row:
//   fixed types      : COL_FIX_FIELD  + cp_info[pk_index][col].col_pack_val
//   varbinary/string : COL_VAR_FIELD  + cp_info[pk_index][col].col_pack_val
//   blob             : COL_BLOB_FIELD + position of the column within
//                      kc_info->blob_fields (asserted to exist)
// Each value after the tag byte is copied as sizeof(uint32_t) bytes.
//
// Returns the number of bytes written.
//
uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uint pk_index, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
    const uint16 col = key_part->field->field_index;
    Field* col_field = table_share->field[col];
    uchar* out = buf;

    switch (mysql_to_toku_type(col_field)) {
    case toku_type_int:
    case toku_type_double:
    case toku_type_float:
    case toku_type_fixbinary:
    case toku_type_fixstring:
        *out++ = COL_FIX_FIELD;
        memcpy(out, &kc_info->cp_info[pk_index][col].col_pack_val, sizeof(uint32_t));
        out += sizeof(uint32_t);
        break;

    case toku_type_varbinary:
    case toku_type_varstring:
        *out++ = COL_VAR_FIELD;
        memcpy(out, &kc_info->cp_info[pk_index][col].col_pack_val, sizeof(uint32_t));
        out += sizeof(uint32_t);
        break;

    case toku_type_blob: {
        *out++ = COL_BLOB_FIELD;
        // blobs are identified by their ordinal among the table's blobs
        bool located = false;
        uint32_t ordinal = 0;
        while (ordinal < kc_info->num_blobs) {
            uint32_t candidate = kc_info->blob_fields[ordinal];
            if (candidate == col) {
                memcpy(out, &ordinal, sizeof(uint32_t));
                out += sizeof(uint32_t);
                located = true;
                break;
            }
            ordinal++;
        }
        assert(located);
        break;
    }

    default:
        assert(false);
    }

    return out - buf;
}

//
// Writes the length of one key part into buf as a uint32_t:
//   int/double/float     : kc_info->field_lengths[col]
//   fixbinary/fixstring  : min(key part length, field pack length)
//   varbinary/varstring/
//   blob                 : the key part length
//
// Returns the number of bytes written (sizeof(uint32_t), or 0 for an
// unknown type when asserts are compiled out).
//
uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
    const uint16 col = key_part->field->field_index;
    Field* col_field = table_share->field[col];
    uint32_t stored_length;

    switch (mysql_to_toku_type(col_field)) {
    case toku_type_int:
    case toku_type_double:
    case toku_type_float:
        stored_length = kc_info->field_lengths[col];
        break;

    case toku_type_fixbinary:
    case toku_type_fixstring: {
        // a fixed-width part never exceeds the field's pack length
        uint32_t pack_len = col_field->pack_length();
        stored_length = key_part->length;
        if (stored_length > pack_len) {
            stored_length = pack_len;
        }
        break;
    }

    case toku_type_varbinary:
    case toku_type_varstring:
    case toku_type_blob:
        stored_length = key_part->length;
        break;

    default:
        assert(false);
        return 0;
    }

    memcpy(buf, &stored_length, sizeof(stored_length));
    return sizeof(stored_length);
}

//
// Writes the charset description of one key part into buf:
//   numeric and binary types : COL_HAS_NO_CHARSET               (1 byte)
//   string and blob types    : COL_HAS_CHARSET followed by the charset
//                              number, least significant byte first
//                              (5 bytes)
//
// Returns the number of bytes written (0 for an unknown type when asserts
// are compiled out).
//
uint32_t pack_desc_char_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
    const uint16 col = key_part->field->field_index;
    Field* col_field = table_share->field[col];

    switch (mysql_to_toku_type(col_field)) {
    case toku_type_int:
    case toku_type_double:
    case toku_type_float:
    case toku_type_fixbinary:
    case toku_type_varbinary:
        buf[0] = COL_HAS_NO_CHARSET;
        return 1;

    case toku_type_fixstring:
    case toku_type_varstring:
    case toku_type_blob: {
        buf[0] = COL_HAS_CHARSET;
        uint32_t cs_number = col_field->charset()->number;
        for (uint32_t byte_no = 0; byte_no < 4; byte_no++) {
            buf[1 + byte_no] = (uchar)((cs_number >> (8 * byte_no)) & 255);
        }
        return 5;
    }

    default:
        assert(false);
    }

    return 0;
}

//
// Writes general row layout information into buf, in this order:
//   1. table_share->null_bytes as a uint32_t
//   2. kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO) bytes
//   3. kc_info->num_offset_bytes as a single byte
//
// Returns the number of bytes written.
//
uint32_t pack_some_row_info (
    uchar* buf,
    uint pk_index,
    TABLE_SHARE* table_share,
    KEY_AND_COL_INFO* kc_info
    )
{
    uchar* cursor = buf;

    // number of null bytes in a row
    const uint32_t null_byte_count = table_share->null_bytes;
    memcpy(cursor, &null_byte_count, sizeof(null_byte_count));
    cursor += sizeof(null_byte_count);

    // multi-column pack info of the dictionary identified by pk_index
    memcpy(cursor, &kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO));
    cursor += sizeof(MULTI_COL_PACK_INFO);

    // number of bytes used per variable-field offset
    *cursor++ = (uchar)kc_info->num_offset_bytes;

    return cursor - buf;
}

//
// Returns an upper bound, in bytes, on the size of the descriptor written
// by create_toku_clustering_val_pack_descriptor for this table.
//
uint32_t get_max_clustering_val_pack_desc_size(
    TABLE_SHARE* table_share
    )
{
    // what pack_some_row_info writes: null byte count, mcp_info, one byte
    // for the number of offset bytes
    const size_t row_info_bytes = sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
    // mcp_info of the clustering key
    const size_t key_mcp_bytes = sizeof(MULTI_COL_PACK_INFO);
    // byte that states if blobs exist
    const size_t blob_flag_bytes = 1;
    // per field, at most: one tag byte plus two uint32_t range endpoints
    const size_t range_bytes = (table_share->fields)*(1 + 2*sizeof(uint32_t));
    // four bytes storing the length of this portion
    const size_t length_prefix_bytes = 4;

    return (uint32_t)(
        row_info_bytes +
        key_mcp_bytes +
        blob_flag_bytes +
        range_bytes +
        length_prefix_bytes
        );
}

//
// Builds, in buf, the descriptor that says which parts of a primary
// dictionary row make up the value of key number keynr.
//
// Layout produced:
//   bytes 0-3 : offset at which this descriptor ends, least significant
//               byte first
//   If is_clustering is false nothing else is written. Otherwise:
//   - the output of pack_some_row_info for pk_index
//   - kc_info->mcp_info[keynr]
//   - one byte: 1 if the table has blobs, else 0
//   - zero or more CK_FIX_RANGE entries: tag byte, start offset, end
//     offset (uint32_t each) covering runs of consecutive fixed fields
//     that are not filtered out of the key
//   - zero or more CK_VAR_RANGE entries: tag byte, col_pack_val of the
//     first and of the last variable field of each such run
//
// Returns the number of bytes written, including the four-byte prefix.
//
uint32_t create_toku_clustering_val_pack_descriptor (
    uchar* buf,
    uint pk_index,
    TABLE_SHARE* table_share,
    KEY_AND_COL_INFO* kc_info,
    uint32_t keynr,
    bool is_clustering
    )
{
    uchar* cursor = buf + 4;

    // a key that is not clustering gets only the four-byte prefix
    if (is_clustering) {
        cursor += pack_some_row_info(
            cursor,
            pk_index,
            table_share,
            kc_info
            );

        // mcp_info of the clustering key
        memcpy(cursor, &kc_info->mcp_info[keynr], sizeof(MULTI_COL_PACK_INFO));
        cursor += sizeof(MULTI_COL_PACK_INFO);

        // flag byte stating if blobs exist
        *cursor++ = (kc_info->num_blobs) ? 1 : 0;

        //
        // the descriptor relies on every column filtered from the pk also
        // being filtered from the clustering key value; verify it
        //
        for (uint col = 0; col < table_share->fields; col++) {
            bool filtered_in_key = bitmap_is_set(&kc_info->key_filters[keynr],col);
            bool filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],col);
            if (filtered_in_pk) {
                assert(filtered_in_key);
            }
        }

        //
        // pass 1: runs of fixed fields that stay in the value
        //
        bool range_open = false;
        uint32_t last_kept = 0;
        for (uint col = 0; col < table_share->fields; col++) {
            if (kc_info->field_lengths[col] == 0) {
                // not a fixed field
                continue;
            }
            if (!bitmap_is_set(&kc_info->key_filters[keynr],col)) {
                if (!range_open) {
                    // a kept field after a gap opens a new range
                    uint32_t range_begin = kc_info->cp_info[pk_index][col].col_pack_val;
                    *cursor++ = CK_FIX_RANGE;
                    memcpy(cursor, &range_begin, sizeof(range_begin));
                    cursor += sizeof(range_begin);
                    range_open = true;
                }
                last_kept = col;
            }
            else if (range_open) {
                // a filtered field closes the range just past the last kept one
                uint32_t range_end = kc_info->cp_info[pk_index][last_kept].col_pack_val + kc_info->field_lengths[last_kept];
                memcpy(cursor, &range_end, sizeof(range_end));
                cursor += sizeof(range_end);
                range_open = false;
            }
        }
        if (range_open) {
            // the final range runs to the end of the last kept fixed field
            uint32_t range_end = kc_info->cp_info[pk_index][last_kept].col_pack_val + kc_info->field_lengths[last_kept];
            memcpy(cursor, &range_end, sizeof(range_end));
            cursor += sizeof(range_end);
            range_open = false;
        }

        //
        // pass 2: runs of var fields that stay in the value; here both
        // endpoints are col_pack_val values and the end is inclusive
        //
        last_kept = 0;
        for (uint col = 0; col < table_share->fields; col++) {
            if (kc_info->length_bytes[col] == 0) {
                // not a var field
                continue;
            }
            if (!bitmap_is_set(&kc_info->key_filters[keynr],col)) {
                if (!range_open) {
                    uint32_t range_begin = kc_info->cp_info[pk_index][col].col_pack_val;
                    *cursor++ = CK_VAR_RANGE;
                    memcpy(cursor, &range_begin, sizeof(range_begin));
                    cursor += sizeof(range_begin);
                    range_open = true;
                }
                last_kept = col;
            }
            else if (range_open) {
                uint32_t range_end = kc_info->cp_info[pk_index][last_kept].col_pack_val;
                memcpy(cursor, &range_end, sizeof(range_end));
                cursor += sizeof(range_end);
                range_open = false;
            }
        }
        if (range_open) {
            uint32_t range_end = kc_info->cp_info[pk_index][last_kept].col_pack_val;
            memcpy(cursor, &range_end, sizeof(range_end));
            cursor += sizeof(range_end);
            range_open = false;
        }
    }

    // fill in the prefix now that the end position is known
    uint32_t desc_end = cursor - buf;
    for (uint32_t byte_no = 0; byte_no < 4; byte_no++) {
        buf[byte_no] = (uchar)((desc_end >> (8 * byte_no)) & 255);
    }

    return cursor - buf;
}

//
// Builds the value of a clustering key in buf from a primary dictionary
// row value (pk_val), as directed by a descriptor.
//
// Parameters:
//   buf           - destination for the clustering value
//   row_desc      - descriptor; read here as: uint32 number of null bytes,
//                   source MULTI_COL_PACK_INFO, one byte with the number of
//                   offset bytes, destination MULTI_COL_PACK_INFO, one
//                   "has blobs" byte, then a sequence of range entries
//                   (tag byte, uint32 start, uint32 end)
//                   NOTE(review): this matches what
//                   create_toku_clustering_val_pack_descriptor writes after
//                   its four-byte prefix -- presumably the caller passes a
//                   pointer past that prefix; confirm at the call site
//   row_desc_size - size of row_desc in bytes
//   pk_val        - the source row value
//
// Returns the number of bytes written to buf.
//
uint32_t pack_clustering_val_from_desc(
    uchar* buf,
    void* row_desc,
    uint32_t row_desc_size,
    const DBT* pk_val
    )
{
    uchar* null_bytes_src_ptr = NULL;
    uchar* fixed_src_ptr = NULL;
    uchar* var_src_offset_ptr = NULL;
    uchar* var_src_data_ptr = NULL;
    uchar* fixed_dest_ptr = NULL;
    uchar* var_dest_offset_ptr = NULL;
    uchar* var_dest_data_ptr = NULL;
    uchar* orig_var_dest_data_ptr = NULL;
    uchar* desc_pos = (uchar *)row_desc;
    uint32_t num_null_bytes = 0;
    uint32_t num_offset_bytes;
    MULTI_COL_PACK_INFO src_mcp_info, dest_mcp_info;
    uchar has_blobs;

    //
    // read the fixed-size header of the descriptor
    //
    memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
    desc_pos += sizeof(num_null_bytes);

    memcpy(&src_mcp_info, desc_pos, sizeof(src_mcp_info));
    desc_pos += sizeof(src_mcp_info);

    num_offset_bytes = desc_pos[0];
    desc_pos++;

    memcpy(&dest_mcp_info, desc_pos, sizeof(dest_mcp_info));
    desc_pos += sizeof(dest_mcp_info);

    has_blobs = desc_pos[0];
    desc_pos++;

    //
    //set the variables
    // both source and destination are laid out as: null bytes, fixed
    // fields, var field offsets, var field data
    //
    null_bytes_src_ptr = (uchar *)pk_val->data;
    fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
    var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
    var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;

    fixed_dest_ptr = buf + num_null_bytes;
    var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
    var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
    orig_var_dest_data_ptr = var_dest_data_ptr;

    //
    // copy the null bytes
    //
    memcpy(buf, null_bytes_src_ptr, num_null_bytes);
    //
    // process the range entries until the descriptor is exhausted
    //
    while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
        uint32_t start, end, length;
        uchar curr = desc_pos[0];
        desc_pos++;

        memcpy(&start, desc_pos, sizeof(start));
        desc_pos += sizeof(start);

        memcpy(&end, desc_pos, sizeof(end));
        desc_pos += sizeof(end);

        assert (start <= end);

        if (curr == CK_FIX_RANGE) {
            //
            // start and end are byte offsets into the fixed field area;
            // end is exclusive
            //
            length = end - start;

            memcpy(fixed_dest_ptr, fixed_src_ptr + start, length);
            fixed_dest_ptr += length;
        }
        else if (curr == CK_VAR_RANGE) {
            //
            // start and end are var field indexes; end is inclusive
            //
            uint32_t start_data_size;
            uint32_t start_data_offset;
            uint32_t end_data_size;
            uint32_t end_data_offset;
            uint32_t offset_diffs;

            get_var_field_info(
                &start_data_size,
                &start_data_offset,
                start,
                var_src_offset_ptr,
                num_offset_bytes
                );
            get_var_field_info(
                &end_data_size,
                &end_data_offset,
                end,
                var_src_offset_ptr,
                num_offset_bytes
                );
            // bytes from the beginning of field start to the end of field end
            length = end_data_offset + end_data_size - start_data_offset;
            //
            // copy the data
            //
            memcpy(
                var_dest_data_ptr,
                var_src_data_ptr + start_data_offset,
                length
                );
            var_dest_data_ptr += length;

            //
            // put in offset info
            // offset_diffs is how far the end of this run moved towards the
            // front (end position in the source minus end position in the
            // destination); each stored end offset of the run is rebased by
            // that amount
            //
            offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
            for (uint32_t i = start; i <= end; i++) {
                if ( num_offset_bytes == 1 ) {
                    assert(offset_diffs < 256);
                    var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
                    var_dest_offset_ptr++;
                }
                else if ( num_offset_bytes == 2 ) {
                    uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
                    uint32_t new_offset = tmp - offset_diffs;
                    // must still fit in two bytes
                    assert(new_offset < 1<<16);
                    int2store(var_dest_offset_ptr,new_offset);
                    var_dest_offset_ptr += 2;
                }
                else {
                    assert(false);
                }
            }
        }
        else {
            // unknown range tag
            assert(false);
        }
    }
    //
    // copy blobs
    // at this point, var_dest_data_ptr is pointing to the end, where blobs should be located
    // so, we put the blobs at var_dest_data_ptr
    //
    if (has_blobs) {
        uint32_t num_blob_bytes;
        uint32_t start_offset;
        uchar* src_blob_ptr = NULL;
        get_blob_field_info(
            &start_offset,
            src_mcp_info.len_of_offsets,
            var_src_data_ptr,
            num_offset_bytes
            );
        src_blob_ptr = var_src_data_ptr + start_offset;
        // everything from the start of the blobs to the end of the source row
        num_blob_bytes = pk_val->size - (start_offset + (var_src_data_ptr - null_bytes_src_ptr));
        memcpy(var_dest_data_ptr, src_blob_ptr, num_blob_bytes);
        var_dest_data_ptr += num_blob_bytes;
    }
    return var_dest_data_ptr - buf;
}


//
// Returns an upper bound, in bytes, on the size of a secondary key pack
// descriptor for a table described by kc_info.
//
uint32_t get_max_secondary_key_pack_desc_size(
    KEY_AND_COL_INFO* kc_info
    )
{
    // fixed part: byte that states if main dictionary, byte that states if
    // hpk, then the things in pack_some_row_info
    const size_t header_bytes = 1 + 1 + (sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1);

    // blobs: the blob count itself, then one byte per blob
    const size_t blob_bytes = sizeof(kc_info->num_blobs) + kc_info->num_blobs;

    // pk: one byte for num key parts, two bytes for each key part
    const size_t pk_bytes = 1 + MAX_REF_PARTS*2;

    // key, per part: null bit, then null byte,
    // then 1 byte stating what it is, then 4 for offset, 4 for key length,
    //      1 for if charset exists, and 4 for charset
    const size_t key_bytes = MAX_REF_PARTS*(1 + sizeof(uint32_t) + 1 + 3*sizeof(uint32_t) + 1);

    // four bytes storing the length of this portion
    const size_t length_prefix_bytes = 4;

    return (uint32_t)(
        header_bytes +
        blob_bytes +
        pk_bytes +
        key_bytes +
        length_prefix_bytes
        );
}

//
// Serializes the pack descriptor of a secondary key into buf and returns
// the number of bytes written.
//
// Layout produced, in order:
//   4 bytes  total length of this descriptor (little endian, filled in last)
//   1 byte   0, meaning "not the main dictionary"
//   1 byte   1 if the main dictionary has an hpk, 0 otherwise
//   whatever pack_some_row_info emits
//   num_blobs, followed by one length byte per blob
//   primary key info: one count byte (2 * number of pk parts, or 0 when
//       has_hpk), then what pack_desc_pk_info emits for each pk part
//   one record per part of key_info: null bit, optional null offset,
//       offset info, key length info, charset info
//
uint32_t create_toku_secondary_key_pack_descriptor (
    uchar* buf,
    bool has_hpk,
    uint pk_index,
    TABLE_SHARE* table_share,
    TABLE* table,
    KEY_AND_COL_INFO* kc_info,
    KEY* key_info,
    KEY* prim_key
    )
{
    // buf[0..3] is reserved for the descriptor length, written at the end
    uchar* cursor = buf + 4;
    // start of the per-part pk info, only set when there is no hpk
    uchar* pk_parts_desc = NULL;

    // this descriptor is NOT for the main dictionary
    *cursor++ = 0;

    // does the main dictionary have an hpk?
    *cursor++ = has_hpk ? 1 : 0;

    cursor += pack_some_row_info(cursor, pk_index, table_share, kc_info);

    // blob section: the count, then the number of length bytes of each blob
    memcpy(cursor, &kc_info->num_blobs, sizeof(kc_info->num_blobs));
    cursor += sizeof(uint32_t);
    for (uint32_t blob = 0; blob < kc_info->num_blobs; blob++) {
        Field* blob_field = table_share->field[kc_info->blob_fields[blob]];
        *cursor++ = (uchar)blob_field->row_pack_length();
    }

    // primary key section
    if (has_hpk) {
        *cursor++ = 0;
    }
    else {
        const uint pk_part_count = get_key_parts(prim_key);
        // the count byte stores twice the number of parts, so it must fit
        assert(pk_part_count < 128);
        *cursor++ = 2 * pk_part_count;

        // two bytes are expected per pk part: fixed/var marker plus either
        // the number of bytes (fixed) or the number of length bytes (var)
        pk_parts_desc = cursor;
        uchar* pk_end = cursor;
        for (uint part = 0; part < pk_part_count; part++) {
            pk_end += pack_desc_pk_info(
                pk_end,
                kc_info,
                table_share,
                &prim_key->key_part[part]
                );
        }
        // verify that exactly two bytes per part were emitted
        assert(pk_end - cursor == (2 * pk_part_count));
        cursor = pk_end;
    }

    // one record per part of the secondary key
    const uint key_part_count = get_key_parts(key_info);
    for (uint part = 0; part < key_part_count; part++) {
        KEY_PART_INFO key_part = key_info->key_part[part];
        const uint16 col = key_part.field->field_index;
        Field* col_field = table_share->field[col];

        const bool col_is_in_pk =
            bitmap_is_set(&kc_info->key_filters[pk_index], col) != 0;
        if (col_is_in_pk) {
            assert(!has_hpk && prim_key != NULL);
        }

        *cursor++ = col_field->null_bit;

        if (col_is_in_pk) {
            // pk columns cannot be NULL in MySQL, so no null bit is expected
            assert(!col_field->null_bit);
        }

        if (col_field->null_bit) {
            uint32_t null_offset = get_null_offset(table, table->field[col]);
            memcpy(cursor, &null_offset, sizeof(uint32_t));
            cursor += sizeof(uint32_t);
        }

        cursor += col_is_in_pk
            ? pack_desc_pk_offset_info(
                cursor,
                kc_info,
                table_share,
                &key_part,
                prim_key,
                pk_parts_desc
                )
            : pack_desc_offset_info(
                cursor,
                kc_info,
                pk_index,
                table_share,
                &key_part
                );
        cursor += pack_desc_key_length_info(
            cursor,
            kc_info,
            table_share,
            &key_part
            );
        cursor += pack_desc_char_info(
            cursor,
            kc_info,
            table_share,
            &key_part
            );
    }

    // fill in the length prefix, least significant byte first
    const uint32_t total_len = cursor - buf;
    for (int byte = 0; byte < 4; byte++) {
        buf[byte] = (uchar)((total_len >> (8 * byte)) & 255);
    }

    return total_len;
}

//
// Returns the number of descriptor bytes occupied by one key column entry
// starting at row_desc: a one byte fix/var marker, a four byte offset, a
// four byte key part length, a one byte charset marker and, unless that
// marker is COL_HAS_NO_CHARSET, four more bytes of charset info.
//
uint32_t skip_key_in_desc(
    uchar* row_desc
    )
{
    // fix/var marker + offset info + key_part_length info
    const uint32_t charset_marker_at = 1 + sizeof(uint32_t) + sizeof(uint32_t);
    const uchar charset_marker = row_desc[charset_marker_at];

    // everything up to and including the charset marker
    uint32_t entry_len = charset_marker_at + 1;
    if (charset_marker != COL_HAS_NO_CHARSET) {
        // the charset info follows the marker
        entry_len += 4;
    }
    return entry_len;
}


uint32_t max_key_size_from_desc(
    void* row_desc,
    uint32_t row_desc_size
    )
{
    uchar* desc_pos = (uchar *)row_desc;
    uint32_t num_blobs;
    uint32_t num_pk_columns;
    //
    // start at 1 for the infinity byte
    //
    uint32_t max_size = 1;

    // skip byte that states if main dictionary
    bool is_main_dictionary = desc_pos[0];
    desc_pos++;
    assert(!is_main_dictionary);

    // skip hpk byte
    desc_pos++;

    // skip num_null_bytes
    desc_pos += sizeof(uint32_t);

    // skip mcp_info
    desc_pos += sizeof(MULTI_COL_PACK_INFO);

    // skip offset_bytes
    desc_pos++;

    // skip over blobs
    memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
    desc_pos += sizeof(num_blobs);
    desc_pos += num_blobs;

    // skip over pk info
    num_pk_columns = desc_pos[0]/2;
    desc_pos++;
    desc_pos += 2*num_pk_columns;

    while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
        uchar has_charset;
        uint32_t key_length = 0;

        uchar null_bit = desc_pos[0];
        desc_pos++;

        if (null_bit) {
            //
            // column is NULLable, skip null_offset, and add a null byte
            //
            max_size++;
            desc_pos += sizeof(uint32_t);
        }
        //
        // skip over byte that states if fix or var
        //
        desc_pos++;

        // skip over offset
        desc_pos += sizeof(uint32_t);

        //
        // get the key length and add it to return value
        //
        memcpy(&key_length, desc_pos, sizeof(key_length));
        desc_pos += sizeof(key_length);
        max_size += key_length;
        max_size += 2; // 2 bytes for a potential length bytes, we are upperbounding, does not need to be super tight

        has_charset = desc_pos[0];
        desc_pos++;

        uint32_t charset_num;
        if (has_charset == COL_HAS_CHARSET) {
            // skip over charsent num
            desc_pos += sizeof(charset_num);
        }
        else {
            assert(has_charset == COL_HAS_NO_CHARSET);
        }
    }
    return max_size;
}

//
// Packs a secondary key into buf, driven by a row descriptor and the
// primary row (pk_key, pk_val). Returns the number of bytes written to buf.
//
// Parameters:
//   buf           - output buffer for the packed key. This function does not
//                   check its capacity.
//                   NOTE(review): presumably sized by the caller using
//                   max_key_size_from_desc - confirm against callers.
//   row_desc      - descriptor bytes; must not be a main dictionary
//                   descriptor (first byte must be 0, asserted below)
//   row_desc_size - number of bytes in row_desc; after the header, column
//                   entries are consumed until this many bytes were read
//   pk_key        - primary key; when the descriptor's hpk byte is 0 the
//                   first byte is skipped as the infinity byte
//   pk_val        - primary row value, read as: null bytes, fixed fields,
//                   var field offsets, var field data (then blobs)
//
// Descriptor header as read here:
//   1 byte main dictionary flag, 1 byte hpk flag, 4 bytes num_null_bytes,
//   a MULTI_COL_PACK_INFO, 1 byte num_offset_bytes, 4 bytes num_blobs,
//   num_blobs blob length bytes, 1 byte holding 2 * num_pk_columns,
//   then 2 bytes of info per pk column.
// Each following column entry:
//   1 byte null bit, [4 bytes null offset if null bit != 0],
//   1 byte column kind, 4 bytes pack value (offset or index),
//   4 bytes key length, 1 byte charset flag, [4 bytes charset number].
//
// Output: COL_ZERO infinity byte, then each key column (preceded by a
// NULL/NONNULL marker byte when the column is nullable), then the primary
// key bytes appended at the end.
//
uint32_t pack_key_from_desc(
    uchar* buf,
    void* row_desc,
    uint32_t row_desc_size,
    const DBT* pk_key,
    const DBT* pk_val
    )
{
    MULTI_COL_PACK_INFO mcp_info;
    uint32_t num_null_bytes;
    uint32_t num_blobs;
    uint32_t num_pk_columns;
    uchar* blob_lengths = NULL;
    uchar* pk_info = NULL;
    uchar* pk_data_ptr = NULL;
    uchar* null_bytes_ptr = NULL;
    uchar* fixed_field_ptr = NULL;
    uchar* var_field_offset_ptr = NULL;
    const uchar* var_field_data_ptr = NULL;
    uint32_t num_offset_bytes;
    uchar* packed_key_pos = buf;
    uchar* desc_pos = (uchar *)row_desc;

    // first descriptor byte: must say this is not the main dictionary
    bool is_main_dictionary = desc_pos[0];
    desc_pos++;
    assert(!is_main_dictionary);

    //
    // get the constant info out of descriptor
    //
    bool hpk = desc_pos[0];
    desc_pos++;

    memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
    desc_pos += sizeof(num_null_bytes);

    memcpy(&mcp_info, desc_pos, sizeof(mcp_info));
    desc_pos += sizeof(mcp_info);

    num_offset_bytes = desc_pos[0];
    desc_pos++;

    memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
    desc_pos += sizeof(num_blobs);

    // one byte per blob: the number of length bytes that blob uses
    blob_lengths = desc_pos;
    desc_pos += num_blobs;

    // the stored byte is 2 * number of pk columns; two info bytes follow
    // per pk column (kind, then size or number of length bytes)
    num_pk_columns = desc_pos[0]/2;
    desc_pos++;
    pk_info = desc_pos;
    desc_pos += 2*num_pk_columns;

    //
    // now start packing the key
    //

    //
    // pack the infinity byte
    //
    packed_key_pos[0] = COL_ZERO;
    packed_key_pos++;
    //
    // now start packing each column of the key, as described in descriptor
    //
    if (!hpk) {
        // +1 for the infinity byte
        pk_data_ptr = (uchar *)pk_key->data + 1;
    }
    // carve the row value into its sections: null bytes, fixed fields,
    // var field offsets, var field data
    null_bytes_ptr = (uchar *)pk_val->data;
    fixed_field_ptr = null_bytes_ptr + num_null_bytes;
    var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
    var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
    // one iteration per key column entry in the descriptor
    while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
        uchar col_fix_val;
        uchar has_charset;
        uint32_t col_pack_val = 0;
        uint32_t key_length = 0;

        uchar null_bit = desc_pos[0];
        desc_pos++;

        if (null_bit) {
            //
            // column is NULLable, need to check the null bytes to see if it is NULL
            //
            uint32_t null_offset = 0;
            bool is_field_null;
            memcpy(&null_offset, desc_pos, sizeof(null_offset));
            desc_pos += sizeof(null_offset);

            is_field_null = (null_bytes_ptr[null_offset] & null_bit) ? true: false;
            if (is_field_null) {
                // a NULL column packs as just the marker byte; the rest of
                // its descriptor entry is skipped unread
                packed_key_pos[0] = NULL_COL_VAL;
                packed_key_pos++;
                desc_pos += skip_key_in_desc(desc_pos);
                continue;
            }
            else {
                packed_key_pos[0] = NONNULL_COL_VAL;
                packed_key_pos++;
            }
        }
        //
        // now pack the column (unless it was NULL, and we continued)
        //
        // col_fix_val: kind of column (fixed/var/blob in the row value, or
        // fixed/var offset into the pk)
        col_fix_val = desc_pos[0];
        desc_pos++;

        // col_pack_val: byte offset for fixed kinds, index for var/blob kinds
        memcpy(&col_pack_val, desc_pos, sizeof(col_pack_val));
        desc_pos += sizeof(col_pack_val);

        memcpy(&key_length, desc_pos, sizeof(key_length));
        desc_pos += sizeof(key_length);

        has_charset = desc_pos[0];
        desc_pos++;

        uint32_t charset_num = 0;
        if (has_charset == COL_HAS_CHARSET) {
            memcpy(&charset_num, desc_pos, sizeof(charset_num));
            desc_pos += sizeof(charset_num);
        }
        else {
            assert(has_charset == COL_HAS_NO_CHARSET);
        }
        //
        // case where column is in pk val
        //
        if (col_fix_val == COL_FIX_FIELD || col_fix_val == COL_VAR_FIELD || col_fix_val == COL_BLOB_FIELD) {
            if (col_fix_val == COL_FIX_FIELD && has_charset == COL_HAS_NO_CHARSET) {
                // fixed width, no charset: raw copy of key_length bytes
                memcpy(packed_key_pos, &fixed_field_ptr[col_pack_val], key_length);
                packed_key_pos += key_length;
            }
            else if (col_fix_val == COL_VAR_FIELD && has_charset == COL_HAS_NO_CHARSET) {
                uint32_t data_start_offset = 0;

                uint32_t data_size = 0;
                get_var_field_info(
                    &data_size,
                    &data_start_offset,
                    col_pack_val,
                    var_field_offset_ptr,
                    num_offset_bytes
                    );

                //
                // length of this field in this row is data_size
                // data is located beginning at var_field_data_ptr + data_start_offset
                //
                packed_key_pos = pack_toku_varbinary_from_desc(
                    packed_key_pos,
                    var_field_data_ptr + data_start_offset,
                    key_length, //number of bytes to use to encode the length in to_tokudb
                    data_size //length of field
                    );
            }
            else {
                // remaining cases (any column with a charset, and blobs):
                // locate the data, then pack it via
                // pack_toku_varstring_from_desc
                const uchar* data_start = NULL;
                uint32_t data_start_offset = 0;
                uint32_t data_size = 0;

                if (col_fix_val == COL_FIX_FIELD) {
                    data_start_offset = col_pack_val;
                    data_size = key_length;
                    data_start = fixed_field_ptr + data_start_offset;
                }
                else if (col_fix_val == COL_VAR_FIELD){
                    get_var_field_info(
                        &data_size,
                        &data_start_offset,
                        col_pack_val,
                        var_field_offset_ptr,
                        num_offset_bytes
                        );
                    data_start = var_field_data_ptr + data_start_offset;
                }
                else if (col_fix_val == COL_BLOB_FIELD) {
                    uint32_t blob_index = col_pack_val;
                    uint32_t blob_offset;
                    const uchar* blob_ptr = NULL;
                    uint32_t field_len;
                    uint32_t field_len_bytes = blob_lengths[blob_index];
                    // find where the blobs start relative to the var data
                    get_blob_field_info(
                        &blob_offset,
                        mcp_info.len_of_offsets,
                        var_field_data_ptr,
                        num_offset_bytes
                        );
                    blob_ptr = var_field_data_ptr + blob_offset;
                    assert(num_blobs > 0);
                    //
                    // skip over other blobs to get to the one we want to make a key out of
                    //
                    for (uint32_t i = 0; i < blob_index; i++) {
                        blob_ptr = unpack_toku_field_blob(
                            NULL,
                            blob_ptr,
                            blob_lengths[i],
                            true
                            );
                    }
                    //
                    // at this point, blob_ptr is pointing to the blob we want to make a key from
                    //
                    field_len = get_blob_field_len(blob_ptr, field_len_bytes);
                    //
                    // now we set the variables to make the key
                    //
                    // the blob data follows its length bytes
                    data_start = blob_ptr + field_len_bytes;
                    data_size = field_len;


                }
                else {
                    assert(false);
                }

                packed_key_pos = pack_toku_varstring_from_desc(
                    packed_key_pos,
                    data_start,
                    key_length,
                    data_size,
                    charset_num
                    );
            }
        }
        //
        // case where column is in pk key
        //
        else {
            if (col_fix_val == COL_FIX_PK_OFFSET) {
                // col_pack_val is a byte offset into the pk data (which
                // starts after the infinity byte)
                memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
                packed_key_pos += key_length;
            }
            else if (col_fix_val == COL_VAR_PK_OFFSET) {
                // col_pack_val is the index of the column within the pk;
                // earlier pk columns must be walked over to find it
                uchar* tmp_pk_data_ptr = pk_data_ptr;
                uint32_t index_in_pk = col_pack_val;
                //
                // skip along in pk to the right column
                //
                for (uint32_t i = 0; i < index_in_pk; i++) {
                    if (pk_info[2*i] == COL_FIX_FIELD) {
                        // fixed column: second info byte is its size
                        tmp_pk_data_ptr += pk_info[2*i + 1];
                    }
                    else if (pk_info[2*i] == COL_VAR_FIELD) {
                        // var column: second info byte is the number of
                        // length bytes (1 or 2) stored ahead of the data
                        uint32_t len_bytes = pk_info[2*i + 1];
                        // NOTE(review): len is only assigned on the 1 and 2
                        // byte branches; any other value relies on
                        // assert(false) stopping execution
                        uint32_t len;
                        if (len_bytes == 1) {
                            len = tmp_pk_data_ptr[0];
                            tmp_pk_data_ptr++;
                        }
                        else if (len_bytes == 2) {
                            len = uint2korr(tmp_pk_data_ptr);
                            tmp_pk_data_ptr += 2;
                        }
                        else {
                            assert(false);
                        }
                        tmp_pk_data_ptr += len;
                    }
                    else {
                        assert(false);
                    }
                }
                //
                // at this point, tmp_pk_data_ptr is pointing at the column
                //
                uint32_t is_fix_field = pk_info[2*index_in_pk];
                if (is_fix_field == COL_FIX_FIELD) {
                    memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
                    packed_key_pos += key_length;
                }
                else if (is_fix_field == COL_VAR_FIELD) {
                    const uchar* data_start = NULL;
                    uint32_t data_size = 0;
                    uint32_t len_bytes = pk_info[2*index_in_pk + 1];
                    // read the 1 or 2 byte length prefix, then step past it
                    if (len_bytes == 1) {
                        data_size = tmp_pk_data_ptr[0];
                        tmp_pk_data_ptr++;
                    }
                    else if (len_bytes == 2) {
                        data_size = uint2korr(tmp_pk_data_ptr);
                        tmp_pk_data_ptr += 2;
                    }
                    else {
                        assert(false);
                    }
                    data_start = tmp_pk_data_ptr;

                    if (has_charset == COL_HAS_CHARSET) {
                        packed_key_pos = pack_toku_varstring_from_desc(
                            packed_key_pos,
                            data_start,
                            key_length,
                            data_size,
                            charset_num
                            );
                    }
                    else if (has_charset == COL_HAS_NO_CHARSET) {
                        packed_key_pos = pack_toku_varbinary_from_desc(
                            packed_key_pos,
                            data_start,
                            key_length,
                            data_size //length of field
                            );
                    }
                    else {
                        assert(false);
                    }
                }
                else {
                    assert(false);
                }
            }
            else {
                assert(false);
            }
        }

    }
    // the column entries must consume the descriptor exactly
    assert( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);

    //
    // now append the primary key to the end of the key
    //
    if (hpk) {
        // with an hpk the whole pk_key is appended as is
        memcpy(packed_key_pos, pk_key->data, pk_key->size);
        packed_key_pos += pk_key->size;
    }
    else {
        // without an hpk the leading infinity byte of pk_key is dropped
        memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
        packed_key_pos += (pk_key->size - 1);
    }

    // number of bytes written into buf
    return (uint32_t)(packed_key_pos - buf); //
}

//
// Returns true when the two fields' field_name strings compare equal
// under strcmp (byte-wise, case sensitive).
//
bool fields_have_same_name(
    Field* a,
    Field* b
    )
{
    const int name_cmp = strcmp(a->field_name, b->field_name);
    return name_cmp == 0;
}

//
// Returns true when fields a and b have the same type for tokudb's
// purposes: same MySQL real type, same toku type, same nullability, and
// matching type-specific attributes (pack length, signedness, auto
// increment, charset, ... depending on the type). Names are not compared.
//
bool fields_are_same_type(
    Field* a,
    Field* b
    )
{
    // all four type lookups happen before any comparison
    const enum_field_types mysql_type_a = a->real_type();
    const enum_field_types mysql_type_b = b->real_type();
    const TOKU_TYPE toku_type_a = mysql_to_toku_type(a);
    const TOKU_TYPE toku_type_b = mysql_to_toku_type(b);

    if (mysql_type_a != mysql_type_b) {
        return false;
    }
    // With MariaDB 5.5's fractional time implementation, two fields can
    // share a MySQL type and still map to different toku types, so the
    // toku type has to be compared separately.
    if (toku_type_a != toku_type_b) {
        return false;
    }
    // either both fields are nullable or neither is
    if ((a->null_bit != 0) != (b->null_bit != 0)) {
        return false;
    }

    switch (mysql_type_a) {
    case MYSQL_TYPE_TINY:
    case MYSQL_TYPE_SHORT:
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_LONG:
    case MYSQL_TYPE_LONGLONG:
    case MYSQL_TYPE_DOUBLE:
    case MYSQL_TYPE_FLOAT:
        // integers and floats: length, unsigned and auto increment must agree
        if (!(a->pack_length() == b->pack_length() &&
              (a->flags & UNSIGNED_FLAG) == (b->flags & UNSIGNED_FLAG) &&
              (a->flags & AUTO_INCREMENT_FLAG) == (b->flags & AUTO_INCREMENT_FLAG))) {
            return false;
        }
        break;
    case MYSQL_TYPE_NEWDECIMAL:
        // length and unsigned must agree
        if (!(a->pack_length() == b->pack_length() &&
              (a->flags & UNSIGNED_FLAG) == (b->flags & UNSIGNED_FLAG))) {
            return false;
        }
        break;
    case MYSQL_TYPE_ENUM:
    case MYSQL_TYPE_SET:
    case MYSQL_TYPE_BIT:
    case MYSQL_TYPE_DATE:
    case MYSQL_TYPE_DATETIME:
    case MYSQL_TYPE_YEAR:
    case MYSQL_TYPE_NEWDATE:
    case MYSQL_TYPE_TIME:
    case MYSQL_TYPE_TIMESTAMP:
#if 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
    case MYSQL_TYPE_DATETIME2:
    case MYSQL_TYPE_TIMESTAMP2:
    case MYSQL_TYPE_TIME2:
#endif
        // only the length has to agree
        if (a->pack_length() != b->pack_length()) {
            return false;
        }
        break;
    case MYSQL_TYPE_TINY_BLOB:
    case MYSQL_TYPE_MEDIUM_BLOB:
    case MYSQL_TYPE_BLOB:
    case MYSQL_TYPE_LONG_BLOB:
        // blobs: charset first, then the row pack length
        if (a->charset()->number != b->charset()->number) {
            return false;
        }
        if (a->row_pack_length() != b->row_pack_length()) {
            return false;
        }
        break;
    case MYSQL_TYPE_STRING: {
        if (a->pack_length() != b->pack_length()) {
            return false;
        }
        const bool a_is_binary = a->binary();
        const bool b_is_binary = b->binary();
        // one binary and the other not: not the same
        if (a_is_binary != b_is_binary) {
            return false;
        }
        // both non-binary: the charset has to match as well
        if (!a_is_binary && a->charset()->number != b->charset()->number) {
            return false;
        }
        break;
    }
    case MYSQL_TYPE_VARCHAR: {
        if (a->field_length != b->field_length) {
            return false;
        }
        const bool a_is_binary = a->binary();
        const bool b_is_binary = b->binary();
        // one binary and the other not: not the same
        if (a_is_binary != b_is_binary) {
            return false;
        }
        // both non-binary: the charset has to match as well
        if (!a_is_binary && a->charset()->number != b->charset()->number) {
            return false;
        }
        break;
    }
    //
    // Believed to be old types that no longer appear in any 5.1 tables,
    // so tokudb does not handle them; the assert is there in case that
    // belief is wrong. Geometry is not supported yet.
    //
    case MYSQL_TYPE_GEOMETRY:
    case MYSQL_TYPE_DECIMAL:
    case MYSQL_TYPE_VAR_STRING:
    case MYSQL_TYPE_NULL:
        assert(false);
        break;
    }

    return true;
}


//
// Two fields are considered the same when they have the same name and the
// same type. The type comparison only runs if the names match.
//
bool are_two_fields_same(
    Field* a,
    Field* b
    )
{
    if (!fields_have_same_name(a, b)) {
        return false;
    }
    return fields_are_same_type(a, b);
}