Update Mroonga to the latest version on 2014-09-21T00:33:44+0900

This commit is contained in:
Kentoku SHIBA 2014-09-21 00:33:45 +09:00
commit 0cc855cdc8
2027 changed files with 460307 additions and 0 deletions

View file

@ -0,0 +1,12 @@
# Preprocessor definitions for the sources built here. GRN_DAT_EXPORT selects
# the exporting side of GRN_DAT_API (see dat.hpp).
DEFS += -D_REENTRANT $(GRN_DEFS) -DGRN_DAT_EXPORT
# Header search path: the build tree, then the include/ and lib/ directories
# of the source tree.
DEFAULT_INCLUDES = \
-I$(top_builddir) \
-I$(top_srcdir)/include \
-I$(top_srcdir)/lib
# libgrndat.la is a noinst libtool library, i.e. it is built but not installed
# on its own.
noinst_LTLIBRARIES = libgrndat.la
# sources.am presumably supplies the source list for libgrndat.la -- TODO confirm.
include sources.am
# Coverage data files (*.gcno / *.gcda) are removed by `make clean`.
CLEANFILES = *.gcno *.gcda

View file

@ -0,0 +1,100 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_ARRAY_HPP_
#define GRN_DAT_ARRAY_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
// Array is a lightweight view over a contiguous run of T: it stores only a
// pointer and an element count and never allocates or frees the elements.
// Its purpose is to detect out-of-range accesses in debug mode; the checks go
// through GRN_DAT_DEBUG_THROW_IF.
template <typename T>
class GRN_DAT_API Array {
 public:
  // Builds an empty view (NULL pointer, zero elements).
  Array() : ptr_(NULL), size_(0) {}

  // Builds a view of `size` elements located at `ptr`.
  Array(void *ptr, UInt32 size) : ptr_(static_cast<T *>(ptr)), size_(size) {
    GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (size != 0));
  }

  // Builds a view that covers a whole built-in array.
  template <UInt32 U>
  explicit Array(T (&array)[U]) : ptr_(array), size_(U) {}

  // Nothing to release: the elements are not owned by the view.
  ~Array() {}

  // Element access. The index is range-checked in debug mode only.
  const T &operator[](UInt32 i) const {
    GRN_DAT_DEBUG_THROW_IF(i >= size_);
    return *(ptr_ + i);
  }
  T &operator[](UInt32 i) {
    GRN_DAT_DEBUG_THROW_IF(i >= size_);
    return *(ptr_ + i);
  }

  // Pointer to the first element.
  const T *begin() const {
    return ptr_;
  }
  T *begin() {
    return ptr_;
  }

  // Pointer one past the last element.
  const T *end() const {
    return ptr_ + size_;
  }
  T *end() {
    return ptr_ + size_;
  }

  // Re-targets the view at `size` elements located at `ptr`.
  void assign(void *ptr, UInt32 size) {
    GRN_DAT_DEBUG_THROW_IF((ptr == NULL) && (size != 0));
    size_ = size;
    ptr_ = static_cast<T *>(ptr);
  }

  // Re-targets the view at a whole built-in array.
  template <UInt32 U>
  void assign(T (&array)[U]) {
    assign(array, U);
  }

  // Exchanges the pointer and the size with `*rhs`.
  void swap(Array *rhs) {
    T * const other_ptr = rhs->ptr_;
    const UInt32 other_size = rhs->size_;
    rhs->ptr_ = ptr_;
    rhs->size_ = size_;
    ptr_ = other_ptr;
    size_ = other_size;
  }

  T *ptr() const {
    return ptr_;
  }
  UInt32 size() const {
    return size_;
  }

 private:
  T *ptr_;
  UInt32 size_;

  // Disallows copy and assignment.
  Array(const Array &);
  Array &operator=(const Array &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_ARRAY_HPP_

View file

@ -0,0 +1,69 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_BASE_HPP_
#define GRN_DAT_BASE_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
// Base is the BASE word of a node, packed into 32 bits. The most significant
// bit (IS_LINKER_FLAG) selects one of two encodings:
//   - linker:     the remaining bits hold the position of the associated key;
//   - non-linker: the word holds the offset to the node's child nodes.
class GRN_DAT_API Base {
 public:
  Base() : value_(0) {}

  bool operator==(const Base &rhs) const {
    return rhs.value_ == value_;
  }

  bool is_linker() const {
    return (value_ & IS_LINKER_FLAG) != 0;
  }

  // Offset to the child nodes. Only meaningful for a non-linker.
  UInt32 offset() const {
    GRN_DAT_DEBUG_THROW_IF(is_linker());
    return value_;
  }

  // Position of the associated key. Only meaningful for a linker.
  UInt32 key_pos() const {
    GRN_DAT_DEBUG_THROW_IF(!is_linker());
    const UInt32 payload_mask = ~IS_LINKER_FLAG;
    return value_ & payload_mask;
  }

  // Stores `x` as an offset, which also clears the linker flag.
  void set_offset(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF((x & IS_LINKER_FLAG) != 0);
    GRN_DAT_DEBUG_THROW_IF(x > MAX_OFFSET);
    value_ = x;
  }

  // Stores `x` as a key position and turns the node into a linker.
  void set_key_pos(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF((x & IS_LINKER_FLAG) != 0);
    GRN_DAT_DEBUG_THROW_IF(x > MAX_OFFSET);
    value_ = x | IS_LINKER_FLAG;
  }

 private:
  UInt32 value_;

  static const UInt32 IS_LINKER_FLAG = 0x80000000U;
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_BASE_HPP_

View file

@ -0,0 +1,96 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_BLOCK_HPP_
#define GRN_DAT_BLOCK_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
// Block is the per-block bookkeeping record. Two 32-bit words each carry a
// block ID in their upper part (stored as ID * BLOCK_SIZE) and a small counter
// in the bits selected by BLOCK_MASK:
//   next_ = next block ID | level
//   prev_ = prev block ID | failure count
class GRN_DAT_API Block {
 public:
  Block() : next_(0), prev_(0), first_phantom_(0), num_phantoms_(0) {}

  // Blocks in the same level are stored in a doubly-linked list; next() and
  // prev() return the IDs of the neighbouring blocks in that list.
  UInt32 next() const {
    const UInt32 block_id = next_ / BLOCK_SIZE;
    return block_id;
  }
  UInt32 prev() const {
    const UInt32 block_id = prev_ / BLOCK_SIZE;
    return block_id;
  }

  // A level indicates how easily find_offset() can find a good offset in the
  // block. It is easier in lower level blocks.
  UInt32 level() const {
    return BLOCK_MASK & next_;
  }
  // A block level rises when find_offset() fails to find a good offset
  // MAX_FAILURE_COUNT times in that block.
  UInt32 failure_count() const {
    return BLOCK_MASK & prev_;
  }

  UInt32 first_phantom() const {
    return first_phantom_;
  }
  UInt32 num_phantoms() const {
    return num_phantoms_;
  }

  // Replaces the next block ID and keeps the level bits.
  void set_next(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF(x > MAX_BLOCK_ID);
    const UInt32 level_bits = next_ & BLOCK_MASK;
    next_ = (x * BLOCK_SIZE) | level_bits;
  }
  // Replaces the previous block ID and keeps the failure count bits.
  void set_prev(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF(x > MAX_BLOCK_ID);
    const UInt32 failure_bits = prev_ & BLOCK_MASK;
    prev_ = (x * BLOCK_SIZE) | failure_bits;
  }

  // Replaces the level and keeps the next block ID.
  void set_level(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF(x > MAX_BLOCK_LEVEL);
    GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK);
    const UInt32 id_bits = next_ & ~BLOCK_MASK;
    next_ = id_bits | x;
  }
  // Replaces the failure count and keeps the previous block ID.
  void set_failure_count(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF(x > MAX_FAILURE_COUNT);
    GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK);
    const UInt32 id_bits = prev_ & ~BLOCK_MASK;
    prev_ = id_bits | x;
  }

  void set_first_phantom(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF(x >= BLOCK_SIZE);
    first_phantom_ = static_cast<UInt16>(x);
  }
  void set_num_phantoms(UInt32 x) {
    GRN_DAT_DEBUG_THROW_IF(x > BLOCK_SIZE);
    num_phantoms_ = static_cast<UInt16>(x);
  }

 private:
  UInt32 next_;
  UInt32 prev_;
  UInt16 first_phantom_;
  UInt16 num_phantoms_;
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_BLOCK_HPP_

View file

@ -0,0 +1,151 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_CHECK_HPP_
#define GRN_DAT_CHECK_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
class GRN_DAT_API Check {
public:
Check() : value_(0) {}
bool operator==(const Check &rhs) const {
return value_ == rhs.value_;
}
// The most significant bit represents whether or not the node ID is used as
// an offset. Note that the MSB is independent of the other bits.
bool is_offset() const {
return (value_ & IS_OFFSET_FLAG) == IS_OFFSET_FLAG;
}
UInt32 except_is_offset() const {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
return value_ & ~IS_OFFSET_FLAG;
}
// A phantom node is a node that has never been used, and such a node is also
// called an empty element. Phantom nodes form a doubly linked list in each
// block, and the linked list is represented by next() and prev().
bool is_phantom() const {
return (value_ & IS_PHANTOM_FLAG) == IS_PHANTOM_FLAG;
}
UInt32 next() const {
GRN_DAT_DEBUG_THROW_IF(!is_phantom());
return (value_ >> NEXT_SHIFT) & BLOCK_MASK;
}
UInt32 prev() const {
GRN_DAT_DEBUG_THROW_IF(!is_phantom());
return (value_ >> PREV_SHIFT) & BLOCK_MASK;
}
// A label is attached to each non-phantom node. A label is represented by
// a byte except for a terminal label '\256'. Note that a phantom node always
// returns an invalid label with its phantom bit flag so as to reject invalid
// transitions.
UInt32 label() const {
return value_ & (IS_PHANTOM_FLAG | LABEL_MASK);
}
// A non-phantom node has the labels of the first child and the next sibling.
// Note that INVALID_LABEL is stored if the node has no child nodes or has
// no more siblings.
UInt32 child() const {
return (value_ >> CHILD_SHIFT) & LABEL_MASK;
}
UInt32 sibling() const {
return (value_ >> SIBLING_SHIFT) & LABEL_MASK;
}
void set_is_offset(bool x) {
if (x) {
GRN_DAT_DEBUG_THROW_IF(is_offset());
value_ |= IS_OFFSET_FLAG;
} else {
GRN_DAT_DEBUG_THROW_IF(!is_offset());
value_ &= ~IS_OFFSET_FLAG;
}
}
void set_except_is_offset(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
GRN_DAT_DEBUG_THROW_IF((x & IS_OFFSET_FLAG) == IS_OFFSET_FLAG);
value_ = (value_ & IS_OFFSET_FLAG) | x;
}
// To reject a transition to an incomplete node, set_is_phantom() invalidates
// its label and links when it becomes non-phantom.
void set_is_phantom(bool x) {
if (x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
value_ |= IS_PHANTOM_FLAG;
} else {
GRN_DAT_DEBUG_THROW_IF(!is_phantom());
value_ = (value_ & IS_OFFSET_FLAG) | (INVALID_LABEL << CHILD_SHIFT) |
(INVALID_LABEL << SIBLING_SHIFT) | INVALID_LABEL;
}
}
void set_next(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(!is_phantom());
GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK);
value_ = (value_ & ~(BLOCK_MASK << NEXT_SHIFT)) | (x << NEXT_SHIFT);
}
void set_prev(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(!is_phantom());
GRN_DAT_DEBUG_THROW_IF(x > BLOCK_MASK);
value_ = (value_ & ~(BLOCK_MASK << PREV_SHIFT)) | (x << PREV_SHIFT);
}
void set_label(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
GRN_DAT_DEBUG_THROW_IF(x > MAX_LABEL);
value_ = (value_ & ~LABEL_MASK) | x;
}
void set_child(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
GRN_DAT_DEBUG_THROW_IF(x > MAX_LABEL);
value_ = (value_ & ~(LABEL_MASK << CHILD_SHIFT)) | (x << CHILD_SHIFT);
}
void set_sibling(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
GRN_DAT_DEBUG_THROW_IF(label() > MAX_LABEL);
GRN_DAT_DEBUG_THROW_IF((sibling() != INVALID_LABEL) && (x == INVALID_LABEL));
value_ = (value_ & ~(LABEL_MASK << SIBLING_SHIFT)) | (x << SIBLING_SHIFT);
}
private:
UInt32 value_;
static const UInt32 IS_OFFSET_FLAG = 1U << 31;
static const UInt32 IS_PHANTOM_FLAG = 1U << 30;
static const UInt32 NEXT_SHIFT = 9;
static const UInt32 PREV_SHIFT = 18;
static const UInt32 CHILD_SHIFT = 9;
static const UInt32 SIBLING_SHIFT = 18;
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_CHECK_HPP_

View file

@ -0,0 +1,94 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "cursor-factory.hpp"
#include "id-cursor.hpp"
#include "key-cursor.hpp"
#include "prefix-cursor.hpp"
#include "predictive-cursor.hpp"
#include <new>
namespace grn {
namespace dat {
// Creates a cursor over `trie`. The concrete class is chosen by the cursor
// type bits of `flags` (flags & CURSOR_TYPE_MASK):
//   ID_RANGE_CURSOR   -> IdCursor,         opened with the min and max strings
//   KEY_RANGE_CURSOR  -> KeyCursor,        opened with the min and max strings
//   PREFIX_CURSOR     -> PrefixCursor,     opened with the max string and
//                                          min_length
//   PREDICTIVE_CURSOR -> PredictiveCursor, opened with the min string only
// `offset`, `limit` and `flags` are forwarded unchanged to the cursor's open().
//
// Returns the cursor, which is allocated with new and is not retained here;
// ownership presumably passes to the caller -- TODO confirm.
//
// Throws MemoryError if the allocation fails and ParamError if the type bits
// match none of the cases above. If the cursor's own open() throws, the
// cursor is deleted and the exception is rethrown.
//
// `trie` is a reference, so it cannot be null in a well-formed program and no
// null check is made on it: comparing the address of a reference against NULL
// is a tautology that compilers warn about and are free to remove.
Cursor *CursorFactory::open(const Trie &trie,
                            const void *min_ptr, UInt32 min_length,
                            const void *max_ptr, UInt32 max_length,
                            UInt32 offset,
                            UInt32 limit,
                            UInt32 flags) {
  const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
  switch (cursor_type) {
    case ID_RANGE_CURSOR: {
      // nothrow new: failure is reported as MemoryError, not std::bad_alloc.
      IdCursor *cursor = new (std::nothrow) IdCursor;
      GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL);
      try {
        cursor->open(trie, String(min_ptr, min_length),
                     String(max_ptr, max_length), offset, limit, flags);
      } catch (...) {
        delete cursor;
        throw;
      }
      return cursor;
    }
    case KEY_RANGE_CURSOR: {
      KeyCursor *cursor = new (std::nothrow) KeyCursor;
      GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL);
      try {
        cursor->open(trie, String(min_ptr, min_length),
                     String(max_ptr, max_length), offset, limit, flags);
      } catch (...) {
        delete cursor;
        throw;
      }
      return cursor;
    }
    case PREFIX_CURSOR: {
      PrefixCursor *cursor = new (std::nothrow) PrefixCursor;
      GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL);
      try {
        // The max string is the query; min_length is passed as a plain number
        // and min_ptr is not used.
        cursor->open(trie, String(max_ptr, max_length), min_length,
                     offset, limit, flags);
      } catch (...) {
        delete cursor;
        throw;
      }
      return cursor;
    }
    case PREDICTIVE_CURSOR: {
      PredictiveCursor *cursor = new (std::nothrow) PredictiveCursor;
      GRN_DAT_THROW_IF(MEMORY_ERROR, cursor == NULL);
      try {
        // Only the min string is used; max_ptr and max_length are ignored.
        cursor->open(trie, String(min_ptr, min_length),
                     offset, limit, flags);
      } catch (...) {
        delete cursor;
        throw;
      }
      return cursor;
    }
    default: {
      GRN_DAT_THROW(PARAM_ERROR, "unknown cursor type");
    }
  }
}
} // namespace dat
} // namespace grn

View file

@ -0,0 +1,46 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_CURSOR_FACTORY_HPP_
#define GRN_DAT_CURSOR_FACTORY_HPP_
#include "cursor.hpp"
namespace grn {
namespace dat {
class Trie;
// Factory for cursors over a Trie. The class offers a single static function;
// its copy constructor and assignment operator are declared private.
class GRN_DAT_API CursorFactory {
public:
// Opens a cursor over `trie`. The implementation in cursor-factory.cpp picks
// the cursor class from (flags & CURSOR_TYPE_MASK) and throws ParamError when
// those bits match no known type, so the default `flags = 0` is rejected and
// callers have to pass one of the *_CURSOR type constants.
// min_ptr/min_length and max_ptr/max_length describe two byte strings; which
// of them a cursor uses depends on its type (see cursor-factory.cpp).
// The returned cursor is allocated with new; ownership presumably passes to
// the caller -- TODO confirm.
static Cursor *open(const Trie &trie,
const void *min_ptr, UInt32 min_length,
const void *max_ptr, UInt32 max_length,
UInt32 offset = 0,
UInt32 limit = MAX_UINT32,
UInt32 flags = 0);
private:
// Disallows copy and assignment.
CursorFactory(const CursorFactory &);
CursorFactory &operator=(const CursorFactory &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_CURSOR_FACTORY_HPP_

View file

@ -0,0 +1,48 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_CURSOR_HPP_
#define GRN_DAT_CURSOR_HPP_
#include "key.hpp"
namespace grn {
namespace dat {
// Abstract cursor interface: every operation is pure virtual and the class
// holds no data. CursorFactory::open() returns its cursors as Cursor pointers.
class GRN_DAT_API Cursor {
public:
Cursor() {}
// Virtual, so a cursor can be destroyed through a Cursor pointer.
virtual ~Cursor() {}
// Closes the cursor; what that releases is up to each implementation.
virtual void close() = 0;
// Returns a reference to a Key; presumably the next key in the cursor's
// order -- TODO confirm, including what is returned at the end of the range.
virtual const Key &next() = 0;
// Presumably report the values the cursor was opened with -- TODO confirm.
virtual UInt32 offset() const = 0;
virtual UInt32 limit() const = 0;
virtual UInt32 flags() const = 0;
private:
// Disallows copy and assignment.
Cursor(const Cursor &);
Cursor &operator=(const Cursor &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_CURSOR_HPP_

View file

@ -0,0 +1,255 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_COMMON_HPP_
#define GRN_DAT_COMMON_HPP_
#ifndef _MSC_VER
# include <stddef.h>
# include <stdint.h>
#endif // _MSC_VER
#include <cstddef>
#include <exception>
#ifdef _DEBUG
# include <iostream>
#endif // _DEBUG
// GRN_DAT_API marks the classes exposed by this library. Unless it is already
// defined, it becomes __declspec(dllexport) on WIN32 when GRN_DAT_EXPORT is
// defined, __declspec(dllimport) on WIN32 otherwise, and empty elsewhere.
#ifndef GRN_DAT_API
# ifdef WIN32
# ifdef GRN_DAT_EXPORT
# define GRN_DAT_API __declspec(dllexport)
# else // GRN_DAT_EXPORT
# define GRN_DAT_API __declspec(dllimport)
# endif // GRN_DAT_EXPORT
# else // WIN32
# define GRN_DAT_API
# endif // WIN32
#endif // GRN_DAT_API
namespace grn {
namespace dat {
// Fixed-width unsigned integer types. With _MSC_VER the compiler's __intN
// types are used; otherwise the <stdint.h> types are used.
#ifdef _MSC_VER
typedef unsigned __int8 UInt8;
typedef unsigned __int16 UInt16;
typedef unsigned __int32 UInt32;
typedef unsigned __int64 UInt64;
#else // _MSC_VER
typedef ::uint8_t UInt8;
typedef ::uint16_t UInt16;
typedef ::uint32_t UInt32;
typedef ::uint64_t UInt64;
#endif // _MSC_VER
// Largest value of each integer type (all bits set).
const UInt8 MAX_UINT8 = static_cast<UInt8>(0xFFU);
const UInt16 MAX_UINT16 = static_cast<UInt16>(0xFFFFU);
const UInt32 MAX_UINT32 = static_cast<UInt32>(0xFFFFFFFFU);
const UInt64 MAX_UINT64 = static_cast<UInt64>(0xFFFFFFFFFFFFFFFFULL);
// If a key is a prefix of another key, such a key is associated with a special
// terminal node which has TERMINAL_LABEL.
const UInt16 TERMINAL_LABEL = 0x100;
// Valid labels run from MIN_LABEL (0) up to MAX_LABEL, which is TERMINAL_LABEL.
const UInt16 MIN_LABEL = '\0';
const UInt16 MAX_LABEL = TERMINAL_LABEL;
// INVALID_LABEL has the same value as LABEL_MASK (0x1FF) and is therefore
// larger than MAX_LABEL.
const UInt32 INVALID_LABEL = 0x1FF;
const UInt32 LABEL_MASK = 0x1FF;
// The MSB of BASE is used to represent whether the node is a linker node or
// not and the other 31 bits represent the offset to its child nodes. So, the
// number of nodes is limited to 2^31.
const UInt32 ROOT_NODE_ID = 0;
const UInt32 MAX_NODE_ID = 0x7FFFFFFF;
const UInt32 MAX_NUM_NODES = MAX_NODE_ID + 1;
// The first value past MAX_NODE_ID (0x80000000).
const UInt32 INVALID_NODE_ID = MAX_NODE_ID + 1;
// 0 is reserved for non-linker leaf nodes. For example, the root node of an
// initial double-array is a non-linker leaf node.
const UInt32 MAX_OFFSET = MAX_NODE_ID;
const UInt32 INVALID_OFFSET = 0;
// Phantom nodes are managed in each block because siblings are always put in
// the same block.
// BLOCK_MASK is BLOCK_SIZE - 1.
const UInt32 BLOCK_SIZE = 0x200;
const UInt32 BLOCK_MASK = 0x1FF;
const UInt32 MAX_BLOCK_ID = MAX_NODE_ID / BLOCK_SIZE;
const UInt32 MAX_NUM_BLOCKS = MAX_BLOCK_ID + 1;
// Blocks are divided by their levels, which indicate how easily update
// operations can find a good offset in them. The level of a block rises when
// find_offset() fails in that block many times. MAX_FAILURE_COUNT is the
// threshold. Also, in order to limit the time cost, find_offset() scans at
// most MAX_BLOCK_COUNT blocks.
// Larger parameters bring more chances of finding good offsets but it leads to
// more node renumberings, which are costly operations, and thus results in
// a degradation of space/time efficiencies.
const UInt32 MAX_FAILURE_COUNT = 4;
const UInt32 MAX_BLOCK_COUNT = 16;
const UInt32 MAX_BLOCK_LEVEL = 5;
// Blocks in the same level compose a doubly linked list. The entry block of
// a linked list is called a leader. INVALID_LEADER means that a linked list is
// empty and there exists no leader.
const UInt32 INVALID_LEADER = 0x7FFFFFFF;
// Key IDs start at 1; 0 is the invalid key ID.
const UInt32 MIN_KEY_ID = 1;
const UInt32 MAX_KEY_ID = MAX_NODE_ID;
const UInt32 INVALID_KEY_ID = 0;
// A key length is represented as a 12-bit unsigned integer in Key.
// A key ID is represented as a 28-bit unsigned integer in Key.
const UInt32 MAX_KEY_LENGTH = (1U << 12) - 1;
const UInt32 MAX_NUM_KEYS = (1U << 28) - 1;
// File size bounds and default: 2^16, 2^20 and 2^40. The unit is presumably
// bytes -- TODO confirm.
const UInt64 MIN_FILE_SIZE = 1 << 16;
const UInt64 DEFAULT_FILE_SIZE = 1 << 20;
const UInt64 MAX_FILE_SIZE = (UInt64)1 << 40;
// Default ratios; presumably used to size a new trie when the caller gives no
// explicit values -- TODO confirm.
const double DEFAULT_NUM_NODES_PER_KEY = 4.0;
const double DEFAULT_AVERAGE_KEY_LENGTH = 16.0;
// Upper bounds related to key storage (2^31 and 2^32 - 1); the unit is not
// visible here -- TODO confirm.
const UInt32 MAX_KEY_BUF_SIZE = 0x80000000U;
const UInt32 MAX_TOTAL_KEY_LENGTH = 0xFFFFFFFFU;
// Cursor type bits. CursorFactory::open() selects the cursor class from
// (flags & CURSOR_TYPE_MASK).
const UInt32 ID_RANGE_CURSOR = 0x00001;
const UInt32 KEY_RANGE_CURSOR = 0x00002;
const UInt32 PREFIX_CURSOR = 0x00004;
const UInt32 PREDICTIVE_CURSOR = 0x00008;
const UInt32 CURSOR_TYPE_MASK = 0x000FF;
// Cursor order bits, covered by CURSOR_ORDER_MASK.
const UInt32 ASCENDING_CURSOR = 0x00100;
const UInt32 DESCENDING_CURSOR = 0x00200;
const UInt32 CURSOR_ORDER_MASK = 0x00F00;
// Cursor option bits, covered by CURSOR_OPTIONS_MASK.
const UInt32 EXCEPT_LOWER_BOUND = 0x01000;
const UInt32 EXCEPT_UPPER_BOUND = 0x02000;
const UInt32 EXCEPT_EXACT_MATCH = 0x04000;
const UInt32 CURSOR_OPTIONS_MASK = 0xFF000;
// Status bits; CHANGING_MASK is the union of the three. Presumably they mark
// a modification that is in progress -- TODO confirm.
const UInt32 REMOVING_FLAG = 1U << 0;
const UInt32 INSERTING_FLAG = 1U << 1;
const UInt32 UPDATING_FLAG = 1U << 2;
const UInt32 CHANGING_MASK = REMOVING_FLAG | INSERTING_FLAG | UPDATING_FLAG;
// Presumably the size threshold used by a multikey quicksort (MKQ) -- TODO
// confirm against the sorting code.
const UInt32 MKQ_SORT_THRESHOLD = 10;
// Error categories. Each value selects one instantiation of Error<T> below.
enum ErrorCode {
  PARAM_ERROR = -1,
  IO_ERROR = -2,
  FORMAT_ERROR = -3,
  MEMORY_ERROR = -4,
  SIZE_ERROR = -5,
  UNEXPECTED_ERROR = -6,
  STATUS_ERROR = -7
};

// Base class of all exceptions declared here. It records the source file, the
// line and a message. The strings are stored as pointers and are never
// copied, so they must outlive the exception object.
class Exception : public std::exception {
 public:
  // Empty file and message, line -1.
  Exception() throw()
      : std::exception(), file_(""), line_(-1), what_("") {}

  // A NULL `what` is replaced by an empty string.
  Exception(const char *file, int line, const char *what) throw()
      : std::exception(),
        file_(file),
        line_(line),
        what_((what == NULL) ? "" : what) {}

  Exception(const Exception &ex) throw()
      : std::exception(ex),
        file_(ex.file_),
        line_(ex.line_),
        what_(ex.what_) {}

  virtual ~Exception() throw() {}

  // Copies the three recorded fields from `ex`.
  virtual Exception &operator=(const Exception &ex) throw() {
    what_ = ex.what_;
    line_ = ex.line_;
    file_ = ex.file_;
    return *this;
  }

  // The category of the error; supplied by Error<T>.
  virtual ErrorCode code() const throw() = 0;

  virtual const char *file() const throw() {
    return file_;
  }
  virtual int line() const throw() {
    return line_;
  }
  virtual const char *what() const throw() {
    return what_;
  }

 private:
  const char *file_;
  int line_;
  const char *what_;
};

// Concrete exception whose code() is the template argument T.
template <ErrorCode T>
class Error : public Exception {
 public:
  Error() throw() : Exception() {}
  Error(const char *file, int line, const char *what) throw()
      : Exception(file, line, what) {}
  Error(const Error &ex) throw() : Exception(ex) {}
  virtual ~Error() throw() {}

  // Delegates to the assignment operator of the Exception part.
  virtual Error &operator=(const Error &ex) throw() {
    Exception &base = *this;
    base = ex;
    return *this;
  }

  virtual ErrorCode code() const throw() {
    return T;
  }
};

// One exception type per error code.
typedef Error<PARAM_ERROR> ParamError;
typedef Error<IO_ERROR> IOError;
typedef Error<FORMAT_ERROR> FormatError;
typedef Error<MEMORY_ERROR> MemoryError;
typedef Error<SIZE_ERROR> SizeError;
typedef Error<UNEXPECTED_ERROR> UnexpectedError;
typedef Error<STATUS_ERROR> StatusError;
// Stringification helpers. The two-step expansion lets __LINE__ be replaced
// by its number before the # operator turns it into a string literal.
#define GRN_DAT_INT_TO_STR(value) #value
#define GRN_DAT_LINE_TO_STR(line) GRN_DAT_INT_TO_STR(line)
#define GRN_DAT_LINE_STR GRN_DAT_LINE_TO_STR(__LINE__)
// Throws grn::dat::Error<code> carrying __FILE__, __LINE__ and a message of
// the form "<file>:<line>: <code>: <msg>". `msg` has to be a string literal
// because the message is built by literal concatenation.
#define GRN_DAT_THROW(code, msg)\
(throw grn::dat::Error<code>(__FILE__, __LINE__,\
__FILE__ ":" GRN_DAT_LINE_STR ": " #code ": " msg))
// Throws through GRN_DAT_THROW when `cond` is true; the source text of `cond`
// becomes the message. The whole macro is a single void expression.
#define GRN_DAT_THROW_IF(code, cond)\
(void)((!(cond)) || (GRN_DAT_THROW(code, #cond), 0))
#ifdef _DEBUG
// With _DEBUG: GRN_DAT_DEBUG_THROW_IF throws UNEXPECTED_ERROR when `cond` is
// true, and GRN_DAT_DEBUG_LOG writes the location, the text of `var` and its
// value to std::clog.
#define GRN_DAT_DEBUG_THROW_IF(cond)\
GRN_DAT_THROW_IF(grn::dat::UNEXPECTED_ERROR, cond)
#define GRN_DAT_DEBUG_LOG(var)\
(std::clog << __FILE__ ":" GRN_DAT_LINE_STR ": " #var ": "\
<< (var) << std::endl)
#else // _DEBUG
// Without _DEBUG both macros expand to nothing, so their arguments are not
// evaluated.
#define GRN_DAT_DEBUG_THROW_IF(cond)
#define GRN_DAT_DEBUG_LOG(var)
#endif // _DEBUG
} // namespace dat
} // namespace grn
#endif // GRN_DAT_COMMON_HPP_

View file

@ -0,0 +1,61 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_ENTRY_HPP_
#define GRN_DAT_ENTRY_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
// Entry is one 32-bit slot. The most significant bit (IS_VALID_FLAG) says
// whether the entry is valid:
//   - valid:   the remaining bits hold the position of the associated key;
//   - invalid: the word holds the index of the next invalid entry.
class GRN_DAT_API Entry {
 public:
  Entry() : value_(0) {}

  bool is_valid() const {
    return (value_ & IS_VALID_FLAG) != 0;
  }

  // Position of the associated key. Only meaningful for a valid entry.
  UInt32 key_pos() const {
    GRN_DAT_DEBUG_THROW_IF(!is_valid());
    const UInt32 payload_mask = ~IS_VALID_FLAG;
    return value_ & payload_mask;
  }

  // Index of the next invalid entry. Only meaningful for an invalid entry.
  UInt32 next() const {
    GRN_DAT_DEBUG_THROW_IF(is_valid());
    return value_;
  }

  // Stores a key position and marks the entry as valid.
  void set_key_pos(UInt32 x) {
    value_ = x | IS_VALID_FLAG;
  }

  // Stores `x` as it is; no flag is added.
  void set_next(UInt32 x) {
    value_ = x;
  }

 private:
  UInt32 value_;

  static const UInt32 IS_VALID_FLAG = 0x80000000U;
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_ENTRY_HPP_

View file

@ -0,0 +1,244 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "file-impl.hpp"
#include <sys/types.h>
#include <sys/stat.h>
#ifdef WIN32
# ifdef min
# undef min
# endif // min
# ifdef max
# undef max
# endif // max
#else // WIN32
# include <fcntl.h>
# include <sys/mman.h>
# include <unistd.h>
#endif // WIN32
#include <algorithm>
#include <limits>
namespace grn {
namespace dat {
#ifdef WIN32
// Starts in the closed state: nothing mapped, no handles held.
FileImpl::FileImpl()
    : ptr_(NULL),
      size_(0),
      file_(INVALID_HANDLE_VALUE),
      map_(INVALID_HANDLE_VALUE),
      addr_(NULL) {}

// Releases the view, the mapping object and the file, in that order.
FileImpl::~FileImpl() {
  if (addr_ != NULL) {
    ::UnmapViewOfFile(addr_);
  }
  // CreateFileMapping() reports failure with NULL, not INVALID_HANDLE_VALUE,
  // so an object whose create_()/open_() threw may hold either value in map_.
  // Neither is a handle that can be closed.
  if ((map_ != NULL) && (map_ != INVALID_HANDLE_VALUE)) {
    ::CloseHandle(map_);
  }
  if (file_ != INVALID_HANDLE_VALUE) {
    ::CloseHandle(file_);
  }
}
#else // WIN32
// Starts in the closed state: no descriptor and no mapping.
FileImpl::FileImpl()
    : ptr_(NULL), size_(0), fd_(-1), addr_(MAP_FAILED), length_(0) {}

// Unmaps the region, if there is one, and then closes the descriptor, if
// there is one.
FileImpl::~FileImpl() {
  const bool is_mapped = (addr_ != MAP_FAILED);
  if (is_mapped) {
    ::munmap(addr_, length_);
  }
  const bool is_opened = (fd_ != -1);
  if (is_opened) {
    ::close(fd_);
  }
}
#endif // WIN32
void FileImpl::create(const char *path, UInt64 size) {
GRN_DAT_THROW_IF(PARAM_ERROR, size == 0);
GRN_DAT_THROW_IF(PARAM_ERROR,
size > static_cast<UInt64>(std::numeric_limits< ::size_t>::max()));
FileImpl new_impl;
new_impl.create_(path, size);
new_impl.swap(this);
}
void FileImpl::open(const char *path) {
GRN_DAT_THROW_IF(PARAM_ERROR, path == NULL);
GRN_DAT_THROW_IF(PARAM_ERROR, path[0] == '\0');
FileImpl new_impl;
new_impl.open_(path);
new_impl.swap(this);
}
void FileImpl::close() {
FileImpl new_impl;
new_impl.swap(this);
}
#ifdef WIN32
// Exchanges every member with `*rhs`: the public view (ptr_, size_) as well
// as the handles and the mapped address behind it.
void FileImpl::swap(FileImpl *rhs) {
  FileImpl &other = *rhs;
  std::swap(addr_, other.addr_);
  std::swap(map_, other.map_);
  std::swap(file_, other.file_);
  std::swap(size_, other.size_);
  std::swap(ptr_, other.ptr_);
}
void FileImpl::create_(const char *path, UInt64 size) {
if ((path != NULL) && (path[0] != '\0')) {
file_ = ::CreateFileA(path, GENERIC_READ | GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
GRN_DAT_THROW_IF(IO_ERROR, file_ == INVALID_HANDLE_VALUE);
const LONG size_low = static_cast<LONG>(size & 0xFFFFFFFFU);
LONG size_high = static_cast<LONG>(size >> 32);
const DWORD file_pos = ::SetFilePointer(file_, size_low, &size_high,
FILE_BEGIN);
GRN_DAT_THROW_IF(IO_ERROR, (file_pos == INVALID_SET_FILE_POINTER) &&
(::GetLastError() != 0));
GRN_DAT_THROW_IF(IO_ERROR, ::SetEndOfFile(file_) == 0);
map_ = ::CreateFileMapping(file_, NULL, PAGE_READWRITE, 0, 0, NULL);
GRN_DAT_THROW_IF(IO_ERROR, map_ == INVALID_HANDLE_VALUE);
} else {
const DWORD size_low = static_cast<DWORD>(size & 0xFFFFFFFFU);
const DWORD size_high = static_cast<DWORD>(size >> 32);
map_ = ::CreateFileMapping(file_, NULL, PAGE_READWRITE,
size_high, size_low, NULL);
GRN_DAT_THROW_IF(IO_ERROR, map_ == INVALID_HANDLE_VALUE);
}
addr_ = ::MapViewOfFile(map_, FILE_MAP_WRITE, 0, 0, 0);
GRN_DAT_THROW_IF(IO_ERROR, addr_ == NULL);
ptr_ = addr_;
size_ = static_cast< ::size_t>(size);
}
void FileImpl::open_(const char *path) {
#ifdef _MSC_VER
struct __stat64 st;
GRN_DAT_THROW_IF(IO_ERROR, ::_stat64(path, &st) == -1);
#else // _MSC_VER
struct _stat st;
GRN_DAT_THROW_IF(IO_ERROR, ::_stat(path, &st) == -1);
#endif // _MSC_VER
GRN_DAT_THROW_IF(IO_ERROR, st.st_size == 0);
GRN_DAT_THROW_IF(IO_ERROR,
static_cast<UInt64>(st.st_size) > std::numeric_limits< ::size_t>::max());
file_ = ::CreateFileA(path, GENERIC_READ | GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
GRN_DAT_THROW_IF(IO_ERROR, file_ == NULL);
map_ = ::CreateFileMapping(file_, NULL, PAGE_READWRITE, 0, 0, NULL);
GRN_DAT_THROW_IF(IO_ERROR, map_ == NULL);
addr_ = ::MapViewOfFile(map_, FILE_MAP_WRITE, 0, 0, 0);
GRN_DAT_THROW_IF(IO_ERROR, addr_ == NULL);
ptr_ = addr_;
size_ = static_cast< ::size_t>(st.st_size);
}
#else // WIN32
// Exchanges every member with `*rhs`: the public view (ptr_, size_) as well
// as the descriptor and the mapping behind it.
void FileImpl::swap(FileImpl *rhs) {
  FileImpl &other = *rhs;
  std::swap(length_, other.length_);
  std::swap(addr_, other.addr_);
  std::swap(fd_, other.fd_);
  std::swap(size_, other.size_);
  std::swap(ptr_, other.ptr_);
}
// Creates the mapping for create(). With a non-empty `path` the file is
// created (or truncated), resized to `size` bytes and mapped MAP_SHARED.
// With a NULL or empty `path` no file is opened and, as long as fd_ is still
// -1, the mapping is private and anonymous.
// Throws ParamError if `size` does not fit in ::off_t and IOError when a
// system call fails.
void FileImpl::create_(const char *path, UInt64 size) {
  GRN_DAT_THROW_IF(PARAM_ERROR,
      size > static_cast<UInt64>(std::numeric_limits< ::off_t>::max()));

  const bool has_path = (path != NULL) && (path[0] != '\0');
  if (has_path) {
    fd_ = ::open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
    GRN_DAT_THROW_IF(IO_ERROR, fd_ == -1);
    const ::off_t file_size = static_cast< ::off_t>(size);
    GRN_DAT_THROW_IF(IO_ERROR, ::ftruncate(fd_, file_size) == -1);
  }

  // Some platforms only provide the older MAP_ANON spelling.
#ifdef MAP_ANONYMOUS
  const int anonymous_flags = MAP_PRIVATE | MAP_ANONYMOUS;
#else  // MAP_ANONYMOUS
  const int anonymous_flags = MAP_PRIVATE | MAP_ANON;
#endif  // MAP_ANONYMOUS
  const int map_flags = (fd_ == -1) ? anonymous_flags : MAP_SHARED;
  const int protection = PROT_READ | PROT_WRITE;

  length_ = static_cast< ::size_t>(size);

#ifdef USE_MAP_HUGETLB
  // Try huge pages first; the plain mapping below is the fallback.
  addr_ = ::mmap(NULL, length_, protection, map_flags | MAP_HUGETLB, fd_, 0);
#endif  // USE_MAP_HUGETLB
  if (addr_ == MAP_FAILED) {
    addr_ = ::mmap(NULL, length_, protection, map_flags, fd_, 0);
    GRN_DAT_THROW_IF(IO_ERROR, addr_ == MAP_FAILED);
  }

  size_ = length_;
  ptr_ = addr_;
}
// Maps the existing file at `path` for open(), read/write and MAP_SHARED.
// The path has to name a regular, non-empty file whose size fits in ::size_t.
// Throws IOError when a check or a system call fails.
void FileImpl::open_(const char *path) {
  struct stat st;
  GRN_DAT_THROW_IF(IO_ERROR, ::stat(path, &st) == -1);
  GRN_DAT_THROW_IF(IO_ERROR, (st.st_mode & S_IFMT) != S_IFREG);
  GRN_DAT_THROW_IF(IO_ERROR, st.st_size == 0);
  GRN_DAT_THROW_IF(IO_ERROR,
      static_cast<UInt64>(st.st_size) > std::numeric_limits< ::size_t>::max());

  fd_ = ::open(path, O_RDWR);
  GRN_DAT_THROW_IF(IO_ERROR, fd_ == -1);

  // The whole file is mapped, starting at offset 0.
  const int protection = PROT_READ | PROT_WRITE;
  length_ = static_cast< ::size_t>(st.st_size);
  addr_ = ::mmap(NULL, length_, protection, MAP_SHARED, fd_, 0);
  GRN_DAT_THROW_IF(IO_ERROR, addr_ == MAP_FAILED);

  size_ = length_;
  ptr_ = addr_;
}
#endif // WIN32
} // namespace dat
} // namespace grn

View file

@ -0,0 +1,73 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011-2012 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_FILE_IMPL_HPP_
#define GRN_DAT_FILE_IMPL_HPP_
#ifdef WIN32
# include <windows.h>
#endif // WIN32
#include "dat.hpp"
namespace grn {
namespace dat {
// Platform-dependent backend of File. It keeps the handles needed for a
// memory mapping (Windows HANDLEs under WIN32, a file descriptor otherwise)
// together with the pointer and size that are exposed to callers.
class FileImpl {
public:
FileImpl();
~FileImpl();
// Public entry points; the per-platform work is done by the private
// create_() / open_() helpers declared below.
void create(const char *path, UInt64 size);
void open(const char *path);
void close();
// Returns the pointer stored in ptr_.
void *ptr() const {
return ptr_;
}
// Returns the size stored in size_.
UInt64 size() const {
return size_;
}
// Exchanges the state of this object with that of `rhs`.
void swap(FileImpl *rhs);
private:
// View exposed through ptr() and size().
void *ptr_;
UInt64 size_;
#ifdef WIN32
// Windows: file handle, file-mapping handle and mapped view address.
HANDLE file_;
HANDLE map_;
LPVOID addr_;
#else // WIN32
// Other platforms: file descriptor, mapped address and mapping length.
int fd_;
void *addr_;
::size_t length_;
#endif // WIN32
// Platform-specific implementations of creating and opening a mapping.
void create_(const char *path, UInt64 size);
void open_(const char *path);
// Disallows copy and assignment.
FileImpl(const FileImpl &);
FileImpl &operator=(const FileImpl &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_FILE_IMPL_HPP_

View file

@ -0,0 +1,67 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "file.hpp"
#include "file-impl.hpp"
#include <new>
namespace grn {
namespace dat {
// Constructs a File with no implementation object attached.
File::File()
    : impl_(NULL) {
}
// Destroys the implementation object, if any (deleting NULL is a no-op).
File::~File() {
  FileImpl * const owned_impl = impl_;
  delete owned_impl;
}
// Creates a mapping of `size` bytes for `path` and makes it the state of this
// object. The work is done on a temporary File and swapped in at the end, so
// this object is left untouched if allocation or creation throws; the
// previous implementation object is destroyed together with the temporary.
// Raises MEMORY_ERROR if the implementation object cannot be allocated.
void File::create(const char *path, UInt64 size) {
  File new_file;
  new_file.impl_ = new (std::nothrow) FileImpl;
  GRN_DAT_THROW_IF(MEMORY_ERROR, new_file.impl_ == NULL);

  new_file.impl_->create(path, size);
  this->swap(&new_file);
}
// Opens and maps the file at `path` and makes it the state of this object.
// The work is done on a temporary File and swapped in at the end, so this
// object is left untouched if allocation or opening throws; the previous
// implementation object is destroyed together with the temporary.
// Raises MEMORY_ERROR if the implementation object cannot be allocated.
void File::open(const char *path) {
  File new_file;
  new_file.impl_ = new (std::nothrow) FileImpl;
  GRN_DAT_THROW_IF(MEMORY_ERROR, new_file.impl_ == NULL);

  new_file.impl_->open(path);
  this->swap(&new_file);
}
void File::close() {
File().swap(this);
}
// Returns the implementation's pointer, or NULL when no implementation
// object is attached.
void *File::ptr() const {
  if (impl_ == NULL) {
    return NULL;
  }
  return impl_->ptr();
}
// Returns the implementation's size, or 0 when no implementation object is
// attached.
UInt64 File::size() const {
  if (impl_ == NULL) {
    return 0;
  }
  return impl_->size();
}
// Exchanges the implementation pointers of this object and `rhs`.
void File::swap(File *rhs) {
  FileImpl * const mine = impl_;
  FileImpl * const theirs = rhs->impl_;
  impl_ = theirs;
  rhs->impl_ = mine;
}
} // namespace dat
} // namespace grn

View file

@ -0,0 +1,60 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_FILE_HPP_
#define GRN_DAT_FILE_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
// This implementation class hides the environment-dependent code required
// for memory-mapped I/O.
class FileImpl;
// A memory-mapped file. All work is forwarded to a FileImpl object held
// through impl_, so that no platform header is needed by users of this class.
class GRN_DAT_API File {
public:
File();
~File();
// This function creates a file and maps the entire file to a certain range
// of the address space. Note that a file is truncated if exists.
void create(const char *path, UInt64 size);
// This function opens a file and maps the entire file to a certain range of
// the address space.
void open(const char *path);
// Releases the current implementation object, if any.
void close();
// Address and size as reported by the implementation object. While no
// implementation object is attached these return NULL and 0 respectively.
void *ptr() const;
UInt64 size() const;
// Exchanges the implementation objects of this File and `rhs`.
void swap(File *rhs);
private:
// Implementation object; NULL while nothing is created or opened.
FileImpl *impl_;
// Disallows copy and assignment.
File(const File &);
File &operator=(const File &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_FILE_HPP_

View file

@ -0,0 +1,181 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_HPP_HEADER_HPP_
#define GRN_DAT_HPP_HEADER_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
// Holds the size, key, node and block counters of a trie together with its
// per-level leader block IDs and status flags. Each field has a getter and a
// setter; the setters range-check their argument with
// GRN_DAT_DEBUG_THROW_IF.
// NOTE(review): the fixed-size reserved_ array suggests that this object has
// a fixed storage layout (presumably it is placed at the start of the mapped
// file) -- confirm before reordering or resizing members.
class GRN_DAT_API Header {
public:
// Sets every counter to 0, next_key_id_ to MIN_KEY_ID, every leader to
// INVALID_LEADER and every reserved word to 0.
Header()
: file_size_(0),
total_key_length_(0),
next_key_id_(grn::dat::MIN_KEY_ID),
max_key_id_(0),
num_keys_(0),
max_num_keys_(0),
num_phantoms_(0),
num_zombies_(0),
num_blocks_(0),
max_num_blocks_(0),
next_key_pos_(0),
key_buf_size_(0),
status_flags_(0) {
// leaders_ has MAX_BLOCK_LEVEL + 1 entries, hence the inclusive bound.
for (UInt32 i = 0; i <= MAX_BLOCK_LEVEL; ++i) {
leaders_[i] = INVALID_LEADER;
}
for (UInt32 i = 0; i < (sizeof(reserved_) / sizeof(*reserved_)); ++i) {
reserved_[i] = 0;
}
}
// ---- Getters ----
UInt64 file_size() const {
return file_size_;
}
UInt32 total_key_length() const {
return total_key_length_;
}
// Always the compile-time constant MIN_KEY_ID; not stored in the header.
UInt32 min_key_id() const {
return MIN_KEY_ID;
}
UInt32 next_key_id() const {
return next_key_id_;
}
UInt32 max_key_id() const {
return max_key_id_;
}
UInt32 num_keys() const {
return num_keys_;
}
UInt32 max_num_keys() const {
return max_num_keys_;
}
// Derived value: number of blocks times BLOCK_SIZE.
UInt32 num_nodes() const {
return num_blocks() * BLOCK_SIZE;
}
UInt32 num_phantoms() const {
return num_phantoms_;
}
UInt32 num_zombies() const {
return num_zombies_;
}
// Derived value: maximum number of blocks times BLOCK_SIZE.
UInt32 max_num_nodes() const {
return max_num_blocks() * BLOCK_SIZE;
}
UInt32 num_blocks() const {
return num_blocks_;
}
UInt32 max_num_blocks() const {
return max_num_blocks_;
}
UInt32 next_key_pos() const {
return next_key_pos_;
}
UInt32 key_buf_size() const {
return key_buf_size_;
}
UInt32 status_flags() const {
return status_flags_;
}
// Returns the leader stored for level `i` (0 <= i <= MAX_BLOCK_LEVEL).
UInt32 ith_leader(UInt32 i) const {
GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
return leaders_[i];
}
// ---- Setters ----
// Each setter checks its argument against the corresponding limit with
// GRN_DAT_DEBUG_THROW_IF before storing it.
void set_file_size(UInt64 x) {
GRN_DAT_DEBUG_THROW_IF(x > MAX_FILE_SIZE);
file_size_ = x;
}
void set_total_key_length(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > MAX_TOTAL_KEY_LENGTH);
total_key_length_ = x;
}
// Accepts values up to MAX_KEY_ID + 1. Because the arithmetic is unsigned,
// x == 0 wraps to the largest UInt32 in (x - 1) and is rejected as well.
void set_next_key_id(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF((x - 1) > MAX_KEY_ID);
next_key_id_ = x;
}
void set_max_key_id(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > MAX_KEY_ID);
max_key_id_ = x;
}
void set_num_keys(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_KEYS);
num_keys_ = x;
}
void set_max_num_keys(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_KEYS);
max_num_keys_ = x;
}
// The next three checks depend on max_num_blocks_ as currently stored.
void set_num_phantoms(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > max_num_nodes());
num_phantoms_ = x;
}
void set_num_zombies(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > max_num_nodes());
num_zombies_ = x;
}
void set_num_blocks(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > max_num_blocks());
num_blocks_ = x;
}
void set_max_num_blocks(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > MAX_NUM_BLOCKS);
max_num_blocks_ = x;
}
// Checked against key_buf_size_ as currently stored.
void set_next_key_pos(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > key_buf_size());
next_key_pos_ = x;
}
void set_key_buf_size(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(x > MAX_KEY_BUF_SIZE);
key_buf_size_ = x;
}
// Stored without any check.
void set_status_flags(UInt32 x) {
status_flags_ = x;
}
// Stores `x` as the leader of level `i`. `x` must be INVALID_LEADER or less
// than the current number of blocks.
void set_ith_leader(UInt32 i, UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
GRN_DAT_DEBUG_THROW_IF((x != INVALID_LEADER) && (x >= num_blocks()));
leaders_[i] = x;
}
private:
UInt64 file_size_;
UInt32 total_key_length_;
UInt32 next_key_id_;
UInt32 max_key_id_;
UInt32 num_keys_;
UInt32 max_num_keys_;
UInt32 num_phantoms_;
UInt32 num_zombies_;
UInt32 num_blocks_;
UInt32 max_num_blocks_;
UInt32 next_key_pos_;
UInt32 key_buf_size_;
// One leader per block level, levels 0..MAX_BLOCK_LEVEL inclusive.
UInt32 leaders_[MAX_BLOCK_LEVEL + 1];
UInt32 status_flags_;
// Unused words, zeroed by the constructor.
UInt32 reserved_[12];
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_HPP_HEADER_HPP_

View file

@ -0,0 +1,184 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "id-cursor.hpp"
#include <algorithm>
#include "trie.hpp"
namespace grn {
namespace dat {
// Constructs a closed cursor: no trie, no offset, unlimited limit, and
// cur_ == end_ so that next() reports no keys.
IdCursor::IdCursor()
    : trie_(NULL),
      offset_(0),
      limit_(MAX_UINT32),
      flags_(ID_RANGE_CURSOR),
      cur_(INVALID_KEY_ID),
      end_(INVALID_KEY_ID),
      count_(0) {
}
// Nothing to release: the cursor does not own the trie it points to.
IdCursor::~IdCursor() {
}
// Opens the cursor over the ID range delimited by two keys. A bound whose
// ptr() is NULL is passed on as INVALID_KEY_ID; any other bound must be
// found in `trie` by search(), otherwise PARAM_ERROR is raised. The resolved
// key IDs are forwarded to the ID-based overload.
void IdCursor::open(const Trie &trie,
                    const String &min_str,
                    const String &max_str,
                    UInt32 offset,
                    UInt32 limit,
                    UInt32 flags) {
  UInt32 min_id = INVALID_KEY_ID;
  UInt32 max_id = INVALID_KEY_ID;

  if (min_str.ptr() != NULL) {
    UInt32 key_pos;
    GRN_DAT_THROW_IF(PARAM_ERROR,
                     !trie.search(min_str.ptr(), min_str.length(), &key_pos));
    min_id = trie.get_key(key_pos).id();
  }

  if (max_str.ptr() != NULL) {
    UInt32 key_pos;
    GRN_DAT_THROW_IF(PARAM_ERROR,
                     !trie.search(max_str.ptr(), max_str.length(), &key_pos));
    max_id = trie.get_key(key_pos).id();
  }

  open(trie, min_id, max_id, offset, limit, flags);
}
void IdCursor::open(const Trie &trie,
UInt32 min_id,
UInt32 max_id,
UInt32 offset,
UInt32 limit,
UInt32 flags) {
flags = fix_flags(flags);
IdCursor new_cursor(trie, offset, limit, flags);
new_cursor.init(min_id, max_id);
new_cursor.swap(this);
}
void IdCursor::close() {
IdCursor new_cursor;
new_cursor.swap(this);
}
// Returns the next valid key in ID order, or Key::invalid_key() once `limit_`
// keys have been returned or the end of the range has been reached. IDs whose
// key is not valid are skipped and do not count against the limit.
const Key &IdCursor::next() {
  if (count_ >= limit_) {
    return Key::invalid_key();
  }

  const bool ascending = (flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR;
  while (cur_ != end_) {
    const Key &candidate = trie_->ith_key(cur_);
    // Step past the candidate before deciding whether to return it.
    if (ascending) {
      ++cur_;
    } else {
      --cur_;
    }
    if (!candidate.is_valid()) {
      continue;
    }
    ++count_;
    return candidate;
  }
  return Key::invalid_key();
}
// Constructs a cursor bound to `trie` with the given parameters. The range
// is left empty (cur_ == end_) until init() is called.
IdCursor::IdCursor(const Trie &trie,
                   UInt32 offset,
                   UInt32 limit,
                   UInt32 flags)
    : trie_(&trie),
      offset_(offset),
      limit_(limit),
      flags_(flags),
      cur_(INVALID_KEY_ID),
      end_(INVALID_KEY_ID),
      count_(0) {
}
// Validates `flags` and returns them with defaults filled in.
//  - Cursor type: must be unset or ID_RANGE_CURSOR; ID_RANGE_CURSOR is set.
//  - Cursor order: must be unset, ASCENDING_CURSOR or DESCENDING_CURSOR;
//    ASCENDING_CURSOR is set when unset.
//  - Options: only EXCEPT_LOWER_BOUND and EXCEPT_UPPER_BOUND are accepted.
// Raises PARAM_ERROR for any other combination.
UInt32 IdCursor::fix_flags(UInt32 flags) const {
const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) &&
(cursor_type != ID_RANGE_CURSOR));
flags |= ID_RANGE_CURSOR;
const UInt32 cursor_order = flags & CURSOR_ORDER_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) &&
(cursor_order != ASCENDING_CURSOR) &&
(cursor_order != DESCENDING_CURSOR));
// Ascending is the default order.
if (cursor_order == 0) {
flags |= ASCENDING_CURSOR;
}
const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR,
cursor_options & ~(EXCEPT_LOWER_BOUND | EXCEPT_UPPER_BOUND));
return flags;
}
// Computes the range [cur_, end_) to iterate over and consumes `offset_`
// valid keys from its start.
//  - INVALID_KEY_ID as a bound selects the trie's min/max key ID.
//  - An explicitly given bound is excluded when the matching EXCEPT_* flag is
//    set.
//  - If the range is empty or spans fewer IDs than `offset_`, the cursor is
//    left empty (cur_ == end_).
void IdCursor::init(UInt32 min_id, UInt32 max_id) {
  const bool except_lower =
      (flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND;
  const bool except_upper =
      (flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND;

  if (min_id == INVALID_KEY_ID) {
    min_id = trie_->min_key_id();
  } else if (except_lower) {
    ++min_id;
  }

  if (max_id == INVALID_KEY_ID) {
    max_id = trie_->max_key_id();
  } else if (except_upper) {
    --max_id;
  }

  if (max_id < min_id) {
    return;
  }
  if ((max_id - min_id) < offset_) {
    return;
  }

  const bool ascending = (flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR;
  if (ascending) {
    cur_ = min_id;
    end_ = max_id + 1;
    // Each outer iteration advances past exactly one valid key (or hits end_).
    for (UInt32 skipped = 0; (skipped < offset_) && (cur_ != end_); ++skipped) {
      while (cur_ != end_) {
        const bool was_valid = trie_->ith_key(cur_).is_valid();
        ++cur_;
        if (was_valid) {
          break;
        }
      }
    }
  } else {
    cur_ = max_id;
    end_ = min_id - 1;
    // Same skipping as above, walking towards smaller IDs.
    for (UInt32 skipped = 0; (skipped < offset_) && (cur_ != end_); ++skipped) {
      while (cur_ != end_) {
        const bool was_valid = trie_->ith_key(cur_).is_valid();
        --cur_;
        if (was_valid) {
          break;
        }
      }
    }
  }
}
// Exchanges every member of this cursor with that of `cursor`.
void IdCursor::swap(IdCursor *cursor) {
  IdCursor &other = *cursor;
  std::swap(trie_, other.trie_);
  std::swap(offset_, other.offset_);
  std::swap(limit_, other.limit_);
  std::swap(flags_, other.flags_);
  std::swap(cur_, other.cur_);
  std::swap(end_, other.end_);
  std::swap(count_, other.count_);
}
} // namespace dat
} // namespace grn

View file

@ -0,0 +1,85 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_ID_CURSOR_HPP_
#define GRN_DAT_ID_CURSOR_HPP_
#include "cursor.hpp"
namespace grn {
namespace dat {
class Trie;
// Cursor that visits the keys of a Trie in key-ID order, ascending or
// descending, over an optional ID range.
class GRN_DAT_API IdCursor : public Cursor {
public:
IdCursor();
~IdCursor();
// Opens the cursor with bounds given as keys. A bound whose ptr() is NULL
// means "no bound"; any other bound must exist in `trie`.
void open(const Trie &trie,
const String &min_str,
const String &max_str,
UInt32 offset = 0,
UInt32 limit = MAX_UINT32,
UInt32 flags = 0);
// Opens the cursor with bounds given as key IDs. INVALID_KEY_ID means
// "no bound".
void open(const Trie &trie,
UInt32 min_id,
UInt32 max_id,
UInt32 offset = 0,
UInt32 limit = MAX_UINT32,
UInt32 flags = 0);
// Resets the cursor to the default-constructed state.
void close();
// Returns the next valid key, or Key::invalid_key() when exhausted.
const Key &next();
UInt32 offset() const {
return offset_;
}
UInt32 limit() const {
return limit_;
}
UInt32 flags() const {
return flags_;
}
private:
// Trie being traversed; not owned.
const Trie *trie_;
UInt32 offset_;
UInt32 limit_;
UInt32 flags_;
// Next key ID to examine and the ID at which iteration stops (exclusive).
UInt32 cur_;
UInt32 end_;
// Number of keys returned by next() so far.
UInt32 count_;
// Helpers used by open(): construct, validate flags, compute the range.
IdCursor(const Trie &trie, UInt32 offset, UInt32 limit, UInt32 flags);
UInt32 fix_flags(UInt32 flags) const;
void init(UInt32 min_id, UInt32 max_id);
void swap(IdCursor *cursor);
// Disallows copy and assignment.
IdCursor(const IdCursor &);
IdCursor &operator=(const IdCursor &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_ID_CURSOR_HPP_

View file

@ -0,0 +1,349 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "key-cursor.hpp"
#include <algorithm>
#include <cstring>
#include "trie.hpp"
namespace grn {
namespace dat {
// Constructs a closed cursor: no trie, an empty node stack, no end bound and
// max_count_ == 0 so that next() reports no keys.
KeyCursor::KeyCursor()
    : trie_(NULL),
      offset_(0),
      limit_(MAX_UINT32),
      flags_(KEY_RANGE_CURSOR),
      buf_(),
      count_(0),
      max_count_(0),
      finished_(false),
      end_buf_(NULL),
      end_str_() {
}
// Releases the private copy of the end bound. delete[] on a null pointer is
// a no-op, so no explicit NULL test is required.
KeyCursor::~KeyCursor() {
  delete [] end_buf_;
}
void KeyCursor::open(const Trie &trie,
const String &min_str,
const String &max_str,
UInt32 offset,
UInt32 limit,
UInt32 flags) {
GRN_DAT_THROW_IF(PARAM_ERROR,
(min_str.ptr() == NULL) && (min_str.length() != 0));
GRN_DAT_THROW_IF(PARAM_ERROR,
(max_str.ptr() == NULL) && (max_str.length() != 0));
flags = fix_flags(flags);
KeyCursor new_cursor(trie, offset, limit, flags);
new_cursor.init(min_str, max_str);
new_cursor.swap(this);
}
void KeyCursor::close() {
KeyCursor new_cursor;
new_cursor.swap(this);
}
// Returns the next key in the direction selected by the cursor flags, or
// Key::invalid_key() once the end bound was passed (finished_) or max_count_
// keys have been counted.
const Key &KeyCursor::next() {
  if (finished_) {
    return Key::invalid_key();
  }
  if (count_ >= max_count_) {
    return Key::invalid_key();
  }

  const bool ascending = (flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR;
  return ascending ? ascending_next() : descending_next();
}
// Constructs a cursor bound to `trie` with the given parameters. The node
// stack and the end bound stay empty until init() is called.
KeyCursor::KeyCursor(const Trie &trie,
                     UInt32 offset,
                     UInt32 limit,
                     UInt32 flags)
    : trie_(&trie),
      offset_(offset),
      limit_(limit),
      flags_(flags),
      buf_(),
      count_(0),
      max_count_(0),
      finished_(false),
      end_buf_(NULL),
      end_str_() {
}
// Validates `flags` and returns them with defaults filled in.
//  - Cursor type: must be unset or KEY_RANGE_CURSOR; KEY_RANGE_CURSOR is set.
//  - Cursor order: must be unset, ASCENDING_CURSOR or DESCENDING_CURSOR;
//    ASCENDING_CURSOR is set when unset.
//  - Options: only EXCEPT_LOWER_BOUND and EXCEPT_UPPER_BOUND are accepted.
// Raises PARAM_ERROR for any other combination.
UInt32 KeyCursor::fix_flags(UInt32 flags) const {
const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) &&
(cursor_type != KEY_RANGE_CURSOR));
flags |= KEY_RANGE_CURSOR;
const UInt32 cursor_order = flags & CURSOR_ORDER_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) &&
(cursor_order != ASCENDING_CURSOR) &&
(cursor_order != DESCENDING_CURSOR));
// Ascending is the default order.
if (cursor_order == 0) {
flags |= ASCENDING_CURSOR;
}
const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR,
cursor_options & ~(EXCEPT_LOWER_BOUND | EXCEPT_UPPER_BOUND));
return flags;
}
void KeyCursor::init(const String &min_str, const String &max_str) {
if (offset_ > (MAX_UINT32 - limit_)) {
max_count_ = MAX_UINT32;
} else {
max_count_ = offset_ + limit_;
}
if (limit_ == 0) {
return;
}
if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
ascending_init(min_str, max_str);
} else {
descending_init(min_str, max_str);
}
}
// Prepares an ascending traversal. A non-empty `max_str` is copied into
// end_buf_ and becomes the end bound checked by ascending_next(). buf_ (used
// as a stack) is then seeded with the node IDs from which ascending_next()
// has to continue so that keys below `min_str` are not visited.
//
// Node IDs are formed as (parent offset ^ label), so
// (node_id ^ node.label() ^ node.sibling()) is the ID of a node's next
// sibling.
void KeyCursor::ascending_init(const String &min_str, const String &max_str) {
// Keep a private copy of the upper bound; an empty or NULL bound leaves
// end_buf_ NULL, which ascending_next() treats as "no end bound".
if (max_str.ptr() != NULL) {
if (max_str.length() != 0) {
end_buf_ = new UInt8[max_str.length()];
std::memcpy(end_buf_, max_str.ptr(), max_str.length());
end_str_.assign(end_buf_, max_str.length());
}
}
// No lower bound: start the traversal at the root.
if ((min_str.ptr() == NULL) || (min_str.length() == 0)) {
buf_.push_back(ROOT_NODE_ID);
return;
}
// Follow min_str from the root, one byte per iteration.
UInt32 node_id = ROOT_NODE_ID;
Node node;
for (UInt32 i = 0; i < min_str.length(); ++i) {
node = trie_->ith_node(node_id);
if (node.is_linker()) {
// The node refers to a stored key. Start at this node if its key is not
// below min_str (an equal key only when the lower bound is inclusive);
// otherwise start at its next sibling, if any.
// NOTE(review): `i` is presumably the offset from which compare() starts
// comparing -- confirm against String::compare.
const Key &key = trie_->get_key(node.key_pos());
const int result = key.str().compare(min_str, i);
if ((result > 0) || ((result == 0) &&
((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND))) {
buf_.push_back(node_id);
} else if (node.sibling() != INVALID_LABEL) {
buf_.push_back(node_id ^ node.label() ^ node.sibling());
}
return;
} else if (node.sibling() != INVALID_LABEL) {
// Remember the next sibling; entries pushed later are popped first.
buf_.push_back(node_id ^ node.label() ^ node.sibling());
}
// Try to descend to the child labelled min_str[i].
node_id = node.offset() ^ min_str[i];
if (trie_->ith_node(node_id).label() != min_str[i]) {
// No such child. Walk the child list (child(), then sibling() links),
// skipping a TERMINAL_LABEL child, and start at the first child whose
// label is greater than min_str[i].
UInt16 label = node.child();
if (label == TERMINAL_LABEL) {
label = trie_->ith_node(node.offset() ^ label).sibling();
}
while (label != INVALID_LABEL) {
if (label > min_str[i]) {
buf_.push_back(node.offset() ^ label);
break;
}
label = trie_->ith_node(node.offset() ^ label).sibling();
}
return;
}
}
// Every byte of min_str was matched; node_id is the node reached.
node = trie_->ith_node(node_id);
if (node.is_linker()) {
// Start at this node unless its key has the same length as min_str and the
// lower bound is exclusive; in that case start at the next sibling.
const Key &key = trie_->get_key(node.key_pos());
if ((key.length() != min_str.length()) ||
((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND)) {
buf_.push_back(node_id);
} else if (node.sibling() != INVALID_LABEL) {
buf_.push_back(node_id ^ node.label() ^ node.sibling());
}
return;
} else if (node.sibling() != INVALID_LABEL) {
buf_.push_back(node_id ^ node.label() ^ node.sibling());
}
// Continue with the first child; a TERMINAL_LABEL child is skipped when the
// lower bound is exclusive.
UInt16 label = node.child();
if ((label == TERMINAL_LABEL) &&
((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND)) {
label = trie_->ith_node(node.offset() ^ label).sibling();
}
if (label != INVALID_LABEL) {
buf_.push_back(node.offset() ^ label);
}
}
// Prepares a descending traversal. A non-empty `min_str` is copied into
// end_buf_ and becomes the end bound checked by descending_next(). buf_ (used
// as a stack) is then seeded with node IDs so that keys above `max_str` are
// not visited.
//
// Entries carrying POST_ORDER_FLAG are the ones descending_next() examines
// for a key; entries without the flag are expanded into their children
// first.
void KeyCursor::descending_init(const String &min_str, const String &max_str) {
// Keep a private copy of the lower bound; an empty or NULL bound leaves
// end_buf_ NULL, which descending_next() treats as "no end bound".
if (min_str.ptr() != NULL) {
if (min_str.length() != 0) {
end_buf_ = new UInt8[min_str.length()];
std::memcpy(end_buf_, min_str.ptr(), min_str.length());
end_str_.assign(end_buf_, min_str.length());
}
}
// No upper bound: start the traversal at the root.
if ((max_str.ptr() == NULL) || (max_str.length() == 0)) {
buf_.push_back(ROOT_NODE_ID);
return;
}
// Follow max_str from the root, one byte per iteration.
UInt32 node_id = ROOT_NODE_ID;
for (UInt32 i = 0; i < max_str.length(); ++i) {
const Base base = trie_->ith_node(node_id).base();
if (base.is_linker()) {
// The node refers to a stored key. Include it if the key is not above
// max_str (an equal key only when the upper bound is inclusive).
// NOTE(review): `i` is presumably the offset from which compare() starts
// comparing -- confirm against String::compare.
const Key &key = trie_->get_key(base.key_pos());
const int result = key.str().compare(max_str, i);
if ((result < 0) || ((result == 0) &&
((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND))) {
buf_.push_back(node_id | POST_ORDER_FLAG);
}
return;
}
// Walk the child list. A TERMINAL_LABEL child is pushed with
// POST_ORDER_FLAG before the remaining children are examined.
UInt32 label = trie_->ith_node(node_id).child();
if (label == TERMINAL_LABEL) {
node_id = base.offset() ^ label;
buf_.push_back(node_id | POST_ORDER_FLAG);
label = trie_->ith_node(node_id).sibling();
}
// Children with a label below max_str[i] are pushed for later expansion; a
// greater label ends the initialization; an equal label is descended into
// by the next iteration.
while (label != INVALID_LABEL) {
node_id = base.offset() ^ label;
if (label < max_str[i]) {
buf_.push_back(node_id);
} else if (label > max_str[i]) {
return;
} else {
break;
}
label = trie_->ith_node(node_id).sibling();
}
// The child list ended without a child labelled max_str[i].
if (label == INVALID_LABEL) {
return;
}
}
// Every byte of max_str was matched; node_id is the node reached.
const Base base = trie_->ith_node(node_id).base();
if (base.is_linker()) {
// Include the key only if it has the same length as max_str and the upper
// bound is inclusive.
const Key &key = trie_->get_key(base.key_pos());
if ((key.length() == max_str.length()) &&
((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) {
buf_.push_back(node_id | POST_ORDER_FLAG);
}
return;
}
// Otherwise only a TERMINAL_LABEL child is included, and only when the upper
// bound is inclusive.
UInt16 label = trie_->ith_node(node_id).child();
if ((label == TERMINAL_LABEL) &&
((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) {
buf_.push_back((base.offset() ^ label) | POST_ORDER_FLAG);
}
}
// Exchanges every member of this cursor with that of `cursor`, including
// ownership of the end-bound buffer.
void KeyCursor::swap(KeyCursor *cursor) {
  KeyCursor &other = *cursor;
  std::swap(trie_, other.trie_);
  std::swap(offset_, other.offset_);
  std::swap(limit_, other.limit_);
  std::swap(flags_, other.flags_);
  buf_.swap(&other.buf_);
  std::swap(count_, other.count_);
  std::swap(max_count_, other.max_count_);
  std::swap(finished_, other.finished_);
  std::swap(end_buf_, other.end_buf_);
  end_str_.swap(&other.end_str_);
}
// Pops node IDs from buf_ until a key to return is found. Returns
// Key::invalid_key() when the stack runs empty or the end bound is passed.
const Key &KeyCursor::ascending_next() {
while (!buf_.empty()) {
const UInt32 node_id = buf_.back();
buf_.pop_back();
const Node node = trie_->ith_node(node_id);
// Push the next sibling first so that it is visited after everything that
// is pushed for this node below.
if (node.sibling() != INVALID_LABEL) {
buf_.push_back(node_id ^ node.label() ^ node.sibling());
}
if (node.is_linker()) {
const Key &key = trie_->get_key(node.key_pos());
// Stop for good once a key is above the end bound (or equal to it when the
// upper bound is exclusive).
if (end_buf_ != NULL) {
const int result = key.str().compare(end_str_);
if ((result > 0) || ((result == 0) &&
((flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND))) {
finished_ = true;
return Key::invalid_key();
}
}
// The first offset_ keys are counted but not returned.
if (count_++ >= offset_) {
return key;
}
} else if (node.child() != INVALID_LABEL) {
// Not a linker: continue with its first child.
buf_.push_back(node.offset() ^ node.child());
}
}
return Key::invalid_key();
}
// Processes the entries of buf_ until a key to return is found. Returns
// Key::invalid_key() when the stack runs empty or the end bound is passed.
//
// An entry without POST_ORDER_FLAG is expanded: it gets the flag and stays on
// the stack while all of its children are pushed on top of it. An entry with
// the flag is popped and, if it is a linker, its key is examined.
const Key &KeyCursor::descending_next() {
while (!buf_.empty()) {
const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG;
const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG;
const Base base = trie_->ith_node(node_id).base();
if (post_order) {
buf_.pop_back();
if (base.is_linker()) {
const Key &key = trie_->get_key(base.key_pos());
// Stop for good once a key is below the end bound (or equal to it when the
// lower bound is exclusive).
if (end_buf_ != NULL) {
const int result = key.str().compare(end_str_);
if ((result < 0) || ((result == 0) &&
((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND))) {
finished_ = true;
return Key::invalid_key();
}
}
// The first offset_ keys are counted but not returned.
if (count_++ >= offset_) {
return key;
}
}
} else {
// Mark the entry as expanded, then push its children in child-list order so
// that the last child in the list is processed first.
buf_.back() |= POST_ORDER_FLAG;
UInt16 label = trie_->ith_node(node_id).child();
while (label != INVALID_LABEL) {
buf_.push_back(base.offset() ^ label);
label = trie_->ith_node(base.offset() ^ label).sibling();
}
}
}
return Key::invalid_key();
}
} // namespace dat
} // namespace grn

View file

@ -0,0 +1,90 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_KEY_CURSOR_HPP_
#define GRN_DAT_KEY_CURSOR_HPP_
#include "cursor.hpp"
#include "vector.hpp"
namespace grn {
namespace dat {
class Trie;
// Cursor that visits the keys of a Trie between two bounding strings by
// walking the trie's nodes, in ascending or descending direction.
class GRN_DAT_API KeyCursor : public Cursor {
public:
KeyCursor();
~KeyCursor();
// Opens the cursor over the keys between `min_str` and `max_str`. A bound
// that is NULL or empty means "no bound".
void open(const Trie &trie,
const String &min_str,
const String &max_str,
UInt32 offset = 0,
UInt32 limit = MAX_UINT32,
UInt32 flags = 0);
// Resets the cursor to the default-constructed state.
void close();
// Returns the next key, or Key::invalid_key() when exhausted.
const Key &next();
UInt32 offset() const {
return offset_;
}
UInt32 limit() const {
return limit_;
}
UInt32 flags() const {
return flags_;
}
private:
// Trie being traversed; not owned.
const Trie *trie_;
UInt32 offset_;
UInt32 limit_;
UInt32 flags_;
// Stack of node IDs still to be visited. In descending mode an entry may
// carry POST_ORDER_FLAG.
Vector<UInt32> buf_;
// Number of keys counted so far (including skipped ones) and the count at
// which next() stops (offset_ + limit_, saturated).
UInt32 count_;
UInt32 max_count_;
// Set once a key beyond the end bound has been seen.
bool finished_;
// Owned copy of the end bound (released in the destructor) and a String
// referring to it. end_buf_ is NULL when there is no end bound.
UInt8 *end_buf_;
String end_str_;
KeyCursor(const Trie &trie,
UInt32 offset, UInt32 limit, UInt32 flags);
UInt32 fix_flags(UInt32 flags) const;
void init(const String &min_str, const String &max_str);
void ascending_init(const String &min_str, const String &max_str);
void descending_init(const String &min_str, const String &max_str);
void swap(KeyCursor *cursor);
const Key &ascending_next();
const Key &descending_next();
// Highest bit of a buf_ entry; marks an entry whose children were already
// pushed (descending mode only).
static const UInt32 POST_ORDER_FLAG = 0x80000000U;
// Disallows copy and assignment.
KeyCursor(const KeyCursor &);
KeyCursor &operator=(const KeyCursor &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_KEY_CURSOR_HPP_

View file

@ -0,0 +1,112 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_KEY_HPP_
#define GRN_DAT_KEY_HPP_
#include "string.hpp"
namespace grn {
namespace dat {
// A key as stored in a buffer of UInt32s: a key ID, a length and the key
// bytes. Objects are not constructed by callers; create() writes the encoded
// form into a caller-supplied buffer and returns a reference to it.
//
// Encoding (see create(), id() and length()):
//  - id_and_length_low_: key ID in the upper 28 bits, the low 4 bits of the
//    length in the lower 4 bits;
//  - length_high_: the length shifted right by 4;
//  - buf_: the key bytes. The declared size is only a placeholder; the bytes
//    continue into the caller-supplied buffer.
class GRN_DAT_API Key {
public:
// Returns the i-th byte of the key (debug-checked against length()).
const UInt8 &operator[](UInt32 i) const {
GRN_DAT_DEBUG_THROW_IF(i >= length());
return buf_[i];
}
// A key is valid unless its ID is INVALID_KEY_ID.
bool is_valid() const {
return id() != INVALID_KEY_ID;
}
// Returns a String referring to the key bytes.
String str() const {
return String(ptr(), length());
}
const void *ptr() const {
return buf_;
}
// Reassembles the length from its high part and low 4 bits.
UInt32 length() const {
return (length_high_ << 4) | (id_and_length_low_ & 0x0F);
}
UInt32 id() const {
return id_and_length_low_ >> 4;
}
// Returns true if the key has exactly `length` bytes and its bytes from
// `offset` onwards equal those of `ptr`. Bytes before `offset` are not
// compared.
bool equals_to(const void *ptr, UInt32 length, UInt32 offset = 0) const {
if (length != this->length()) {
return false;
}
for ( ; offset < length; ++offset) {
if ((*this)[offset] != static_cast<const UInt8 *>(ptr)[offset]) {
return false;
}
}
return true;
}
// Creates an object of Key from given parameters. Then, the created object
// is embedded into a specified buffer.
static const Key &create(UInt32 *buf, UInt32 key_id,
const void *key_ptr, UInt32 key_length) {
GRN_DAT_DEBUG_THROW_IF(buf == NULL);
GRN_DAT_DEBUG_THROW_IF(key_id > MAX_KEY_ID);
GRN_DAT_DEBUG_THROW_IF((key_ptr == NULL) && (key_length != 0));
GRN_DAT_DEBUG_THROW_IF(key_length > MAX_KEY_LENGTH);
// First word: key ID and the low 4 bits of the length.
*buf = (key_id << 4) | (key_length & 0x0F);
// Next byte: the remaining bits of the length, followed by the key bytes.
UInt8 *ptr = reinterpret_cast<UInt8 *>(buf + 1);
*ptr++ = key_length >> 4;
for (UInt32 i = 0; i < key_length; ++i) {
ptr[i] = static_cast<const UInt8 *>(key_ptr)[i];
}
return *reinterpret_cast<const Key *>(buf);
}
// Calculates how many UInt32s are required for a string. It is guaranteed
// that the estimated size is not less than the actual size.
static UInt32 estimate_size(UInt32 length) {
return 2 + (length / sizeof(UInt32));
}
// Returns a reference to an invalid key.
static const Key &invalid_key() {
static const Key invalid_key;
return invalid_key;
// static const UInt32 invalid_key_buf[2] = { INVALID_KEY_ID << 4, 0 };
// return *reinterpret_cast<const Key *>(invalid_key_buf);
}
private:
UInt32 id_and_length_low_;
UInt8 length_high_;
UInt8 buf_[3];
// Disallows instantiation.
// The private constructor builds the invalid key (INVALID_KEY_ID, length 0)
// returned by invalid_key().
Key() : id_and_length_low_(INVALID_KEY_ID << 4), length_high_(0) {}
~Key() {}
// Disallows copy and assignment.
Key(const Key &);
Key &operator=(const Key &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_KEY_HPP_

View file

@ -0,0 +1,129 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_NODE_HPP_
#define GRN_DAT_NODE_HPP_
// See base.hpp and check.hpp for details.
#include "base.hpp"
#include "check.hpp"
namespace grn {
namespace dat {
// A trie node: a Base and a Check bundled together. Every member function
// forwards to the corresponding function of base_ or check_; several of them
// add a GRN_DAT_DEBUG_THROW_IF consistency check first.
class GRN_DAT_API Node {
public:
Node() : base_(), check_() {}
// ---- Base part ----
Base base() const {
return base_;
}
// The accessors below debug-check that the node is not a phantom.
bool is_linker() const {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
return base_.is_linker();
}
UInt32 offset() const {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
return base_.offset();
}
UInt32 key_pos() const {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
return base_.key_pos();
}
// ---- Check part ----
Check check() const {
return check_;
}
bool is_offset() const {
return check_.is_offset();
}
UInt32 except_is_offset() const {
return check_.except_is_offset();
}
bool is_phantom() const {
return check_.is_phantom();
}
UInt32 next() const {
return check_.next();
}
UInt32 prev() const {
return check_.prev();
}
UInt32 label() const {
return check_.label();
}
UInt32 child() const {
return check_.child();
}
UInt32 sibling() const {
return check_.sibling();
}
// ---- Base setters: debug-check that the node is not a phantom ----
void set_base(Base x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
base_ = x;
}
void set_offset(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
base_.set_offset(x);
}
void set_key_pos(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(is_phantom());
base_.set_key_pos(x);
}
// ---- Check setters ----
void set_check(Check x) {
check_ = x;
}
void set_is_offset(bool x) {
check_.set_is_offset(x);
}
void set_except_is_offset(UInt32 x) {
check_.set_except_is_offset(x);
}
// The next three setters debug-check that the base offset is still
// INVALID_OFFSET.
void set_is_phantom(bool x) {
GRN_DAT_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
check_.set_is_phantom(x);
}
void set_next(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
check_.set_next(x);
}
void set_prev(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
check_.set_prev(x);
}
// NOTE(review): unlike the setters above, this check goes through offset(),
// which additionally debug-checks that the node is not a phantom -- confirm
// that the difference is intended.
void set_label(UInt32 x) {
GRN_DAT_DEBUG_THROW_IF(offset() != INVALID_OFFSET);
check_.set_label(x);
}
void set_child(UInt32 x) {
check_.set_child(x);
}
void set_sibling(UInt32 x) {
check_.set_sibling(x);
}
private:
Base base_;
Check check_;
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_NODE_HPP_

View file

@ -0,0 +1,206 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "predictive-cursor.hpp"
#include <algorithm>
#include <cstring>
#include "trie.hpp"
namespace grn {
namespace dat {
// Builds a cursor that is attached to no trie: zero offset, MAX_UINT32 limit,
// flags set to PREDICTIVE_CURSOR, an empty buffer and zeroed counters.
PredictiveCursor::PredictiveCursor()
    : trie_(NULL), offset_(0), limit_(MAX_UINT32),
      flags_(PREDICTIVE_CURSOR), buf_(),
      cur_(0), end_(0), min_length_(0) {
}
// The body is intentionally empty; cleanup is left to the members' own
// destructors.
PredictiveCursor::~PredictiveCursor() {
}
void PredictiveCursor::open(const Trie &trie,
const String &str,
UInt32 offset,
UInt32 limit,
UInt32 flags) {
GRN_DAT_THROW_IF(PARAM_ERROR, (str.ptr() == NULL) && (str.length() != 0));
flags = fix_flags(flags);
PredictiveCursor new_cursor(trie, offset, limit, flags);
new_cursor.init(str);
new_cursor.swap(this);
}
void PredictiveCursor::close() {
PredictiveCursor new_cursor;
new_cursor.swap(this);
}
// Returns the next key, or Key::invalid_key() once cur_ has reached end_.
// The traversal routine is chosen from the ASCENDING_CURSOR bit of flags_.
const Key &PredictiveCursor::next() {
  if (cur_ == end_) {
    return Key::invalid_key();
  }
  const bool ascending = (flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR;
  return ascending ? ascending_next() : descending_next();
}
// Private constructor used by open(): records the trie and the requested
// offset/limit/flags, leaving the buffer empty and the counters at zero.
PredictiveCursor::PredictiveCursor(const Trie &trie,
                                   UInt32 offset,
                                   UInt32 limit,
                                   UInt32 flags)
    : trie_(&trie), offset_(offset), limit_(limit), flags_(flags),
      buf_(), cur_(0), end_(0), min_length_(0) {
}
// Validates `flags` and fills in defaults, returning the adjusted value.
// - The cursor-type bits must be 0 or PREDICTIVE_CURSOR; PREDICTIVE_CURSOR is
//   then always set.
// - The order bits must be 0, ASCENDING_CURSOR or DESCENDING_CURSOR; when no
//   order is given, ASCENDING_CURSOR is set.
// - EXCEPT_EXACT_MATCH is the only option bit accepted.
// Each violation is reported through GRN_DAT_THROW_IF with PARAM_ERROR.
UInt32 PredictiveCursor::fix_flags(UInt32 flags) const {
const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) &&
(cursor_type != PREDICTIVE_CURSOR));
flags |= PREDICTIVE_CURSOR;
const UInt32 cursor_order = flags & CURSOR_ORDER_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) &&
(cursor_order != ASCENDING_CURSOR) &&
(cursor_order != DESCENDING_CURSOR));
// No explicit order requested: default to ascending.
if (cursor_order == 0) {
flags |= ASCENDING_CURSOR;
}
const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, cursor_options & ~(EXCEPT_EXACT_MATCH));
return flags;
}
void PredictiveCursor::init(const String &str) {
if (limit_ == 0) {
return;
}
min_length_ = str.length();
if ((flags_ & EXCEPT_EXACT_MATCH) == EXCEPT_EXACT_MATCH) {
++min_length_;
}
end_ = (offset_ > (MAX_UINT32 - limit_)) ? MAX_UINT32 : (offset_ + limit_);
UInt32 node_id = ROOT_NODE_ID;
for (UInt32 i = 0; i < str.length(); ++i) {
const Base base = trie_->ith_node(node_id).base();
if (base.is_linker()) {
if (offset_ == 0) {
const Key &key = trie_->get_key(base.key_pos());
if ((key.length() >= str.length()) &&
(key.str().substr(0, str.length()).compare(str, i) == 0)) {
if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
node_id |= IS_ROOT_FLAG;
}
buf_.push_back(node_id);
}
}
return;
}
node_id = base.offset() ^ str[i];
if (trie_->ith_node(node_id).label() != str[i]) {
return;
}
}
if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
node_id |= IS_ROOT_FLAG;
}
buf_.push_back(node_id);
}
// Exchanges every data member of *this with the corresponding member of
// `*cursor`.
void PredictiveCursor::swap(PredictiveCursor *cursor) {
  PredictiveCursor &other = *cursor;

  buf_.swap(&other.buf_);

  std::swap(trie_, other.trie_);
  std::swap(flags_, other.flags_);
  std::swap(offset_, other.offset_);
  std::swap(limit_, other.limit_);
  std::swap(min_length_, other.min_length_);
  std::swap(cur_, other.cur_);
  std::swap(end_, other.end_);
}
// Pops nodes from buf_ until a key can be returned, or returns
// Key::invalid_key() when buf_ runs empty.
// For each popped node: its sibling is pushed unless the entry carries
// IS_ROOT_FLAG; a linker yields its key when the key is at least
// min_length_ bytes long and offset_ keys have already been skipped;
// otherwise the node's first child (if any) is pushed.
const Key &PredictiveCursor::ascending_next() {
  while (!buf_.empty()) {
    const UInt32 top = buf_.back();
    buf_.pop_back();

    const bool is_root = (top & IS_ROOT_FLAG) == IS_ROOT_FLAG;
    const UInt32 node_id = top & ~IS_ROOT_FLAG;
    const Node node = trie_->ith_node(node_id);

    if (!is_root && (node.sibling() != INVALID_LABEL)) {
      buf_.push_back(node_id ^ node.label() ^ node.sibling());
    }

    if (!node.is_linker()) {
      if (node.child() != INVALID_LABEL) {
        buf_.push_back(node.offset() ^ node.child());
      }
      continue;
    }

    const Key &key = trie_->get_key(node.key_pos());
    if (key.length() < min_length_) {
      continue;
    }
    // cur_ counts every qualifying key, including those skipped by offset_.
    if (cur_++ >= offset_) {
      return key;
    }
  }
  return Key::invalid_key();
}
// Processes buf_ as a stack until a key can be returned, or returns
// Key::invalid_key() when buf_ runs empty.
// The first visit of an entry tags it with POST_ORDER_FLAG and pushes all of
// its children (following the sibling chain); the second visit pops it and,
// if it is a linker whose key is at least min_length_ bytes long and offset_
// keys have already been skipped, returns that key.
const Key &PredictiveCursor::descending_next() {
  while (!buf_.empty()) {
    const UInt32 top = buf_.back();
    const UInt32 node_id = top & ~POST_ORDER_FLAG;
    const Base base = trie_->ith_node(node_id).base();

    if ((top & POST_ORDER_FLAG) != POST_ORDER_FLAG) {
      // First visit: tag the entry before pushing its children on top of it.
      buf_.back() = top | POST_ORDER_FLAG;
      UInt16 label = trie_->ith_node(node_id).child();
      while (label != INVALID_LABEL) {
        const UInt32 child_id = base.offset() ^ label;
        buf_.push_back(child_id);
        label = trie_->ith_node(child_id).sibling();
      }
      continue;
    }

    buf_.pop_back();
    if (!base.is_linker()) {
      continue;
    }
    const Key &key = trie_->get_key(base.key_pos());
    // cur_ counts every qualifying key, including those skipped by offset_.
    if ((key.length() >= min_length_) && (cur_++ >= offset_)) {
      return key;
    }
  }
  return Key::invalid_key();
}
} // namespace dat
} // namespace grn

View file

@ -0,0 +1,86 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_PREDICTIVE_CURSOR_HPP_
#define GRN_DAT_PREDICTIVE_CURSOR_HPP_
#include "cursor.hpp"
#include "vector.hpp"
namespace grn {
namespace dat {
class Trie;
// Cursor over the keys of a Trie that start with a given string.
// The traversal order (ascending or descending) and the EXCEPT_EXACT_MATCH
// option are selected through the `flags` argument of open().
class GRN_DAT_API PredictiveCursor : public Cursor {
public:
PredictiveCursor();
~PredictiveCursor();
// Opens the cursor on `trie` for keys starting with `str`. `offset` keys are
// skipped and at most `limit` keys are returned by next().
void open(const Trie &trie,
const String &str,
UInt32 offset = 0,
UInt32 limit = MAX_UINT32,
UInt32 flags = 0);
// Resets the cursor to its default-constructed state.
void close();
// Returns the next key, or Key::invalid_key() when the cursor is exhausted.
const Key &next();
// Accessors for the values the cursor was opened with (flags() returns the
// value after fix_flags() has been applied).
UInt32 offset() const {
return offset_;
}
UInt32 limit() const {
return limit_;
}
UInt32 flags() const {
return flags_;
}
private:
const Trie *trie_;
UInt32 offset_;
UInt32 limit_;
UInt32 flags_;
// Stack of pending node IDs; the top bit of an entry is used as a tag
// (see IS_ROOT_FLAG / POST_ORDER_FLAG below).
Vector<UInt32> buf_;
// Number of qualifying keys seen so far, and the count at which next() stops.
UInt32 cur_;
UInt32 end_;
// Keys shorter than this are not returned.
UInt32 min_length_;
PredictiveCursor(const Trie &trie,
UInt32 offset, UInt32 limit, UInt32 flags);
UInt32 fix_flags(UInt32 flags) const;
void init(const String &str);
void swap(PredictiveCursor *cursor);
const Key &ascending_next();
const Key &descending_next();
// Both tags occupy the same bit of a buf_ entry: IS_ROOT_FLAG is used by the
// ascending traversal and POST_ORDER_FLAG by the descending traversal.
static const UInt32 IS_ROOT_FLAG = 0x80000000U;
static const UInt32 POST_ORDER_FLAG = 0x80000000U;
// Disallows copy and assignment.
PredictiveCursor(const PredictiveCursor &);
PredictiveCursor &operator=(const PredictiveCursor &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_PREDICTIVE_CURSOR_HPP_

View file

@ -0,0 +1,175 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "prefix-cursor.hpp"
#include <algorithm>
#include "trie.hpp"
namespace grn {
namespace dat {
// Builds a cursor that is attached to no trie: zero offset, MAX_UINT32 limit,
// flags set to PREFIX_CURSOR, an empty buffer and zeroed positions.
PrefixCursor::PrefixCursor()
    : trie_(NULL), offset_(0), limit_(MAX_UINT32),
      flags_(PREFIX_CURSOR), buf_(),
      cur_(0), end_(0) {
}
// The body is intentionally empty; cleanup is left to the members' own
// destructors.
PrefixCursor::~PrefixCursor() {
}
void PrefixCursor::open(const Trie &trie,
const String &str,
UInt32 min_length,
UInt32 offset,
UInt32 limit,
UInt32 flags) {
GRN_DAT_THROW_IF(PARAM_ERROR, (str.ptr() == NULL) && (str.length() != 0));
GRN_DAT_THROW_IF(PARAM_ERROR, min_length > str.length());
flags = fix_flags(flags);
PrefixCursor new_cursor(trie, offset, limit, flags);
new_cursor.init(str, min_length);
new_cursor.swap(this);
}
void PrefixCursor::close() {
PrefixCursor new_cursor;
new_cursor.swap(this);
}
// Returns the next key, or Key::invalid_key() once cur_ has reached end_.
// buf_ holds key positions; an ascending cursor walks it forwards
// (post-increment of cur_), a descending one backwards (pre-decrement).
const Key &PrefixCursor::next() {
  if (cur_ == end_) {
    return Key::invalid_key();
  }
  const bool ascending = (flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR;
  const UInt32 index = ascending ? cur_++ : --cur_;
  return trie_->get_key(buf_[index]);
}
// Private constructor used by open(): records the trie and the requested
// offset/limit/flags, leaving the buffer empty and the positions at zero.
PrefixCursor::PrefixCursor(const Trie &trie,
                           UInt32 offset,
                           UInt32 limit,
                           UInt32 flags)
    : trie_(&trie), offset_(offset), limit_(limit), flags_(flags),
      buf_(), cur_(0), end_(0) {
}
// Validates `flags` and fills in defaults, returning the adjusted value.
// - The cursor-type bits must be 0 or PREFIX_CURSOR; PREFIX_CURSOR is then
//   always set.
// - The order bits must be 0, ASCENDING_CURSOR or DESCENDING_CURSOR; when no
//   order is given, ASCENDING_CURSOR is set.
// - EXCEPT_EXACT_MATCH is the only option bit accepted.
// Each violation is reported through GRN_DAT_THROW_IF with PARAM_ERROR.
UInt32 PrefixCursor::fix_flags(UInt32 flags) const {
const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) &&
(cursor_type != PREFIX_CURSOR));
flags |= PREFIX_CURSOR;
const UInt32 cursor_order = flags & CURSOR_ORDER_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) &&
(cursor_order != ASCENDING_CURSOR) &&
(cursor_order != DESCENDING_CURSOR));
// No explicit order requested: default to ascending.
if (cursor_order == 0) {
flags |= ASCENDING_CURSOR;
}
const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK;
GRN_DAT_THROW_IF(PARAM_ERROR, cursor_options & ~EXCEPT_EXACT_MATCH);
return flags;
}
void PrefixCursor::init(const String &str, UInt32 min_length) {
if ((limit_ == 0) || (offset_ > (str.length() - min_length))) {
return;
}
UInt32 node_id = ROOT_NODE_ID;
UInt32 i;
for (i = 0; i < str.length(); ++i) {
const Base base = trie_->ith_node(node_id).base();
if (base.is_linker()) {
const Key &key = trie_->get_key(base.key_pos());
if ((key.length() >= min_length) && (key.length() <= str.length()) &&
(str.substr(0, key.length()).compare(key.str(), i) == 0) &&
((key.length() < str.length()) ||
((flags_ & EXCEPT_EXACT_MATCH) != EXCEPT_EXACT_MATCH))) {
buf_.push_back(base.key_pos());
}
break;
}
if ((i >= min_length) &&
(trie_->ith_node(node_id).child() == TERMINAL_LABEL)) {
const Base linker_base =
trie_->ith_node(base.offset() ^ TERMINAL_LABEL).base();
if (linker_base.is_linker()) {
buf_.push_back(linker_base.key_pos());
}
}
node_id = base.offset() ^ str[i];
if (trie_->ith_node(node_id).label() != str[i]) {
break;
}
}
if ((i == str.length()) &&
((flags_ & EXCEPT_EXACT_MATCH) != EXCEPT_EXACT_MATCH)) {
const Base base = trie_->ith_node(node_id).base();
if (base.is_linker()) {
const Key &key = trie_->get_key(base.key_pos());
if ((key.length() >= min_length) && (key.length() <= str.length())) {
buf_.push_back(base.key_pos());
}
} else if (trie_->ith_node(node_id).child() == TERMINAL_LABEL) {
const Base linker_base =
trie_->ith_node(base.offset() ^ TERMINAL_LABEL).base();
if (linker_base.is_linker()) {
buf_.push_back(linker_base.key_pos());
}
}
}
if (buf_.size() <= offset_) {
return;
}
if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
cur_ = offset_;
end_ = (limit_ < (buf_.size() - cur_)) ? (cur_ + limit_) : buf_.size();
} else {
cur_ = buf_.size() - offset_;
end_ = (limit_ < cur_) ? (cur_ - limit_) : 0;
}
}
// Exchanges every data member of *this with the corresponding member of
// `*cursor`.
void PrefixCursor::swap(PrefixCursor *cursor) {
  PrefixCursor &other = *cursor;

  buf_.swap(&other.buf_);

  std::swap(trie_, other.trie_);
  std::swap(flags_, other.flags_);
  std::swap(offset_, other.offset_);
  std::swap(limit_, other.limit_);
  std::swap(cur_, other.cur_);
  std::swap(end_, other.end_);
}
} // namespace dat
} // namespace grn

View file

@ -0,0 +1,80 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_PREFIX_CURSOR_HPP_
#define GRN_DAT_PREFIX_CURSOR_HPP_
#include "cursor.hpp"
#include "vector.hpp"
namespace grn {
namespace dat {
class Trie;
// Cursor over the keys of a Trie that are prefixes of a given string.
// The traversal order (ascending or descending) and the EXCEPT_EXACT_MATCH
// option are selected through the `flags` argument of open().
class GRN_DAT_API PrefixCursor : public Cursor {
public:
PrefixCursor();
~PrefixCursor();
// Opens the cursor on `trie` for stored keys that are prefixes of `str` and
// at least `min_length` bytes long. `offset` keys are skipped and at most
// `limit` keys are returned by next().
void open(const Trie &trie,
const String &str,
UInt32 min_length = 0,
UInt32 offset = 0,
UInt32 limit = MAX_UINT32,
UInt32 flags = 0);
// Resets the cursor to its default-constructed state.
void close();
// Returns the next key, or Key::invalid_key() when the cursor is exhausted.
const Key &next();
// Accessors for the values the cursor was opened with (flags() returns the
// value after fix_flags() has been applied).
UInt32 offset() const {
return offset_;
}
UInt32 limit() const {
return limit_;
}
UInt32 flags() const {
return flags_;
}
private:
const Trie *trie_;
UInt32 offset_;
UInt32 limit_;
UInt32 flags_;
// Key positions of all matching keys, collected by init().
Vector<UInt32> buf_;
// Current index into buf_ and the index at which next() stops.
UInt32 cur_;
UInt32 end_;
PrefixCursor(const Trie &trie,
UInt32 offset, UInt32 limit, UInt32 flags);
UInt32 fix_flags(UInt32 flags) const;
void init(const String &str, UInt32 min_length);
void swap(PrefixCursor *cursor);
// Disallows copy and assignment.
PrefixCursor(const PrefixCursor &);
PrefixCursor &operator=(const PrefixCursor &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_PREFIX_CURSOR_HPP_

View file

@ -0,0 +1,29 @@
# Source list of libgrndat.la (the library named in noinst_LTLIBRARIES by the
# Makefile.am that includes this file): implementation files first, then the
# headers. Every entry except the last must end with a line continuation.
libgrndat_la_SOURCES = \
cursor-factory.cpp \
file-impl.cpp \
file.cpp \
id-cursor.cpp \
key-cursor.cpp \
predictive-cursor.cpp \
prefix-cursor.cpp \
trie.cpp \
array.hpp \
base.hpp \
block.hpp \
check.hpp \
cursor-factory.hpp \
cursor.hpp \
dat.hpp \
entry.hpp \
file-impl.hpp \
file.hpp \
header.hpp \
id-cursor.hpp \
key-cursor.hpp \
key.hpp \
node.hpp \
predictive-cursor.hpp \
prefix-cursor.hpp \
string.hpp \
trie.hpp \
vector.hpp

View file

@ -0,0 +1,175 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_STRING_HPP_
#define GRN_DAT_STRING_HPP_
#include "dat.hpp"
namespace grn {
namespace dat {
// A pointer/length pair describing a byte sequence. The class stores only the
// pointer it is given and never allocates or frees memory itself.
class GRN_DAT_API String {
 public:
  // An empty string: NULL pointer, zero length.
  String() : ptr_(NULL), length_(0) {}

  // Refers to `length` bytes starting at `ptr`.
  String(const void *ptr, UInt32 length)
      : ptr_(static_cast<const UInt8 *>(ptr)), length_(length) {}

  // Refers to a character array; the length is the array size minus one
  // (i.e. the final element, normally the terminating NUL, is excluded).
  template <UInt32 T>
  explicit String(const char (&str)[T])
      : ptr_(reinterpret_cast<const UInt8 *>(str)), length_(T - 1) {}

  String(const String &rhs) : ptr_(rhs.ptr_), length_(rhs.length_) {}

  String &operator=(const String &rhs) {
    assign(rhs.ptr(), rhs.length());
    return *this;
  }

  // Byte access; the index is checked by GRN_DAT_DEBUG_THROW_IF.
  const UInt8 &operator[](UInt32 i) const {
    GRN_DAT_DEBUG_THROW_IF(i >= length_);
    return ptr_[i];
  }

  const void *ptr() const {
    return ptr_;
  }
  UInt32 length() const {
    return length_;
  }

  void set_ptr(const void *x) {
    ptr_ = static_cast<const UInt8 *>(x);
  }
  void set_length(UInt32 x) {
    length_ = x;
  }
  void assign(const void *ptr, UInt32 length) {
    set_ptr(ptr);
    set_length(length);
  }

  // Returns the part of this string from `offset` to the end. The offset is
  // not validated.
  String substr(UInt32 offset = 0) const {
    const UInt32 remaining = length_ - offset;
    return String(ptr_ + offset, remaining);
  }
  // Returns `length` bytes starting at `offset`. Neither value is validated.
  String substr(UInt32 offset, UInt32 length) const {
    return String(ptr_ + offset, length);
  }

  // Three-way comparison that starts at byte `offset` of both strings
  // (callers use it when the first `offset` bytes are already known to be
  // equal). The result is
  //  - negative if *this < rhs,
  //  - zero if *this == rhs,
  //  - positive if *this > rhs.
  // If `offset` is too large, the result is undefined.
  int compare(const String &rhs, UInt32 offset = 0) const {
    GRN_DAT_DEBUG_THROW_IF(offset > length());
    GRN_DAT_DEBUG_THROW_IF(offset > rhs.length());

    UInt32 i = offset;
    while (i < length()) {
      if (i >= rhs.length()) {
        // rhs ran out first, so *this is the longer (greater) string.
        return 1;
      }
      const UInt8 lhs_byte = (*this)[i];
      const UInt8 rhs_byte = rhs[i];
      if (lhs_byte != rhs_byte) {
        return lhs_byte - rhs_byte;
      }
      ++i;
    }
    if (length() == rhs.length()) {
      return 0;
    }
    return -1;
  }

  // True when the first str.length() bytes of *this equal `str`.
  bool starts_with(const String &str) const {
    if (str.length() > length()) {
      return false;
    }
    UInt32 i = 0;
    while ((i < str.length()) && ((*this)[i] == str[i])) {
      ++i;
    }
    return i == str.length();
  }

  // True when the last str.length() bytes of *this equal `str`.
  bool ends_with(const String &str) const {
    if (str.length() > length()) {
      return false;
    }
    const UInt32 offset = length() - str.length();
    UInt32 i = 0;
    while ((i < str.length()) && ((*this)[offset + i] == str[i])) {
      ++i;
    }
    return i == str.length();
  }

  // Exchanges the pointer and the length with `*rhs`.
  void swap(String *rhs) {
    const String saved(*rhs);
    *rhs = *this;
    *this = saved;
  }

 private:
  const UInt8 *ptr_;
  UInt32 length_;
};
// Two strings are equal when their lengths match and either they share the
// same pointer or all of their bytes are equal.
inline bool operator==(const String &lhs, const String &rhs) {
  const UInt32 length = lhs.length();
  if (length != rhs.length()) {
    return false;
  }
  if (lhs.ptr() == rhs.ptr()) {
    return true;
  }
  UInt32 i = 0;
  while ((i < length) && (lhs[i] == rhs[i])) {
    ++i;
  }
  return i == length;
}
// Negation of operator==.
inline bool operator!=(const String &lhs, const String &rhs) {
  if (lhs == rhs) {
    return false;
  }
  return true;
}
// True when lhs.compare(rhs) reports a negative result.
inline bool operator<(const String &lhs, const String &rhs) {
  const int order = lhs.compare(rhs);
  return order < 0;
}
// Defined through operator< with the operands exchanged.
inline bool operator>(const String &lhs, const String &rhs) {
  const bool rhs_is_less = rhs < lhs;
  return rhs_is_less;
}
// Negation of operator>.
inline bool operator<=(const String &lhs, const String &rhs) {
  if (lhs > rhs) {
    return false;
  }
  return true;
}
// Negation of operator<.
inline bool operator>=(const String &lhs, const String &rhs) {
  if (lhs < rhs) {
    return false;
  }
  return true;
}
} // namespace dat
} // namespace grn
#endif // GRN_DAT_STRING_HPP_

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,285 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_TRIE_HPP_
#define GRN_DAT_TRIE_HPP_
#include "array.hpp"
#include "header.hpp"
#include "node.hpp"
#include "block.hpp"
#include "entry.hpp"
#include "key.hpp"
#include "file.hpp"
namespace grn {
namespace dat {
// Trie over byte-string keys. The state is reached through a Header plus
// four arrays (nodes, blocks, entries and the key buffer); the out-of-line
// members are presumably defined in trie.cpp.
// Keys can be addressed either by their position in the key buffer
// (get_key) or by their ID (ith_key).
class GRN_DAT_API Trie {
 public:
  Trie();
  ~Trie();

  // Creates a new trie. NOTE(review): the meaning of the zero/NULL defaults
  // is decided by the out-of-line implementation; confirm there.
  void create(const char *file_name = NULL,
              UInt64 file_size = 0,
              UInt32 max_num_keys = 0,
              double num_nodes_per_key = 0.0,
              double average_key_length = 0.0);

  // Creates a new trie from an existing one (`trie`); the remaining
  // parameters mirror the overload above.
  void create(const Trie &trie,
              const char *file_name = NULL,
              UInt64 file_size = 0,
              UInt32 max_num_keys = 0,
              double num_nodes_per_key = 0.0,
              double average_key_length = 0.0);

  void repair(const Trie &trie, const char *file_name = NULL);

  void open(const char *file_name);
  void close();

  void swap(Trie *trie);

  // Users can access a key by its position or ID.
  // `key_pos` indexes the key buffer; it is checked against next_key_pos()
  // by GRN_DAT_DEBUG_THROW_IF only.
  const Key &get_key(UInt32 key_pos) const {
    GRN_DAT_DEBUG_THROW_IF(key_pos >= next_key_pos());
    return *reinterpret_cast<const Key *>(key_buf_.ptr() + key_pos);
  }

  // If a specified ID is invalid, e.g. the key is already deleted, this
  // function returns a reference to an invalid key object whose id() returns
  // INVALID_KEY_ID.
  const Key &ith_key(UInt32 key_id) const {
    if ((key_id >= min_key_id()) && (key_id <= max_key_id()) &&
        ith_entry(key_id).is_valid()) {
      return get_key(ith_entry(key_id).key_pos());
    }
    return Key::invalid_key();
  }

  // Forwards to search_key(); `key_pos` is an optional output.
  bool search(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) const {
    return search_key(static_cast<const UInt8 *>(ptr), length, key_pos);
  }

  // Longest prefix match search.
  bool lcp_search(const void *ptr, UInt32 length,
                  UInt32 *key_pos = NULL) const {
    return lcp_search_key(static_cast<const UInt8 *>(ptr), length, key_pos);
  }

  // Removes the key with the given ID. Returns false when the ID does not
  // refer to a valid key; otherwise returns the result of removing the key
  // by its bytes.
  bool remove(UInt32 key_id) {
    const Key &key = ith_key(key_id);
    if (key.is_valid()) {
      return remove(key.ptr(), key.length());
    }
    return false;
  }

  // Forwards to remove_key().
  bool remove(const void *ptr, UInt32 length) {
    return remove_key(static_cast<const UInt8 *>(ptr), length);
  }

  // Forwards to insert_key(); `key_pos` is an optional output.
  bool insert(const void *ptr, UInt32 length, UInt32 *key_pos = NULL) {
    return insert_key(static_cast<const UInt8 *>(ptr), length, key_pos);
  }

  // Updates the key identified by `key_id` to the bytes at `ptr`. The key
  // found by ith_key() (possibly the invalid key) is handed to update_key().
  bool update(UInt32 key_id, const void *ptr, UInt32 length,
              UInt32 *key_pos = NULL) {
    return update_key(ith_key(key_id), static_cast<const UInt8 *>(ptr),
                      length, key_pos);
  }

  // Updates the key equal to the source bytes to the destination bytes.
  // Returns false when the source key is not found.
  bool update(const void *src_ptr, UInt32 src_length,
              const void *dest_ptr, UInt32 dest_length,
              UInt32 *key_pos = NULL) {
    UInt32 src_key_pos;
    if (!search(src_ptr, src_length, &src_key_pos)) {
      return false;
    }
    const Key &src_key = get_key(src_key_pos);
    return update_key(src_key, static_cast<const UInt8 *>(dest_ptr),
                      dest_length, key_pos);
  }

  // Read-only element access. Indices are checked by GRN_DAT_DEBUG_THROW_IF
  // only.
  const Node &ith_node(UInt32 i) const {
    GRN_DAT_DEBUG_THROW_IF(i >= num_nodes());
    return nodes_[i];
  }
  const Block &ith_block(UInt32 i) const {
    GRN_DAT_DEBUG_THROW_IF(i >= num_blocks());
    return blocks_[i];
  }
  const Entry &ith_entry(UInt32 i) const {
    GRN_DAT_DEBUG_THROW_IF(i < min_key_id());
    GRN_DAT_DEBUG_THROW_IF(i > max_key_id());
    return entries_[i];
  }

  const Header &header() const {
    return *header_;
  }

  UInt64 file_size() const {
    return header_->file_size();
  }

  // Sum of the header size, the sizes of the entries/blocks/nodes currently
  // counted by the header, and total_key_length().
  // Every operand is widened to UInt64 before multiplying so that the
  // products cannot wrap on platforms where size_t is 32 bits wide.
  UInt64 virtual_size() const {
    const UInt64 entries_size =
        static_cast<UInt64>(sizeof(Entry)) * num_keys();
    const UInt64 blocks_size =
        static_cast<UInt64>(sizeof(Block)) * num_blocks();
    const UInt64 nodes_size =
        static_cast<UInt64>(sizeof(Node)) * num_nodes();
    return static_cast<UInt64>(sizeof(Header))
        + entries_size
        + blocks_size
        + nodes_size
        + total_key_length();
  }

  // The accessors below return the corresponding fields of the header.
  UInt32 total_key_length() const {
    return header_->total_key_length();
  }
  UInt32 num_keys() const {
    return header_->num_keys();
  }
  UInt32 min_key_id() const {
    return header_->min_key_id();
  }
  UInt32 next_key_id() const {
    return header_->next_key_id();
  }
  UInt32 max_key_id() const {
    return header_->max_key_id();
  }
  UInt32 max_num_keys() const {
    return header_->max_num_keys();
  }
  UInt32 num_nodes() const {
    return header_->num_nodes();
  }
  UInt32 num_phantoms() const {
    return header_->num_phantoms();
  }
  UInt32 num_zombies() const {
    return header_->num_zombies();
  }
  UInt32 max_num_nodes() const {
    return header_->max_num_nodes();
  }
  UInt32 num_blocks() const {
    return header_->num_blocks();
  }
  UInt32 max_num_blocks() const {
    return header_->max_num_blocks();
  }
  UInt32 next_key_pos() const {
    return header_->next_key_pos();
  }
  UInt32 key_buf_size() const {
    return header_->key_buf_size();
  }
  UInt32 status_flags() const {
    return header_->status_flags();
  }

  // Clears the bits selected by CHANGING_MASK in the header's status flags.
  void clear_status_flags() {
    header_->set_status_flags(status_flags() & ~CHANGING_MASK);
  }

 private:
  File file_;
  Header *header_;
  Array<Node> nodes_;
  Array<Block> blocks_;
  Array<Entry> entries_;
  Array<UInt32> key_buf_;

  // File set-up helpers.
  void create_file(const char *file_name,
                   UInt64 file_size,
                   UInt32 max_num_keys,
                   double num_nodes_per_key,
                   double average_key_length);
  void create_file(const char *file_name,
                   UInt64 file_size,
                   UInt32 max_num_keys,
                   UInt32 max_num_blocks,
                   UInt32 key_buf_size);

  void open_file(const char *file_name);

  void map_address(void *address);

  // Construction from an existing trie or from a list of keys.
  void build_from_trie(const Trie &trie);
  void build_from_trie(const Trie &trie, UInt32 src, UInt32 dest);

  void repair_trie(const Trie &trie);
  void build_from_keys(const UInt32 *begin, const UInt32 *end,
                       UInt32 depth, UInt32 node_id);

  // Sorting helpers operating on ranges of UInt32 values.
  void mkq_sort(UInt32 *l, UInt32 *r, UInt32 depth);
  void insertion_sort(UInt32 *l, UInt32 *r, UInt32 depth);

  inline int get_label(UInt32 key_id, UInt32 depth) const;
  inline int get_median(UInt32 a, UInt32 b, UInt32 c, UInt32 depth) const;
  inline bool less_than(UInt32 lhs, UInt32 rhs, UInt32 depth) const;
  inline static void swap_ids(UInt32 *lhs, UInt32 *rhs);

  // Implementations behind the public search/remove/insert/update wrappers.
  bool search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const;
  bool search_linker(const UInt8 *ptr, UInt32 length,
                     UInt32 &node_id, UInt32 &query_pos) const;

  bool lcp_search_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos) const;

  bool remove_key(const UInt8 *ptr, UInt32 length);

  bool insert_key(const UInt8 *ptr, UInt32 length, UInt32 *key_pos);
  bool insert_linker(const UInt8 *ptr, UInt32 length,
                     UInt32 &node_id, UInt32 query_pos);

  bool update_key(const Key &key, const UInt8 *ptr, UInt32 length,
                  UInt32 *key_pos);

  // Node, key and block management.
  UInt32 insert_node(UInt32 node_id, UInt16 label);
  UInt32 append_key(const UInt8 *ptr, UInt32 length, UInt32 key_id);

  UInt32 separate(const UInt8 *ptr, UInt32 length,
                  UInt32 node_id, UInt32 i);
  void resolve(UInt32 node_id, UInt16 label);
  void migrate_nodes(UInt32 node_id, UInt32 dest_offset,
                     const UInt16 *labels, UInt32 num_labels);

  UInt32 find_offset(const UInt16 *labels, UInt32 num_labels);

  void reserve_node(UInt32 node_id);
  void reserve_block(UInt32 block_id);

  void update_block_level(UInt32 block_id, UInt32 level);
  void set_block_level(UInt32 block_id, UInt32 level);
  void unset_block_level(UInt32 block_id);

  // Mutable element access for internal use. Indices are checked by
  // GRN_DAT_DEBUG_THROW_IF only.
  Node &ith_node(UInt32 i) {
    GRN_DAT_DEBUG_THROW_IF(i >= num_nodes());
    return nodes_[i];
  }
  Block &ith_block(UInt32 i) {
    GRN_DAT_DEBUG_THROW_IF(i >= num_blocks());
    return blocks_[i];
  }
  // Unlike the const overload, this one accepts max_key_id() + 1 as well.
  Entry &ith_entry(UInt32 i) {
    GRN_DAT_DEBUG_THROW_IF(i < min_key_id());
    GRN_DAT_DEBUG_THROW_IF(i > (max_key_id() + 1));
    return entries_[i];
  }

  // Disallows copy and assignment.
  Trie(const Trie &);
  Trie &operator=(const Trie &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_TRIE_HPP_

View file

@ -0,0 +1,193 @@
/* -*- c-basic-offset: 2 -*- */
/* Copyright(C) 2011 Brazil
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GRN_DAT_VECTOR_HPP_
#define GRN_DAT_VECTOR_HPP_
#include "dat.hpp"
#include <new>
namespace grn {
namespace dat {
// A minimal growable array. Elements live in a raw char buffer and are
// constructed/destroyed explicitly with placement new and direct destructor
// calls. Index and emptiness checks are performed by GRN_DAT_DEBUG_THROW_IF
// only; allocation failure is reported as MEMORY_ERROR via GRN_DAT_THROW_IF.
template <typename T>
class GRN_DAT_API Vector {
 public:
  Vector() : buf_(NULL), size_(0), capacity_(0) {}

  // Destroys the live elements, then releases the raw buffer.
  ~Vector() {
    for (UInt32 i = 0; i < size(); ++i) {
      buf_[i].~T();
    }
    delete [] reinterpret_cast<char *>(buf_);
  }

  const T &operator[](UInt32 i) const {
    GRN_DAT_DEBUG_THROW_IF(i >= size());
    return buf_[i];
  }
  T &operator[](UInt32 i) {
    GRN_DAT_DEBUG_THROW_IF(i >= size());
    return buf_[i];
  }

  const T &front() const {
    GRN_DAT_DEBUG_THROW_IF(empty());
    return buf_[0];
  }
  T &front() {
    GRN_DAT_DEBUG_THROW_IF(empty());
    return buf_[0];
  }

  const T &back() const {
    GRN_DAT_DEBUG_THROW_IF(empty());
    return buf_[size() - 1];
  }
  T &back() {
    GRN_DAT_DEBUG_THROW_IF(empty());
    return buf_[size() - 1];
  }

  const T *begin() const {
    return buf_;
  }
  T *begin() {
    return buf_;
  }

  const T *end() const {
    return buf_ + size_;
  }
  T *end() {
    return buf_ + size_;
  }

  // Appends a default-initialized element.
  void push_back() {
    reserve(size() + 1);
    new (&buf_[size()]) T;
    ++size_;
  }
  // Appends a copy of `x`.
  void push_back(const T &x) {
    reserve(size() + 1);
    new (&buf_[size()]) T(x);
    ++size_;
  }

  // Destroys the last element. The capacity is unchanged.
  void pop_back() {
    GRN_DAT_DEBUG_THROW_IF(empty());
    back().~T();
    --size_;
  }

  // Destroys all elements. The capacity is unchanged.
  void clear() {
    resize(0);
  }

  // Grows with default-initialized elements or shrinks by destroying the
  // trailing elements. Only one of the two loops does any work.
  void resize(UInt32 new_size) {
    if (new_size > capacity()) {
      reserve(new_size);
    }
    for (UInt32 i = size(); i < new_size; ++i) {
      new (&buf_[i]) T;
    }
    for (UInt32 i = new_size; i < size(); ++i) {
      buf_[i].~T();
    }
    size_ = new_size;
  }

  // Same as resize(new_size), but new elements are constructed from `value`.
  template <typename U>
  void resize(UInt32 new_size, const U &value) {
    if (new_size > capacity()) {
      reserve(new_size);
    }
    for (UInt32 i = size(); i < new_size; ++i) {
      new (&buf_[i]) T(value);
    }
    for (UInt32 i = new_size; i < size(); ++i) {
      buf_[i].~T();
    }
    size_ = new_size;
  }

  // Ensures room for at least `new_capacity` elements. A request smaller
  // than twice the current capacity is rounded up to twice the current
  // capacity (or to MAX_UINT32 when doubling would overflow) so that
  // repeated push_back() calls do not reallocate every time.
  // Throws MEMORY_ERROR (via GRN_DAT_THROW_IF) when the allocation fails;
  // the vector is left untouched in that case.
  void reserve(UInt32 new_capacity) {
    if (new_capacity <= capacity()) {
      return;
    } else if ((new_capacity / 2) < capacity()) {
      if (capacity() < (MAX_UINT32 / 2)) {
        new_capacity = capacity() * 2;
      } else {
        new_capacity = MAX_UINT32;
      }
    }

    // The buffer must hold `new_capacity` objects of type T, so the byte
    // count is based on sizeof(T), not on the size of the counter variable.
    T *new_buf = reinterpret_cast<T *>(
        new (std::nothrow) char[sizeof(T) * new_capacity]);
    GRN_DAT_THROW_IF(MEMORY_ERROR, new_buf == NULL);

    // Copy-construct the elements into the new buffer, then destroy the
    // originals.
    for (UInt32 i = 0; i < size(); ++i) {
      new (&new_buf[i]) T(buf_[i]);
    }
    for (UInt32 i = 0; i < size(); ++i) {
      buf_[i].~T();
    }

    T *old_buf = buf_;
    buf_ = new_buf;
    delete [] reinterpret_cast<char *>(old_buf);

    capacity_ = new_capacity;
  }

  // Exchanges the buffer, size and capacity with `*rhs`; no element is
  // copied.
  void swap(Vector *rhs) {
    T * const temp_buf = buf_;
    buf_ = rhs->buf_;
    rhs->buf_ = temp_buf;

    const UInt32 temp_size = size_;
    size_ = rhs->size_;
    rhs->size_ = temp_size;

    const UInt32 temp_capacity = capacity_;
    capacity_ = rhs->capacity_;
    rhs->capacity_ = temp_capacity;
  }

  bool empty() const {
    return size_ == 0;
  }
  UInt32 size() const {
    return size_;
  }
  UInt32 capacity() const {
    return capacity_;
  }

 private:
  T *buf_;           // Raw storage, allocated as char[].
  UInt32 size_;      // Number of constructed elements.
  UInt32 capacity_;  // Number of elements the buffer was sized for.

  // Disallows copy and assignment.
  Vector(const Vector &);
  Vector &operator=(const Vector &);
};
} // namespace dat
} // namespace grn
#endif // GRN_DAT_VECTOR_HPP_