mirror of https://github.com/MariaDB/server.git (synced 2025-01-19 13:32:33 +01:00)
ndb - Update documentation wrt scans
ndb/include/ndbapi/Ndb.hpp: Update documentation wrt scans
ndb/include/ndbapi/NdbIndexScanOperation.hpp: Update documentation wrt scans
ndb/include/ndbapi/NdbOperation.hpp: Update documentation wrt scans
ndb/include/ndbapi/NdbRecAttr.hpp: Update documentation wrt scans
ndb/include/ndbapi/NdbScanOperation.hpp: Update documentation wrt scans
ndb/include/ndbapi/NdbTransaction.hpp: Update documentation wrt scans
ndb/src/ndbapi/NdbOperationSearch.cpp: Update documentation wrt scans
This commit is contained in:
parent
29b3edb9b4
commit
badf354985
7 changed files with 509 additions and 553 deletions
ndb/include/ndbapi/Ndb.hpp

@@ -52,15 +52,6 @@
   The execution can be of two different types,
   <var>Commit</var> or <var>NoCommit</var>.
-  */
-  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
-  /**
-  (A transaction's execution can also be divided into three
-  steps: prepare, send, and poll. This allows us to perform asynchronous
-  transactions. More about this later.)
-  */
-  #endif
-  /**
   If the execution is of type <var>NoCommit</var>,
   then the application program executes part of a transaction,
   but without committing the transaction.
@@ -94,28 +85,13 @@
   To execute several parallel synchronous transactions, one can either
   use multiple <code>Ndb</code> objects in several threads, or start multiple
   application programs.
-  */
-  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
-  /**
-  Another way to execute several parallel transactions is to use
-  asynchronous transactions.
-  */
-  #endif
-  /**
   @section secNdbOperations Operations

-  Each <code>NdbTransaction</code> (that is, a transaction)
+  Each <code>NdbTransaction</code>
   consists of a list of operations which are represented by instances
   of <code>Ndb*Operation</code>.
-  */
-  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
-  /**
-  Operations are of two different kinds:
-  -# standard operations, and
-  -# interpreted program operations.
-  */
-  #endif
-  /**
   <h3>Single row operations</h3>
   After the operation is created using <code>NdbTransaction::getNdbOperation()</code>
   (or <code>NdbTransaction::getNdbIndexOperation()</code>), it is defined in the following
@@ -163,11 +139,8 @@
   We will now discuss in somewhat greater detail each step involved in the creation
   and use of synchronous transactions.

-  */
-
   // Edit stop point - JS, 20041228 0425+1000

-  /**
   <h4>Step 1: Define single row operation type</h4>
   The following types of operations exist:
   -# NdbOperation::insertTuple :
@@ -207,14 +180,13 @@
   Normally the attribute is defined by its name but it is
   also possible to use the attribute identity to define the
   attribute.
-  The mapping from name to identity is performed by the Table object.

-  NdbIndexOperation::getValue returns an NdbRecAttr object
+  NdbOperation::getValue returns an NdbRecAttr object
   containing the read value.
   To get the value, there are actually two methods.
   The application can either
-  - use its own memory (passed through a pointer aValue) to
-  NdbIndexOperation::getValue, or
+  - use its own memory (passed through a pointer aValue) to
+  NdbOperation::getValue, or
   - receive the attribute value in an NdbRecAttr object allocated
   by the NDB API.
   (Both variants are illustrated in the sketch below.)
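To make the two getValue() variants above concrete, here is a minimal sketch.
The attribute name "ATTR2" is hypothetical, and it assumes the common
NdbRecAttr* NdbOperation::getValue(const char* anAttrName, char* aValue = 0) overload:

@code
   // Variant 1: let the NDB API allocate the NdbRecAttr storage.
   NdbRecAttr* recAttr = MyOperation->getValue("ATTR2", NULL);

   // Variant 2: supply application-owned memory through the aValue pointer;
   // the attribute value is written into 'buffer' when the transaction executes.
   char buffer[8];
   NdbRecAttr* recAttr2 = MyOperation->getValue("ATTR2", buffer);

   // In both cases the value is valid only after NdbTransaction::execute()
   // and before Ndb::closeTransaction().
@endcode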
@@ -224,7 +196,466 @@
   Ndb::closeTransaction has been called.
   The result of reading data from an NdbRecAttr object before
   calling NdbTransaction::execute is undefined.

+  @subsection secScan Scan Operations
+
+  Scans are roughly the equivalent of SQL cursors.
+
+  Scans can either be performed on a table (@ref NdbScanOperation)
+  or on an ordered index (@ref NdbIndexScanOperation).
+
+  Scan operations are characterised by the following:
+  - They can only perform reads (shared, exclusive or dirty)
+  - They can potentially work with multiple rows
+  - They can be used to update or delete multiple rows
+  - They can operate on several nodes in parallel
+
+  After the operation is created using <code>NdbTransaction::getNdbScanOperation()</code>
+  (or <code>NdbTransaction::getNdbIndexScanOperation()</code>),
+  it is defined in the following steps:
+  -# Define the standard operation type, using <code>NdbScanOperation::readTuples()</code>
+  -# Specify search conditions, using @ref NdbScanFilter and/or @ref NdbIndexScanOperation::setBound
+  -# Specify attribute actions, using <code>NdbOperation::getValue()</code>
+  -# Execute the transaction, using <code>NdbTransaction::execute()</code>
+  -# Iterate through the result set using <code>NdbScanOperation::nextResult()</code>
+
+  Here are two brief examples illustrating this process.
+  For the sake of brevity, we omit error handling.
+
+  This first example uses an <code>NdbScanOperation</code>:
+  @code
+     // 1. Create
+     MyOperation = MyTransaction->getNdbScanOperation("MYTABLENAME");
+
+     // 2. Define type of operation and lock mode
+     MyOperation->readTuples(NdbOperation::LM_Read);
+
+     // 3. Specify search conditions
+     NdbScanFilter sf(MyOperation);
+     sf.begin(NdbScanFilter::OR);
+     sf.eq(0, i);    // Return rows with column 0 equal to i or
+     sf.eq(1, i+1);  // column 1 equal to (i+1)
+     sf.end();
+
+     // 4. Attribute actions
+     MyRecAttr = MyOperation->getValue("ATTR2", NULL);
+  @endcode
+
+  The second example uses an <code>NdbIndexScanOperation</code>:
+  @code
+     // 1. Create
+     MyOperation = MyTransaction->getNdbIndexScanOperation("MYORDEREDINDEX", "MYTABLENAME");
+
+     // 2. Define type of operation and lock mode
+     MyOperation->readTuples(NdbOperation::LM_Read);
+
+     // 3. Specify search conditions
+     // All rows with ATTR1 between i and (i+1)
+     MyOperation->setBound("ATTR1", NdbIndexScanOperation::BoundGE, i);
+     MyOperation->setBound("ATTR1", NdbIndexScanOperation::BoundLE, i+1);
+
+     // 4. Attribute actions
+     MyRecAttr = MyOperation->getValue("ATTR2", NULL);
+  @endcode
+
+  <h4>Step 1: Define scan operation type</h4>
+  Scan operations only support one operation type,
+  @ref NdbScanOperation::readTuples or @ref NdbIndexScanOperation::readTuples.
+
+  @note If you want to define multiple scan operations within the same
+  transaction, then you need to call NdbTransaction::getNdb*ScanOperation
+  for each operation.
+
+  <h4>Step 2: Specify Search Conditions</h4>
+  The search condition is used to select tuples.
+  If no search condition is specified, the scan will return all rows in the table.
+
+  The search condition can be an @ref NdbScanFilter, which can be used on both
+  @ref NdbScanOperation and @ref NdbIndexScanOperation, or bounds, which can
+  only be used on index scans (@ref NdbIndexScanOperation::setBound).
+  An index scan can have both an NdbScanFilter and bounds.
+
+  @note When an NdbScanFilter is used, each row is examined but may not be
+  returned. When using bounds, only rows within the bounds are examined.
+  (A sketch combining bounds with an NdbScanFilter follows below.)
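As stated above, bounds and an NdbScanFilter can be combined on an index scan:
the bounds restrict which rows are examined, and the filter decides which of the
examined rows are returned. A minimal sketch, reusing the hypothetical index,
table and columns (ATTR1/ATTR2, column id 1) from the examples above:

@code
   NdbIndexScanOperation* op =
     MyTransaction->getNdbIndexScanOperation("MYORDEREDINDEX", "MYTABLENAME");
   op->readTuples(NdbOperation::LM_Read);

   // Bounds: only examine rows with i <= ATTR1 <= i+10
   op->setBound("ATTR1", NdbIndexScanOperation::BoundGE, i);
   op->setBound("ATTR1", NdbIndexScanOperation::BoundLE, i+10);

   // Filter: of the examined rows, only return those with column 1 equal to 0
   NdbScanFilter sf(op);
   sf.begin(NdbScanFilter::AND);
   sf.eq(1, (Uint32)0);
   sf.end();

   NdbRecAttr* value = op->getValue("ATTR2", NULL);
@endcode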
+  <h4>Step 3: Specify Attribute Actions</h4>
+
+  Now it is time to define which attributes should be read.
+  Normally the attribute is defined by its name, but it is also possible
+  to use the attribute identity to define the attribute.
+
+  NdbOperation::getValue returns an NdbRecAttr object containing the read value.
+  To get the value, there are actually two methods.
+  The application can either
+  - use its own memory (passed through a pointer aValue) to NdbOperation::getValue, or
+  - receive the attribute value in an NdbRecAttr object allocated by the NDB API.
+
+  The NdbRecAttr object is released when Ndb::closeTransaction is called.
+  Thus, the application cannot reference this object after
+  Ndb::closeTransaction has been called.
+  The result of reading data from an NdbRecAttr object before
+  calling NdbTransaction::execute is undefined.
+
+  <h3>Using Scan to update/delete</h3>
+  Scanning can also be used to update or delete rows.
+  This is performed by:
+  -# Scanning with exclusive locks, NdbOperation::LM_Exclusive
+  -# When iterating through the result set, for each row optionally calling
+     either NdbScanOperation::updateCurrentTuple or
+     NdbScanOperation::deleteCurrentTuple (a sketch of this loop follows below)
+  -# If calling <code>NdbScanOperation::updateCurrentTuple</code>,
+     setting the new values on the record using the ordinary @ref NdbOperation::setValue.
+     NdbOperation::equal should _not_ be called, as the primary key is
+     retrieved from the scan.
+
+  @note The actual update/delete will not be performed until the next
+  NdbTransaction::execute (as with single row operations).
+  NdbTransaction::execute needs to be called before locks are released;
+  see @ref secScanLocks.
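The following is a minimal sketch of the scan-update flow described above
(error handling omitted; table, column and value names are hypothetical, and the
take-over variant updateCurrentTuple(NdbTransaction*) declared in
NdbScanOperation.hpp is not used here):

@code
   Uint32 newValue = 42;  // hypothetical new value

   NdbScanOperation* scan = MyTransaction->getNdbScanOperation("MYTABLENAME");
   scan->readTuples(NdbOperation::LM_Exclusive);   // 1. exclusive locks
   MyTransaction->execute(NoCommit);               // start the scan

   while (scan->nextResult(true) == 0)
   {
     do
     {
       // 2. take over the current row for update
       NdbOperation* update = scan->updateCurrentTuple();
       // 3. set new values; no NdbOperation::equal, the key comes from the scan
       update->setValue("ATTR2", newValue);
     } while (scan->nextResult(false) == 0);

     // perform the updates defined for this batch (locks are still held)
     MyTransaction->execute(NoCommit);
   }
   MyTransaction->execute(Commit);
@endcode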
+  <h4>Index scan specific features</h4>
+  The following features are available when performing an index scan:
+  - Scanning a subset of the table using @ref NdbIndexScanOperation::setBound
+  - Ordering the result set ascending or descending, @ref NdbIndexScanOperation::readTuples
+  - When using NdbIndexScanOperation::BoundEQ on the distribution key,
+    only the fragment containing the rows will be scanned.
+
+  Rows are returned unordered unless sorted is set to true.
+  @note When performing a sorted scan, the parallelism parameter to readTuples
+  will be ignored and maximum parallelism will be used instead.
+
+  @subsection secScanLocks Lock handling with scans
+
+  When scanning a table or an index, potentially a lot of records will be returned.
+  However, NDB will only lock a batch of rows per fragment at a time.
+  How many rows are locked per fragment is controlled by the
+  <code>batch</code> parameter to @ref NdbScanOperation::readTuples.
+
+  To let the application control how locks are released,
+  @ref NdbScanOperation::nextResult has a parameter <code>fetchAllowed</code>.
+  If NdbScanOperation::nextResult is called with fetchAllowed = false, no
+  locks may be released as a result of the function call. Otherwise the locks
+  for the current batch may be released.
+
+  This example shows a scan delete that handles locks in an efficient manner.
+  For the sake of brevity, we omit error handling.
+  @code
+     int check;
+
+     // Outer loop for each batch of rows
+     while ((check = MyScanOperation->nextResult(true)) == 0)
+     {
+       do
+       {
+         // Inner loop for each row within the batch
+         MyScanOperation->deleteCurrentTuple();
+       } while ((check = MyScanOperation->nextResult(false)) == 0);
+
+       // When there are no more cached rows in the batch, execute all defined deletes
+       MyTransaction->execute(NoCommit);
+     }
+  @endcode
+
+  A small example of choosing the <code>batch</code> value follows below.
+  See @ref ndbapi_scan.cpp for a full scan example.
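As a concrete illustration of the <code>batch</code> parameter discussed above,
the readTuples() call below asks NDB to lock and return at most 16 rows per
fragment at a time. This is only a sketch: 16 is an arbitrary illustrative value,
and the readTuples(LockMode, batch, parallel) signature is the one declared in
NdbScanOperation.hpp in this change:

@code
   NdbScanOperation* scan = MyTransaction->getNdbScanOperation("MYTABLENAME");
   scan->readTuples(NdbOperation::LM_Exclusive,
                    16 /* batch: rows locked per fragment */,
                    0  /* parallel: let NDB choose */);
@endcode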
+  @section secError Error Handling
+
+  Errors can occur either when
+  -# operations are being defined, or when the
+  -# transaction is being executed.
+
+  One recommended way to handle a transaction failure
+  (i.e. an error is reported) is to:
+  -# Rollback the transaction (NdbTransaction::execute with a special parameter)
+  -# Close the transaction
+  -# Restart the transaction (if the error was temporary;
+     a sketch of this recovery pattern follows below)
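A minimal sketch of that recovery pattern. It assumes that the ExecType value
<code>Rollback</code> is the "special parameter" mentioned above and that
NdbError::status reports <code>NdbError::TemporaryError</code> for temporary
errors; both assumptions should be checked against the NdbTransaction and
NdbError headers:

@code
   if (theTransaction->execute(Commit) == -1)
   {
     NdbError err = theTransaction->getNdbError();
     theTransaction->execute(Rollback);          // 1. roll back the failed transaction
     theNdb->closeTransaction(theTransaction);   // 2. close it

     if (err.status == NdbError::TemporaryError)
     {
       theTransaction = theNdb->startTransaction();  // 3. restart and retry
       // ... redefine the operations and execute again ...
     }
   }
@endcode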
+  @note Transactions are not automatically closed when an error occurs.
+
+  Several errors can occur when a transaction holds multiple
+  operations which are simultaneously executed.
+  In this case the application has to go through the operation objects
+  and query their NdbError objects to find out what really happened.
+
+  NdbTransaction::getNdbErrorOperation returns a reference to the
+  operation causing the latest error.
+  NdbTransaction::getNdbErrorLine delivers the method number of the
+  erroneous method in the operation.
+
+  @code
+     theTransaction = theNdb->startTransaction();
+     theOperation = theTransaction->getNdbOperation("TEST_TABLE");
+     if (theOperation == NULL) goto error;
+     theOperation->readTuple(NdbOperation::LM_Read);
+     theOperation->setValue("ATTR_1", at1);
+     theOperation->setValue("ATTR_2", at1); // Here an error occurs
+     theOperation->setValue("ATTR_3", at1);
+     theOperation->setValue("ATTR_4", at1);
+
+     if (theTransaction->execute(Commit) == -1) {
+       errorLine = theTransaction->getNdbErrorLine();
+       errorOperation = theTransaction->getNdbErrorOperation();
+     }
+  @endcode
+
+  Here errorLine will be 3, as the error occurred in the third method
+  called on the operation object.
+  Getting errorLine == 0 means that the error occurred when executing the operations.
+  Here errorOperation will be a pointer to the theOperation object.
+  NdbTransaction::getNdbError will return the NdbError object
+  holding information about the error.
+
+  Since errors could have occurred even when a commit was reported,
+  there is also a special method, NdbTransaction::commitStatus,
+  to check the commit status of the transaction.
+ *****************************************************************************/
+
+ /**
+  * @page ndbapi_example1.cpp ndbapi_example1.cpp
+  * @include ndbapi_example1.cpp
+  */
+
+ /**
+  * @page ndbapi_example2.cpp ndbapi_example2.cpp
+  * @include ndbapi_example2.cpp
+  */
+
+ /**
+  * @page ndbapi_example3.cpp ndbapi_example3.cpp
+  * @include ndbapi_example3.cpp
+  */
+
+ /**
+  * @page ndbapi_example4.cpp ndbapi_example4.cpp
+  * @include ndbapi_example4.cpp
+  */
+
+ /**
+  * @page ndbapi_scan.cpp ndbapi_scan.cpp
+  * @include ndbapi_scan.cpp
+  */
+
+ /**
+   @page secAdapt Adaptive Send Algorithm
+
+   At the time of "sending" a transaction (using NdbTransaction::execute),
+   the transactions are in reality <em>not</em> immediately transferred
+   to the NDB Kernel.
+   Instead, the "sent" transactions are only kept in a special send list
+   (buffer) in the Ndb object to which they belong.
+   The adaptive send algorithm decides when transactions should be
+   transferred to the NDB kernel.
+
+   For each of these "sent" transactions, there are three possible states:
+   -# Waiting to be transferred to the NDB Kernel.
+   -# Has been transferred to the NDB Kernel and is currently being processed.
+   -# Has been transferred to the NDB Kernel and has finished processing.
+      Now it is waiting for a call to a poll method.
+      (When the poll method is invoked, the transaction callback method
+      will be executed.)
+
+   The poll method invoked (either Ndb::pollNdb or Ndb::sendPollNdb)
+   will return when:
+   -# at least 'minNoOfEventsToWakeup' of the transactions
+      in the send list have transitioned to state 3 as described above, and
+   -# all of these transactions have executed their callback methods.
+   A sketch of this prepare/send/poll flow is shown below.
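A minimal sketch of the prepare/send/poll flow described above, using the
asynchronous interface. The exact signatures of executeAsynchPrepare and
sendPollNdb should be checked against Ndb.hpp and NdbTransaction.hpp; this is
only an illustration of the state machine, not a definitive API reference:

@code
// Callback run while polling, once the transaction reaches state 3.
static void myCallback(int result, NdbTransaction* trans, void* aObject)
{
  // result < 0 indicates failure; inspect trans->getNdbError() in that case.
}

void runAsync(Ndb* theNdb, NdbTransaction* trans)
{
  // Prepare: define the operations on 'trans' first, then hand it to the send list.
  trans->executeAsynchPrepare(Commit, &myCallback, NULL);

  // Send + poll: wake up when at least one transaction has completed
  // (minNoOfEventsToWakeup = 1) and its callback has been executed.
  theNdb->sendPollNdb(3000 /* max wait in ms */, 1 /* minNoOfEventsToWakeup */);
}
@endcode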
+   Since the NDB API is designed as a multi-threaded interface,
+   it is desirable to transfer database operations from more than
+   one thread at a time.
+   The NDB API keeps track of which Ndb objects are active in transferring
+   information to the NDB kernel and the expected number of threads to
+   interact with the NDB kernel.
+   Note that an Ndb object should be used in at most one thread.
+   Two different threads should <em>not</em> use the same Ndb object.
+
+   There are four reasons leading to transferring of database operations:
+   -# The NDB Transporter (TCP/IP, OSE, SCI or shared memory)
+      decides that a buffer is full and sends it off.
+      The buffer size is implementation dependent and
+      might change between NDB Cluster releases.
+      On TCP/IP the buffer size is usually around 64 kByte and
+      on OSE/Delta it is usually less than 2000 bytes.
+      In each Ndb object there is one buffer per DB node,
+      so this criterion of a full buffer is only
+      local to the connection to one DB node.
+   -# Statistical information on the transferred information
+      may force sending of buffers to all DB nodes.
+   -# Every 10 ms a special send-thread checks whether
+      any send activity has occurred. If not, then the thread will
+      force sending to all nodes.
+      This means that 20 ms is the maximum time database operations
+      are waiting before being sent off. The 10 millisecond limit
+      is likely to become a configuration parameter in
+      later releases of NDB Cluster.
+      However, to support faster than 10 ms checks,
+      there has to be support from the operating system.
+   -# When calling NdbTransaction::execute synchronously or calling any
+      of the poll-methods, there is a force parameter that overrides the
+      adaptive algorithm and forces the send to all nodes.
+
+   @note The times mentioned above are examples. These might
+   change in later releases of NDB Cluster.
   */

+ /**
+   @page secConcepts NDB Cluster Concepts
+
+   The <em>NDB Kernel</em> is the collection of database (DB) nodes
+   belonging to an NDB Cluster.
+   The application programmer can for most purposes view the
+   set of all DB nodes as one entity.
+   Each DB node has three main components:
+   - TC  : The transaction coordinator
+   - ACC : The index storage
+   - TUP : The data storage
+
+   When the application program executes a transaction,
+   it connects to one TC on one DB node.
+   Usually, the programmer does not need to specify which TC to use,
+   but in some cases where performance is important,
+   transactions can be hinted to use a certain TC.
+   (If the node with the TC is down, then another TC will
+   automatically take over the work.)
+
+   Every DB node has an ACC and a TUP which store
+   the index and the data part of the database.
+   Even though one TC is responsible for the transaction,
+   several ACCs and TUPs on other DB nodes might be involved in the
+   execution of the transaction.
+
+   @section secNdbKernelConnection Selecting Transaction Coordinator
+
+   The default method is to select the transaction coordinator (TC) as being
+   the "closest" DB node. There is a heuristic for closeness based on
+   the type of transporter connection. In order of closest first, we have
+   SCI, SHM, TCP/IP (localhost), and TCP/IP (remote host). If there are several
+   connections available with the same "closeness", they will each be
+   selected in a round robin fashion for every transaction. Optionally
+   one may set the method for TC selection to round robin over all available
+   connections, where each new set of transactions
+   is placed on the next DB node.
+
+   The application programmer can however hint the NDB API which
+   transaction coordinator to use
+   by providing a <em>distribution key</em> (usually the primary key).
+   By using the primary key as distribution key,
+   the transaction will be placed on the node where the primary replica
+   of that record resides.
+   Note that this is only a hint; the system can be
+   reconfigured, and then the NDB API will choose a transaction
+   coordinator without using the hint.
+   For more information, see NdbDictionary::Column::setDistributionKey.
+   A brief sketch of providing such a hint follows below.
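A brief sketch of hinting the transaction coordinator with a distribution key,
matching the keyData/keyLen description in the @ref secHint section below. The
exact Ndb::startTransaction overload that accepts the key hint should be checked
in Ndb.hpp, and the table name and key value here are hypothetical:

@code
   Uint32 pk = 1234;   // 32-bit primary key, which is also the distribution key

   const NdbDictionary::Table* tab =
     theNdb->getDictionary()->getTable("MYTABLENAME");

   NdbTransaction* trans =
     theNdb->startTransaction(tab,
                              (const char*)&pk, // keyData: address of the key
                              4);               // keyLen: 4 bytes
@endcode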
+
+   @section secRecordStruct Record Structure
+   NDB Cluster is a relational database with tables of records.
+   Table rows represent tuples of relational data stored as records.
+   When created, the attribute schema of the table is specified,
+   and thus each record of the table has the same schema.
+
+   @subsection secKeys Primary Keys
+   Each record has from 1 up to 32 attributes which belong
+   to the primary key of the table.
+
+   @section secTrans Transactions
+
+   Transactions are committed to main memory,
+   and are committed to disk after a global checkpoint, GCP.
+   Since all data is (in most NDB Cluster configurations)
+   synchronously replicated and stored on multiple NDB nodes,
+   the system can still handle processor failures without loss of data.
+   However, in the case of a system failure (e.g. the whole system goes down),
+   all (committed or not) transactions after the latest GCP are lost.
+
+   @subsection secConcur Concurrency Control
+   NDB Cluster uses pessimistic concurrency control based on locking.
+   If a requested lock (implicit and depending on database operation)
+   cannot be attained within a specified time,
+   then a timeout error occurs.
+
+   Concurrent transactions (parallel application programs, thread-based
+   applications) sometimes deadlock when they try to access the same information.
+   Applications need to be programmed so that timeout errors
+   occurring due to deadlocks are handled. This generally
+   means that the transaction encountering a timeout
+   should be rolled back and restarted.
+
+   @section secHint Hints and performance
+
+   Placing the transaction coordinator close
+   to the actual data used in the transaction can in many cases
+   improve performance significantly. This is particularly true for
+   systems using TCP/IP. A system using Solaris and a 500 MHz processor
+   has a cost model for TCP/IP communication which is:
+
+     30 microseconds + (100 nanoseconds * no of Bytes)
+
+   This means that if we can ensure that we use "popular" links we increase
+   buffering and thus drastically reduce the communication cost.
+   Systems using SCI have a different cost model, which is:
+
+     5 microseconds + (10 nanoseconds * no of Bytes)
+
+   Thus SCI systems are much less dependent on selection of
+   transaction coordinators.
+   Typically TCP/IP systems spend 30-60% of their time on communication,
+   whereas SCI systems typically spend 5-10% of their time on communication.
+   Thus SCI means that less care from the NDB API programmer is
+   needed and great scalability can be achieved even for applications using
+   data from many parts of the database.
+
+   A simple example is an application that uses many simple updates where
+   a transaction needs to update one record.
+   This record has a 32 bit primary key,
+   which is also the distribution key.
+   Then the keyData will be the address of the integer
+   of the primary key and keyLen will be 4.
+ */
+ #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
+ /**
+   (A transaction's execution can also be divided into three
+   steps: prepare, send, and poll. This allows us to perform asynchronous
+   transactions. More about this later.)
+ */
+ #endif
+ #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
+ /**
+   Another way to execute several parallel transactions is to use
+   asynchronous transactions.
+ */
+ #endif
+ #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
+ /**
+   Operations are of two different kinds:
+   -# standard operations, and
+   -# interpreted program operations.
+ */
+ #endif
  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
  /**
  <h3>Interpreted Program Operations</h3>
@@ -233,10 +664,6 @@
   updates a tuple using an interpreted program
   -# NdbOperation::interpretedDeleteTuple :
   delete a tuple using an interpreted program
-  -# NdbOperation::openScanRead :
-  scans a table with read lock on each tuple
-  -# NdbOperation::openScanExclusive :
-  scans a table with exclusive update lock on each tuple

   The operations interpretedUpdateTuple and interpretedDeleteTuple both
   work using the unique tuple key.
@@ -289,122 +716,6 @@
   NdbOperation::def_subroutine and NdbOperation::ret_sub.
  */
  #endif
- /**
-  @subsection secScan Scanning
-  The most common use of interpreted programs is for scanning tables.
-  Scanning is a search of all tuples in a table.
-  Tuples which satisfy conditions (a search filter)
-  stated in the interpreted program are sent to the application.
-
-  Reasons for using scan transactions include the need to use a search key
-  different from the primary key and any secondary index, or that the query
-  needs to access so many tuples that it is more efficient to scan the entire table.
-
-  Scanning can also be used to update information.
-  The scanning transaction itself is however not allowed to update any tuples.
-  To do updates via scanning transactions, the tuples need to be handed over
-  to another transaction which is executing the actual update.
-
-  Even though a scan operation is part of a transaction,
-  the scan transaction is not a normal transaction.
-  The locks are <em>not</em> kept throughout the entire
-  scan transaction, since this would imply non-optimal performance.
-  <em>
-  A transaction containing a scan operation can only contain that operation.
-  No other operations are allowed in the same transaction.
-  </em>
-
-  The NdbOperation::openScanRead operation only sets a temporary read lock
-  while reading the tuple.
-  The tuple lock is released already when the result of the read reaches the application.
-  The NdbOperation::openScanExclusive operation sets an exclusive lock on the tuple
-  and sends the result to the application.
-  Thus when the application reads the data it is still locked with the exclusive lock.
-
-  If the application desires to update the tuple it may transfer
-  the tuple to another transaction which updates the tuple.
-  The updating transaction can consist of a combination of tuples
-  received from the scan and normal operations.
-
-  For transferred operations it is not necessary to provide the
-  primary key. It is part of the transfer.
-  You only need to give the operation type and the actions to perform on the tuple.
-
-  The scan transaction starts like a usual transaction,
-  but is of the following form:
-  -# Start transaction
-  -# Get NdbOperation for the table to be scanned
-  -# Set the operation type using NdbOperation::openScanRead or
-     NdbOperation::openScanExclusive
-  -# Search conditions are defined by an interpreted program
-     (setValue and write_attr are not allowed, since scan transactions
-     are only allowed to read information).
-     The instruction interpret_exit_nok does in this case
-     not abort the transaction, it only skips the tuple and proceeds with the next.
-     The skipped tuple will not be reported to the application.
-  -# Call NdbTransaction::executeScan to define (and start) the scan.
-  -# Call NdbTransaction::nextScanResult to proceed with next tuple.
-     When calling NdbTransaction::nextScanResult, the lock on any
-     previous tuples are released.
-     <br>
-     If the tuple should be updated then it must be transferred over
-     to another updating transaction.
-     This is performed by calling
-     NdbOperation::takeOverForUpdate or takeOverForDelete on
-     the scanning transaction's NdbOperation object with the updating
-     transaction's NdbTransaction object as parameter.
-     <p>
-     If NdbOperation::takeOverFor* returns NULL then the
-     operation was not successful, otherwise it returns a reference
-     to the NdbOperation which the updating transaction has received.
-  -# Use Ndb::closeTransaction as usual to close the transaction.
-     This can be performed even if there are more tuples to scan.
-
-  See also example program in section @ref select_all.cpp.
-
-  However, a new scan api is under development, using NdbScanOperation
-  and NdbScanFilter. NdbScanFilter makes it easier to define a search
-  criteria and is recommended instead of using Interpreted Programs.
-
-  The scan transaction starts like a usual transaction,
-  but is of the following form:
-  -# Start transaction
-  -# Get NdbScanOperation for the table to be scanned
-  -# NdbScanOperation::readTuples(NdbOperation::LM_Exclusive) returns a handle to a
-     NdbResultSet.
-  -# Search conditions are defined by NdbScanFilter
-  -# Call NdbTransaction::execute(NoCommit) to start the scan.
-  -# Call NdbResultSet::nextResult to proceed with next tuple.
-     When calling NdbResultSet::nextResult(false), the lock on any
-     previous tuples are released and the next tuple cached in the API is fetched.
-     <br>
-     If the tuple should be updated then define a new update operation
-     (NdbOperation) using NdbResultSet::updateTuple().
-     The new update operation can then be used to modify the tuple.
-     When nextResult(false) returns != 0, then no more tuples
-     are cached in the API. Updated tuples are now committed using
-     NdbTransaction::execute(Commit).
-     After the commit, more tuples are fetched from NDB using nextResult(true).
-  -# Use Ndb::closeTransaction as usual to close the transaction.
-     This can be performed even if there are more tuples to scan.
-
-  See the scan example program in @ref ndbapi_scan.cpp for example
-  usage of the new scan api.
- */
  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
  /**
  <h3>Interpreted Programs</h3>
@@ -584,234 +895,11 @@
  */
  #endif
- /**
-  @section secError Error Handling
-  [... removed: the old copy of the Error Handling section, identical to the
-   version added earlier in this file ...]
- *****************************************************************************/
-
- [... removed: the old @page blocks for ndbapi_example1.cpp, ndbapi_example2.cpp,
-  ndbapi_example3.cpp and ndbapi_example4.cpp, identical to the versions added
-  earlier in this file ...]
-
- #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
- /**
-  * @page select_all.cpp select_all.cpp
-  * @include select_all.cpp
-  */
-
- /**
-  * @page ndbapi_async.cpp ndbapi_async.cpp
-  * @include ndbapi_async.cpp
-  */
- #endif
-
- [... removed: the old @page block for ndbapi_scan.cpp, identical to the version
-  added earlier in this file ...]
- /**
-  @page secAdapt Adaptive Send Algorithm
-  [... removed: the old copy of the Adaptive Send Algorithm page, identical to
-   the version added earlier in this file ...]
- */
- /**
-  @page secConcepts NDB Cluster Concepts
-  [... removed: the old copies of the NDB Cluster Concepts page, the
-   "Selecting Transaction Coordinator" section and the Record Structure
-   section, identical to the versions added earlier in this file ...]
+  Put this back when real array ops are supported
+  i.e. get/setValue("kalle[3]");
-
-  @subsection secKeys Tuple Keys
-  Each record has from zero up to four attributes which belong
-  to the primary key of the table.
-  If no attribute belongs to the primary key, then
-  the NDB Cluster creates an attribute named <em>NDB$TID</em>
-  which stores a tuple identity.
-  The <em>tuple key</em> of a table is thus either
-  the primary key attributes or the special NDB$TID attribute.

   @subsection secArrays Array Attributes
   A table attribute in NDB Cluster can be of <em>array type</em>.
@@ -819,66 +907,7 @@
   <em>elements</em>. The <em>attribute size</em> is the size
   of one element of the array (expressed in bits) and the
   <em>array size</em> is the number of elements of the array.
-
-  @section secTrans Transactions
-  [... removed: the old copies of the Transactions, Concurrency Control and
-   "Hints and performance" sections, identical to the versions added earlier
-   in this file ...]
  */

  #ifndef Ndb_H
ndb/include/ndbapi/NdbIndexScanOperation.hpp

@@ -34,16 +34,15 @@ class NdbIndexScanOperation : public NdbScanOperation {

  public:
    /**
-    * readTuples returns a NdbResultSet where tuples are stored.
-    * Tuples are not stored in NdbResultSet until execute(NoCommit)
-    * has been executed and nextResult has been called.
+    * readTuples using ordered index
     *
-    * @param parallel      Scan parallelism
+    * @param lock_mode     Lock mode
     * @param batch         No of rows to fetch from each fragment at a time
-    * @param LockMode      Scan lock handling
+    * @param parallel      No of fragments to scan in parallel
     * @param order_by      Order result set in index order
     * @param order_desc    Order descending, ignored unless order_by
-    * @returns NdbResultSet.
+    * @param read_range_no Enable reading of range no using @ref get_range_no
+    * @returns 0 for success and -1 for failure
     * @see NdbScanOperation::readTuples
     */
    int readTuples(LockMode = LM_Read,
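For illustration, a sorted (ascending) ordered-index scan would be requested
roughly like this. This is only a sketch: the index and table names are
hypothetical, and the parameter order follows the readTuples declaration above:

@code
   NdbIndexScanOperation* op =
     MyTransaction->getNdbIndexScanOperation("MYORDEREDINDEX", "MYTABLENAME");

   // lock_mode = LM_Read, batch/parallel chosen by NDB (0), order_by = true
   op->readTuples(NdbOperation::LM_Read, 0, 0, true);

   // Note from the documentation above: for a sorted scan the parallelism
   // argument is ignored and maximum parallelism is used instead.
@endcode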
@@ -53,16 +52,6 @@ public:
                   bool order_desc = false,
                   bool read_range_no = false);

-  #ifndef DOXYGEN_SHOULD_SKIP_DEPRECATED
-  inline int readTuples(int parallell){
-    return readTuples(LM_Read, 0, parallell, false);
-  }
-
-  inline int readTuplesExclusive(int parallell = 0){
-    return readTuples(LM_Exclusive, 0, parallell, false);
-  }
-  #endif
-
    /**
     * Type of ordered index key bound. The values (0-4) will not change
     * and can be used explicitly (e.g. they could be computed).
@@ -134,7 +123,14 @@ public:
     */
    int get_range_no();

+   /**
+    * Is current scan sorted
+    */
    bool getSorted() const { return m_ordered; }

+   /**
+    * Is current scan sorted descending
+    */
    bool getDescending() const { return m_descending; }
  private:
    NdbIndexScanOperation(Ndb* aNdb);
ndb/include/ndbapi/NdbOperation.hpp

@@ -265,21 +265,6 @@ public:
    int equal(Uint32 anAttrId, Int64 aValue);
    int equal(Uint32 anAttrId, Uint64 aValue);

-   /**
-    * Generate a tuple id and set it as search argument.
-    *
-    * The Tuple id has NDB$TID as attribute name and 0 as attribute id.
-    *
-    * The generated tuple id is returned by the method.
-    * If zero is returned there is an error.
-    *
-    * This is mostly used for tables without any primary key
-    * attributes.
-    *
-    * @return Generated tuple id if successful, otherwise 0.
-    */
-   Uint64 setTupleId();
-
    /** @} *********************************************************************/
    /**
     * @name Specify Attribute Actions for Operations
@@ -708,6 +693,7 @@ public:

    /** @} *********************************************************************/

+  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
    /**
     * Type of operation
     */
@@ -723,11 +709,16 @@ public:
      NotDefined2,           ///< Internal for debugging
      NotDefined             ///< Internal for debugging
    };
+  #endif
+
+   /**
+    * Return lock mode for operation
+    */
    LockMode getLockMode() const { return theLockMode; }
-   void setAbortOption(Int8 ao) { m_abortOption = ao; }

   #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
+   void setAbortOption(Int8 ao) { m_abortOption = ao; }

    /**
     * Set/get distribution/partition key
     */
@@ -758,8 +749,10 @@ protected:
    void next(NdbOperation*);  // Set next pointer
    NdbOperation* next();      // Get next pointer
  public:
+  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
    const NdbOperation* next() const;
    const NdbRecAttr* getFirstRecAttr() const;
+  #endif
  protected:

    enum OperationStatus
ndb/include/ndbapi/NdbRecAttr.hpp

@@ -245,7 +245,9 @@ public:
    ~NdbRecAttr();

  public:
+  #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
    const NdbRecAttr* next() const;
+  #endif
  private:
    NdbRecAttr();
@ -37,16 +37,14 @@ class NdbScanOperation : public NdbOperation {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* readTuples returns a NdbResultSet where tuples are stored.
|
* readTuples
|
||||||
* Tuples are not stored in NdbResultSet until execute(NoCommit)
|
|
||||||
* has been executed and nextResult has been called.
|
|
||||||
*
|
*
|
||||||
* @param parallel Scan parallelism
|
* @param lock_mode Lock mode
|
||||||
* @param batch No of rows to fetch from each fragment at a time
|
* @param batch No of rows to fetch from each fragment at a time
|
||||||
* @param LockMode Scan lock handling
|
* @param parallel No of fragments to scan in parallell
|
||||||
* @note specifying 0 for batch and parallall means max performance
|
* @note specifying 0 for batch and parallall means max performance
|
||||||
*/
|
*/
|
||||||
int readTuples(LockMode = LM_Read,
|
int readTuples(LockMode lock_mode = LM_Read,
|
||||||
Uint32 batch = 0, Uint32 parallel = 0);
|
Uint32 batch = 0, Uint32 parallel = 0);
|
||||||
|
|
||||||
#ifndef DOXYGEN_SHOULD_SKIP_DEPRECATED
|
#ifndef DOXYGEN_SHOULD_SKIP_DEPRECATED
|
||||||
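As a worked illustration of the scan interface above, the sketch below runs a full-table read scan. The Ndb object is assumed to be initialised, and the table "MYTAB" with a character column "NAME" is a placeholder; the error handling is deliberately minimal:

#include <NdbApi.hpp>
#include <cstdio>

// Sketch only: scans every row of the (hypothetical) table MYTAB and prints NAME.
int scan_mytab(Ndb* myNdb)
{
  NdbTransaction* trans = myNdb->startTransaction();
  if (trans == NULL) return -1;

  NdbScanOperation* op = trans->getNdbScanOperation("MYTAB");
  if (op == NULL || op->readTuples(NdbOperation::LM_Read) != 0)
  { myNdb->closeTransaction(trans); return -1; }

  NdbRecAttr* name = op->getValue("NAME");      // buffer refreshed by each nextResult()
  if (name == NULL || trans->execute(NoCommit) != 0)
  { myNdb->closeTransaction(trans); return -1; }

  int check;
  while ((check = op->nextResult(true)) == 0)   // fetchAllowed = true: fetch next batch
    printf("NAME: %s\n", name->aRef());
  op->close();
  myNdb->closeTransaction(trans);
  return (check == 1) ? 0 : -1;                 // 1 = scan finished, otherwise error
}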
@@ -67,7 +65,7 @@ public:
  /**
   * Get the next tuple in a scan transaction.
   *
-   * After each call to NdbResult::nextResult
+   * After each call to nextResult
   * the buffers and NdbRecAttr objects defined in
   * NdbOperation::getValue are updated with values
   * from the scanned tuple.
@@ -114,53 +112,31 @@ public:
  int nextResult(bool fetchAllowed = true, bool forceSend = false);
 
  /**
-   * Close result set (scan)
+   * Close scan
   */
  void close(bool forceSend = false);
 
  /**
-   * Restart
-   */
-  int restart(bool forceSend = false);
-
-  /**
-   * Transfer scan operation to an updating transaction. Use this function
-   * when a scan has found a record that you want to update.
-   * 1. Start a new transaction.
-   * 2. Call the function takeOverForUpdate using your new transaction
-   *    as parameter, all the properties of the found record will be copied
-   *    to the new transaction.
-   * 3. When you execute the new transaction, the lock held by the scan will
-   *    be transferred to the new transaction(it's taken over).
+   * Update current tuple
   *
-   * @note You must have started the scan with openScanExclusive
-   *       to be able to update the found tuple.
-   *
-   * @param updateTrans the update transaction connection.
   * @return an NdbOperation or NULL.
   */
  NdbOperation* updateCurrentTuple();
  NdbOperation* updateCurrentTuple(NdbTransaction* updateTrans);
 
  /**
-   * Transfer scan operation to a deleting transaction. Use this function
-   * when a scan has found a record that you want to delete.
-   * 1. Start a new transaction.
-   * 2. Call the function takeOverForDelete using your new transaction
-   *    as parameter, all the properties of the found record will be copied
-   *    to the new transaction.
-   * 3. When you execute the new transaction, the lock held by the scan will
-   *    be transferred to the new transaction(its taken over).
-   *
-   * @note You must have started the scan with openScanExclusive
-   *       to be able to delete the found tuple.
-   *
-   * @param deleteTrans the delete transaction connection.
-   * @return an NdbOperation or NULL.
+   * Delete current tuple
+   * @return 0 on success or -1 on failure
   */
  int deleteCurrentTuple();
  int deleteCurrentTuple(NdbTransaction* takeOverTransaction);
 
+  /**
+   * Restart scan with exactly the same
+   *   getValues and search conditions
+   */
+  int restart(bool forceSend = false);
+
 protected:
  NdbScanOperation(Ndb* aNdb);
  virtual ~NdbScanOperation();
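To make the take-over of scanned tuples concrete, here is a hedged sketch of updating every row found by an exclusive scan through a separate transaction. The table "MYTAB" and column "STATUS" are placeholders, and the error handling is minimal:

#include <NdbApi.hpp>

// Sketch only: scan MYTAB with exclusive locks and set STATUS = 1 on every row,
// handing each found row over to a separate update transaction.
int mark_all_rows(Ndb* myNdb)
{
  NdbTransaction* scanTrans = myNdb->startTransaction();
  if (scanTrans == NULL) return -1;

  NdbScanOperation* scanOp = scanTrans->getNdbScanOperation("MYTAB");
  if (scanOp == NULL ||
      scanOp->readTuples(NdbOperation::LM_Exclusive) != 0 ||
      scanTrans->execute(NoCommit) != 0)
  { myNdb->closeTransaction(scanTrans); return -1; }

  int check;
  while ((check = scanOp->nextResult(true)) == 0)
  {
    NdbTransaction* updTrans = myNdb->startTransaction();
    if (updTrans == NULL) break;
    // The row lock held by the scan is taken over by updTrans for this tuple.
    NdbOperation* updOp = scanOp->updateCurrentTuple(updTrans);
    if (updOp == NULL || updOp->setValue("STATUS", (Uint32)1) != 0 ||
        updTrans->execute(Commit) != 0)
    { myNdb->closeTransaction(updTrans); break; }
    myNdb->closeTransaction(updTrans);
  }
  myNdb->closeTransaction(scanTrans);
  return (check == 1) ? 0 : -1;
}

Deleting instead of updating follows the same pattern, using deleteCurrentTuple(delTrans) on the take-over transaction before executing it.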
ndb/include/ndbapi/NdbTransaction.hpp
@@ -110,23 +110,6 @@ enum ExecType {
 * -# AbortOption::IgnoreError
 *    Continue execution of transaction even if operation fails
 *
- * NdbTransaction::execute can sometimes indicate an error
- * (return with -1) while the error code on the NdbTransaction is 0.
- * This is an indication that one of the operations found a record
- * problem. The transaction is still ok and can continue as usual.
- * The NdbTransaction::execute returns -1 together with error code
- * on NdbTransaction object equal to 0 always means that an
- * operation was not successful but that the total transaction was OK.
- * By checking error codes on the individual operations it is possible
- * to find out which operation was not successful.
- *
- * NdbTransaction::executeScan is used to setup a scan in the NDB kernel
- * after it has been defined.
- * NdbTransaction::nextScanResult is used to iterate through the
- * scanned tuples.
- * After each call to NdbTransaction::nextScanResult, the pointers
- * of NdbRecAttr objects defined in the NdbOperation::getValue
- * operations are updated with the values of the new the scanned tuple.
 */
 
/* FUTURE IMPLEMENTATION:
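The removed paragraph above distinguishes a failed transaction from a transaction in which only individual operations failed (execute() returns -1 while the transaction's own error code stays 0, typically with AbortOption::IgnoreError). A sketch of how an application can tell the two cases apart; the operation array and how those operations were defined are assumed to come from the caller:

#include <NdbApi.hpp>
#include <cstdio>

// Sketch only: execute a prepared transaction and report which operations failed.
// 'ops' are the operation pointers the application obtained when it defined the
// transaction (e.g. with AbortOption::IgnoreError so a missing row does not
// abort the rest); the names and structure here are illustrative.
int run_and_check(NdbTransaction* trans, NdbOperation* const* ops, int nops)
{
  if (trans->execute(Commit) == 0)
    return 0;                                       // everything succeeded

  if (trans->getNdbError().code != 0)
  {
    // The transaction itself failed and was aborted.
    printf("transaction error %d: %s\n",
           trans->getNdbError().code, trans->getNdbError().message);
    return -1;
  }

  // execute() returned -1 while the transaction error code is 0: the
  // transaction is still OK, only individual operations hit a problem.
  for (int i = 0; i < nops; i++)
    if (ops[i]->getNdbError().code != 0)
      printf("operation %d failed: %d %s\n",
             i, ops[i]->getNdbError().code, ops[i]->getNdbError().message);
  return 1;                                         // partial success
}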
@@ -376,6 +359,7 @@ public:
 #endif
  void close();
 
+#ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
  /**
   * Restart transaction
   *
@@ -385,6 +369,7 @@ public:
   * Note this method also releases completed operations
   */
  int restart();
+#endif
 
  /** @} *********************************************************************/
 
@@ -494,7 +479,7 @@ public:
   */
  const NdbOperation * getNextCompletedOperation(const NdbOperation * op)const;
 
+#ifndef DOXYGEN_SHOULD_SKIP_INTERNAL
  const NdbOperation* getFirstDefinedOperation()const{return theFirstOpInList;}
  const NdbOperation* getLastDefinedOperation()const{return theLastOpInList;}
 
@@ -508,6 +493,7 @@ public:
   * ops are used (read, insert, update, delete).
   */
  int executePendingBlobOps(Uint8 flags = 0xFF);
+#endif
 
 private:
  /**
ndb/src/ndbapi/NdbOperationSearch.cpp
@@ -300,32 +300,6 @@ NdbOperation::equal_impl(const NdbColumnImpl* tAttrInfo,
    return -1;
 }
 
-/******************************************************************************
- * Uint64 setTupleId( void )
- *
- * Return Value:    Return > 0: OK
- *                  Return 0 : setTupleId failed
- * Parameters:
- * Remark:
- *****************************************************************************/
-Uint64
-NdbOperation::setTupleId()
-{
-  if (theStatus != OperationDefined)
-  {
-    return 0;
-  }
-  Uint64 tTupleId = theNdb->getTupleIdFromNdb(m_currentTable->m_tableId);
-  if (tTupleId == ~(Uint64)0){
-    setErrorCodeAbort(theNdb->theError.code);
-    return 0;
-  }
-  if (equal((Uint32)0, tTupleId) == -1)
-    return 0;
-
-  return tTupleId;
-}
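For context, a hypothetical sketch of how a caller used the setTupleId() helper removed above; the call order is inferred from the implementation (it binds a generated id to key attribute 0 of an insert), and the table "MYTAB" with column "NAME" is a placeholder:

#include <NdbApi.hpp>

// Sketch only: insert a row into the (hypothetical) table MYTAB whose first
// attribute is a generated tuple-id key; NAME is an ordinary column.
int insert_with_tuple_id(Ndb* myNdb)
{
  NdbTransaction* trans = myNdb->startTransaction();
  if (trans == NULL) return -1;

  NdbOperation* op = trans->getNdbOperation("MYTAB");
  if (op == NULL || op->insertTuple() != 0)
  { myNdb->closeTransaction(trans); return -1; }

  Uint64 id = op->setTupleId();       // generated key value, or 0 on failure
  if (id == 0 || op->setValue("NAME", "example") != 0 ||
      trans->execute(Commit) != 0)
  { myNdb->closeTransaction(trans); return -1; }

  myNdb->closeTransaction(trans);
  return 0;
}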
 
 /******************************************************************************
  * int insertKEYINFO(const char* aValue, aStartPosition,
  *                   anAttrSizeInWords, Uint32 anAttrBitsInLastWord);