From 7b876a71d0d12e6ff36d56280343809621ac9239 Mon Sep 17 00:00:00 2001 From: "tomas@poseidon.ndb.mysql.com" <> Date: Wed, 5 Jul 2006 17:36:18 +0200 Subject: [PATCH 1/6] Bug #20843 tests fails randomly with assertion in completeClusterFailed --- sql/ha_ndbcluster_binlog.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc index 0c0b7ae7a19..0f25f4dc38c 100644 --- a/sql/ha_ndbcluster_binlog.cc +++ b/sql/ha_ndbcluster_binlog.cc @@ -3442,8 +3442,8 @@ restart: // wait for the first event thd->proc_info= "Waiting for first event from ndbcluster"; DBUG_PRINT("info", ("Waiting for the first event")); - int schema_res= 0; - Uint64 schema_gci= 0; + int schema_res= 0, res= 0; + Uint64 schema_gci= 0, gci= 0; while (schema_res == 0 && !abort_loop) { schema_res= s_ndb->pollEvents(100, &schema_gci); @@ -3452,7 +3452,14 @@ restart: DBUG_PRINT("info", ("schema_res: %d schema_gci: %d", schema_res, schema_gci)); if (schema_res > 0) { - i_ndb->pollEvents(0); + while (res >= 0 && gci < schema_gci && !abort_loop) + { + res= i_ndb->pollEvents(100, &gci); + } + if (gci > schema_gci) + { + schema_gci= gci; + } i_ndb->flushIncompleteEvents(schema_gci); s_ndb->flushIncompleteEvents(schema_gci); if (schema_gci < ndb_latest_handled_binlog_epoch) From 38d63c303ad68c81c39418571d373fe7eeb11d33 Mon Sep 17 00:00:00 2001 From: "pekka@clam.ndb.mysql.com" <> Date: Wed, 5 Jul 2006 17:36:19 +0200 Subject: [PATCH 2/6] ndb - ndb api : try to catch autoincr 'error 0' --- storage/ndb/src/ndbapi/Ndb.cpp | 42 ++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/storage/ndb/src/ndbapi/Ndb.cpp b/storage/ndb/src/ndbapi/Ndb.cpp index 5b0a9e9d330..5eddbc35665 100644 --- a/storage/ndb/src/ndbapi/Ndb.cpp +++ b/storage/ndb/src/ndbapi/Ndb.cpp @@ -1025,14 +1025,19 @@ int Ndb::initAutoIncrement() setDatabaseName("sys"); setDatabaseSchemaName("def"); - m_sys_tab_0 = getDictionary()->getTableGlobal("SYSTAB_0"); + m_sys_tab_0 = theDictionary->getTableGlobal("SYSTAB_0"); // Restore current name space setDatabaseName(currentDb.c_str()); setDatabaseSchemaName(currentSchema.c_str()); + if (m_sys_tab_0 == NULL) { + assert(theDictionary->m_error.code != 0); + theError.code = theDictionary->m_error.code; + return -1; + } - return (m_sys_tab_0 == NULL); + return 0; } int @@ -1043,19 +1048,19 @@ Ndb::opTupleIdOnNdb(const NdbTableImpl* table, Uint32 aTableId = table->m_id; DBUG_PRINT("enter", ("table=%u value=%llu op=%u", aTableId, opValue, op)); - NdbTransaction* tConnection; - NdbOperation* tOperation= 0; // Compiler warning if not initialized + NdbTransaction* tConnection = NULL; + NdbOperation* tOperation = NULL; Uint64 tValue; NdbRecAttr* tRecAttrResult; - CHECK_STATUS_MACRO_ZERO; + CHECK_STATUS_MACRO; - if (initAutoIncrement()) - goto error_return; + if (initAutoIncrement() == -1) + goto error_handler; tConnection = this->startTransaction(); if (tConnection == NULL) - goto error_return; + goto error_handler; tOperation = tConnection->getNdbOperation(m_sys_tab_0); if (tOperation == NULL) @@ -1065,7 +1070,7 @@ Ndb::opTupleIdOnNdb(const NdbTableImpl* table, { case 0: tOperation->interpretedUpdateTuple(); - tOperation->equal("SYSKEY_0", aTableId ); + tOperation->equal("SYSKEY_0", aTableId); tOperation->incValue("NEXTID", opValue); tRecAttrResult = tOperation->getValue("NEXTID"); @@ -1130,14 +1135,21 @@ Ndb::opTupleIdOnNdb(const NdbTableImpl* table, DBUG_RETURN(0); - error_handler: - theError.code = tConnection->theError.code; - this->closeTransaction(tConnection); - error_return: +error_handler: DBUG_PRINT("error", ("ndb=%d con=%d op=%d", theError.code, - tConnection ? tConnection->theError.code : -1, - tOperation ? tOperation->theError.code : -1)); + tConnection != NULL ? tConnection->theError.code : -1, + tOperation != NULL ? tOperation->theError.code : -1)); + + if (theError.code == 0 && tConnection != NULL) + theError.code = tConnection->theError.code; + if (theError.code == 0 && tOperation != NULL) + theError.code = tOperation->theError.code; + DBUG_ASSERT(theError.code != 0); + + if (tConnection != NULL) + this->closeTransaction(tConnection); + DBUG_RETURN(-1); } From 26e39baca130f8758b1f1ab8e6ab55964842ec02 Mon Sep 17 00:00:00 2001 From: "tomas@poseidon.ndb.mysql.com" <> Date: Wed, 5 Jul 2006 18:36:18 +0200 Subject: [PATCH 3/6] added warning on cluster reconnect and binlog usage, that data may be missing --- sql/ha_ndbcluster_binlog.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc index 0f25f4dc38c..8b04f263b26 100644 --- a/sql/ha_ndbcluster_binlog.cc +++ b/sql/ha_ndbcluster_binlog.cc @@ -3473,6 +3473,12 @@ restart: ndb_latest_applied_binlog_epoch= 0; ndb_latest_received_binlog_epoch= 0; } + else if (ndb_latest_applied_binlog_epoch > 0) + { + sql_print_warning("NDB Binlog: cluster has reconnected. " + "Changes to the database that occured while " + "disconnected will not be in the binlog"); + } if (ndb_extra_logging) { sql_print_information("NDB Binlog: starting log at epoch %u", From aacb705613927c11cc8703a273b324e19368fe21 Mon Sep 17 00:00:00 2001 From: "tomas@poseidon.ndb.mysql.com" <> Date: Wed, 5 Jul 2006 20:20:39 +0200 Subject: [PATCH 4/6] Bug #20419 ndbd --nowait-nodes= fails - updated error message to more correctly reflect the issue --- ndb/include/mgmapi/ndbd_exit_codes.h | 1 + ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 16 +++++++--------- ndb/src/kernel/error/ndbd_exit_codes.c | 2 ++ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ndb/include/mgmapi/ndbd_exit_codes.h b/ndb/include/mgmapi/ndbd_exit_codes.h index 686641ebef5..1016234c513 100644 --- a/ndb/include/mgmapi/ndbd_exit_codes.h +++ b/ndb/include/mgmapi/ndbd_exit_codes.h @@ -71,6 +71,7 @@ typedef ndbd_exit_classification_enum ndbd_exit_classification; #define NDBD_EXIT_INDEX_NOTINRANGE 2304 #define NDBD_EXIT_ARBIT_SHUTDOWN 2305 #define NDBD_EXIT_POINTER_NOTINRANGE 2306 +#define NDBD_EXIT_PARTITIONED_SHUTDOWN 2307 #define NDBD_EXIT_SR_OTHERNODEFAILED 2308 #define NDBD_EXIT_NODE_NOT_DEAD 2309 #define NDBD_EXIT_SR_REDOLOG 2310 diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 3d9ade9b57c..0d59c087913 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -907,9 +907,9 @@ retry: char buf[255]; BaseString::snprintf(buf, sizeof(buf), - "Partitioned cluster! check StartPartialTimeout, " - " node %d thinks %d is president, " - " I think president is: %d", + "check StartPartialTimeout, " + "node %d thinks %d is president, " + "I think president is: %d", nodeId, president, cpresident); ndbout_c(buf); @@ -941,7 +941,7 @@ retry: CRASH_INSERTION(932); progError(__LINE__, - NDBD_EXIT_ARBIT_SHUTDOWN, + NDBD_EXIT_PARTITIONED_SHUTDOWN, buf); ndbrequire(false); @@ -2794,7 +2794,7 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, break; case FailRep::ZPARTITIONED_CLUSTER: { - code = NDBD_EXIT_ARBIT_SHUTDOWN; + code = NDBD_EXIT_PARTITIONED_SHUTDOWN; char buf1[100], buf2[100]; c_clusterNodes.getText(buf1); if (signal->getLength()== FailRep::SignalLength + FailRep::ExtraLength && @@ -2805,16 +2805,14 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, part.assign(NdbNodeBitmask::Size, rep->partition); part.getText(buf2); BaseString::snprintf(extra, sizeof(extra), - "Partitioned cluster!" - " Our cluster: %s other cluster: %s", + "Our cluster: %s other cluster: %s", buf1, buf2); } else { jam(); BaseString::snprintf(extra, sizeof(extra), - "Partitioned cluster!" - " Our cluster: %s ", buf1); + "Our cluster: %s", buf1); } msg = extra; break; diff --git a/ndb/src/kernel/error/ndbd_exit_codes.c b/ndb/src/kernel/error/ndbd_exit_codes.c index 257af4c5b1b..07b276346a0 100644 --- a/ndb/src/kernel/error/ndbd_exit_codes.c +++ b/ndb/src/kernel/error/ndbd_exit_codes.c @@ -54,6 +54,8 @@ static const ErrStruct errArray[] = {NDBD_EXIT_ARBIT_SHUTDOWN, XAE, "Node lost connection to other nodes and " "can not form a unpartitioned cluster, please investigate if there are " "error(s) on other node(s)"}, + {NDBD_EXIT_PARTITIONED_SHUTDOWN, XAE, "Partitioned cluster detected. " + "Please check if cluster is already running"}, {NDBD_EXIT_POINTER_NOTINRANGE, XIE, "Pointer too large"}, {NDBD_EXIT_SR_OTHERNODEFAILED, XRE, "Another node failed during system " "restart, please investigate error(s) on other node(s)"}, From f413bc2fe8337ada8deab30e86601cac4c53bd93 Mon Sep 17 00:00:00 2001 From: "tomas@poseidon.ndb.mysql.com" <> Date: Wed, 5 Jul 2006 20:24:12 +0200 Subject: [PATCH 5/6] ndbd: added missing jamEntry(); --- ndb/src/kernel/blocks/qmgr/QmgrMain.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 0d59c087913..95698a9a37e 100644 --- a/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -438,6 +438,7 @@ void Qmgr::execCONNECT_REP(Signal* signal) void Qmgr::execREAD_NODESCONF(Signal* signal) { + jamEntry(); check_readnodes_reply(signal, refToNode(signal->getSendersBlockRef()), GSN_READ_NODESCONF); @@ -446,6 +447,7 @@ Qmgr::execREAD_NODESCONF(Signal* signal) void Qmgr::execREAD_NODESREF(Signal* signal) { + jamEntry(); check_readnodes_reply(signal, refToNode(signal->getSendersBlockRef()), GSN_READ_NODESREF); From 06ac56e61c82eb15eba65b253d380022a38f4bfa Mon Sep 17 00:00:00 2001 From: "tomas@poseidon.ndb.mysql.com" <> Date: Wed, 5 Jul 2006 21:44:11 +0200 Subject: [PATCH 6/6] Bug #20843 tests fails randomly with assertion in completeClusterFailed - reenabled test as this now seems fixed --- mysql-test/t/disabled.def | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mysql-test/t/disabled.def b/mysql-test/t/disabled.def index 5e6ab1dd728..ebe61e1af4a 100644 --- a/mysql-test/t/disabled.def +++ b/mysql-test/t/disabled.def @@ -18,10 +18,6 @@ #im_life_cycle : Bug#20368 2006-06-10 alik im_life_cycle test fails ndb_autodiscover : BUG#18952 2006-02-16 jmiller Needs to be fixed w.r.t binlog ndb_autodiscover2 : BUG#18952 2006-02-16 jmiller Needs to be fixed w.r.t binlog -ndb_autodiscover3 : BUD#20843 2006-07-04 tomas ndb_autodiscover3 fails randomly -#ndb_binlog_discover : BUG#19395 2006-04-28 tomas/knielsen mysqld does not always detect cluster shutdown -#ndb_cache2 : BUG#18597 2006-03-28 brian simultaneous drop table and ndb statistics update triggers node failure -#ndb_cache_multi2 : BUG#18597 2006-04-10 kent simultaneous drop table and ndb statistics update triggers node failure ndb_load : BUG#17233 2006-05-04 tomas failed load data from infile causes mysqld dbug_assert, binlog not flushed partition_03ndb : BUG#16385 2006-03-24 mikael Partitions: crash when updating a range partitioned NDB table ps_7ndb : BUG#18950 2006-02-16 jmiller create table like does not obtain LOCK_open