Skip to content

Commit 6c7292f

Browse files
blaudden authored and dahlerlend committed
Bug#32891206 CRASH IN NDB_DD_RESTART
Crash occurs when a spurious schema operation reply is received from a node that is not registered as a participant in the current schema operation. In some cases (when the client hasn't yet detected completion of the schema operation) this causes the schema distribution coordinator to remove the active schema operation a second time.

Fix by discarding replies from nodes that are not registered as participants. This special case was actually already handled, but the outcome was not properly propagated all the way to the handling of the active schema op.

Also remove two unused schema dist functions for starting schema operations, and document the two unsent schema operation codes used by those functions as "never sent".

Change-Id: I57ca344ba22df582aa4a020145cc8fa343bf659c
1 parent 694e3e5 commit 6c7292f

File tree

5 files changed

+18
-26
lines changed

5 files changed

+18
-26
lines changed

storage/ndb/plugin/ha_ndbcluster_binlog.cc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2569,7 +2569,7 @@ class Ndb_schema_event_handler {
25692569
return;
25702570
}
25712571

2572-
// Check if all coordinator completed and wake up client
2572+
// Check if coordinator completed and wake up client
25732573
const bool coordinator_completed =
25742574
ndb_schema_object->check_coordinator_completed();
25752575

@@ -3881,7 +3881,7 @@ class Ndb_schema_event_handler {
38813881
rewrite_acl_change_for_server_log(query);
38823882

38833883
ndb_log_verbose(19,
3884-
"got schema event on '%s.%s(%u/%u)' query: '%s' "
3884+
"Schema event on '%s.%s(%u/%u)' query: '%s' "
38853885
"type: %s(%d) node: %u slock: %x%08x",
38863886
schema->db, schema->name, schema->id, schema->version,
38873887
query.c_str(),
@@ -4215,8 +4215,14 @@ class Ndb_schema_event_handler {
42154215
return;
42164216
}
42174217

4218-
ndb_schema_object->result_received_from_node(participant_node_id, result,
4219-
unpacked_message);
4218+
const bool participant_registered =
4219+
ndb_schema_object->result_received_from_node(participant_node_id,
4220+
result, unpacked_message);
4221+
if (!participant_registered) {
4222+
ndb_log_info("Ignoring node: %d, not a registered participant",
4223+
participant_node_id);
4224+
return;
4225+
}
42204226

42214227
if (ndb_schema_object->check_all_participants_completed()) {
42224228
// All participants have completed(or failed) -> send final ack

storage/ndb/plugin/ndb_schema_dist.cc

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -624,20 +624,6 @@ bool Ndb_schema_dist_client::acl_notify(std::string user_list) {
624624
SOT_ACL_SNAPSHOT);
625625
}
626626

627-
bool Ndb_schema_dist_client::tablespace_changed(const char *tablespace_name,
628-
int id, int version) {
629-
DBUG_TRACE;
630-
return log_schema_op(ndb_thd_query(m_thd), ndb_thd_query_length(m_thd), "",
631-
tablespace_name, id, version, SOT_TABLESPACE);
632-
}
633-
634-
bool Ndb_schema_dist_client::logfilegroup_changed(const char *logfilegroup_name,
635-
int id, int version) {
636-
DBUG_TRACE;
637-
return log_schema_op(ndb_thd_query(m_thd), ndb_thd_query_length(m_thd), "",
638-
logfilegroup_name, id, version, SOT_LOGFILE_GROUP);
639-
}
640-
641627
bool Ndb_schema_dist_client::create_tablespace(const char *tablespace_name,
642628
int id, int version) {
643629
DBUG_TRACE;

storage/ndb/plugin/ndb_schema_dist.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ enum SCHEMA_OP_TYPE {
4545
SOT_CREATE_DB = 5,
4646
SOT_ALTER_DB = 6,
4747
SOT_CLEAR_SLOCK = 7,
48-
SOT_TABLESPACE = 8,
49-
SOT_LOGFILE_GROUP = 9,
48+
SOT_TABLESPACE = 8, // Never sent since 8.0.14, still reserved
49+
SOT_LOGFILE_GROUP = 9, // Never sent since 8.0.14, still reserved
5050
SOT_RENAME_TABLE = 10,
5151
SOT_TRUNCATE_TABLE = 11,
5252
SOT_RENAME_TABLE_PREPARE = 12,
@@ -319,9 +319,6 @@ class Ndb_schema_dist_client {
319319
bool participants_must_refresh);
320320
bool acl_notify(std::string user_list);
321321

322-
bool tablespace_changed(const char *tablespace_name, int id, int version);
323-
bool logfilegroup_changed(const char *logfilegroup_name, int id, int version);
324-
325322
bool create_tablespace(const char *tablespace_name, int id, int version);
326323
bool alter_tablespace(const char *tablespace_name, int id, int version);
327324
bool drop_tablespace(const char *tablespace_name, int id, int version);

storage/ndb/plugin/ndb_schema_object.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ void NDB_SCHEMA_OBJECT::register_participants(
305305
ndbcluster::ndbrequire(nodes.size() == state.m_participants.size());
306306
}
307307

308-
void NDB_SCHEMA_OBJECT::result_received_from_node(
308+
bool NDB_SCHEMA_OBJECT::result_received_from_node(
309309
uint32 participant_node_id, uint32 result,
310310
const std::string &message) const {
311311
std::lock_guard<std::mutex> lock_state(state.m_lock);
@@ -315,14 +315,15 @@ void NDB_SCHEMA_OBJECT::result_received_from_node(
315315
// Received reply from node not registered as participant, may happen
316316
// when a node hears the schema op but this node hasn't registered it as
317317
// subscriber yet.
318-
return;
318+
return false; // Not registered
319319
}
320320

321321
// Mark participant as completed and save result
322322
State::Participant &participant = it->second;
323323
participant.m_completed = true;
324324
participant.m_result = result;
325325
participant.m_message = message;
326+
return true;
326327
}
327328

328329
void NDB_SCHEMA_OBJECT::result_received_from_nodes(

storage/ndb/plugin/ndb_schema_object.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,10 @@ class NDB_SCHEMA_OBJECT {
215215
@param participant_node_id The nodeid of the node who reported result
216216
@param result The result received
217217
@param message The message describing the result if != 0
218+
219+
@return true if node was registered as participant, false otherwise
218220
*/
219-
void result_received_from_node(uint32 participant_node_id, uint32 result,
221+
bool result_received_from_node(uint32 participant_node_id, uint32 result,
220222
const std::string &message) const;
221223

222224
/**

0 commit comments

Comments
 (0)