Skip to content

Commit 2336fe5

Browse files
Martin Sköld
authored and committed
Bug#23735996 LCP NEVER COMPLETED AFTER CLEAN NODE SHUTDOWN
When a slave requests starting rollforward from the first operation, the new master might already have completed that operation. The operation then has to be re-created from the gci that the slave returns, so that the transaction can be restarted from a point where all the slaves can properly complete their operations.
1 parent d90f9f1 commit 2336fe5

File tree

1 file changed

+38
-20
lines changed

1 file changed

+38
-20
lines changed

storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp

Lines changed: 38 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -20928,6 +20928,7 @@ Dbdict::execDICT_TAKEOVER_REQ(Signal* signal)
2092820928
Uint32 highest_op_impl_req_gsn = 0;
2092920929
SchemaTransPtr trans_ptr;
2093020930
bool ending = false;
20931+
bool restarting = false;
2093120932

2093220933
jam();
2093320934
bool pending_trans = c_schemaTransList.first(trans_ptr);
@@ -21032,6 +21033,7 @@ Dbdict::execDICT_TAKEOVER_REQ(Signal* signal)
2103221033
}
2103321034
else
2103421035
{
21036+
SchemaOpPtr last_op_ptr;
2103521037
jam();
2103621038
#ifdef VM_TRACE
2103721039
ndbout_c("Op %u, state %u, rollforward %u/%u, rollback %u/%u",op_ptr.p->op_key,op_ptr.p->m_state, rollforward_op, rollforward_op_state, rollback_op, rollback_op_state);
@@ -21047,6 +21049,36 @@ Dbdict::execDICT_TAKEOVER_REQ(Signal* signal)
2104721049
rollforward_op = op_ptr.p->op_key;
2104821050
rollforward_op_state = op_ptr.p->m_state;
2104921051
}
21052+
list.last(last_op_ptr);
21053+
/*
21054+
Check if we didn't find any operation that hasn't completed.
21055+
Then it could be that we have already completed some operations,
21056+
then set rollforward point to first operation, if any, and inform
21057+
we are restarting to ensure the related gci is returned to new master
21058+
so it can find the operation if it has already completed that operation.
21059+
*/
21060+
if ((rollforward_op == 0) && (op_ptr.i == last_op_ptr.i))
21061+
{
21062+
jam();
21063+
SchemaOpPtr first_op_ptr;
21064+
if (list.first(first_op_ptr))
21065+
{
21066+
jam();
21067+
rollforward_op = first_op_ptr.p->op_key;
21068+
rollforward_op_state = first_op_ptr.p->m_state;
21069+
lowest_op = first_op_ptr.p->op_key;
21070+
lowest_op_state = first_op_ptr.p->m_state;
21071+
/*
21072+
Find the OpInfo gsn for the first operation,
21073+
this might be needed by new master to create missing operation.
21074+
*/
21075+
lowest_op_impl_req_gsn = getOpInfo(first_op_ptr).m_impl_req_gsn;
21076+
#ifdef VM_TRACE
21077+
ndbout_c("execDICT_TAKEOVER_CONF: Transaction %u rolled forward, resetting rollforward to first %u(%u), gsn %u", trans_ptr.p->trans_key, rollforward_op_state, rollforward_op, lowest_op_impl_req_gsn);
21078+
#endif
21079+
}
21080+
restarting = true;
21081+
}
2105021082
/*
2105121083
Find the starting point for a rollback, the last
2105221084
operation found that changed state.
@@ -21084,7 +21116,7 @@ Dbdict::execDICT_TAKEOVER_REQ(Signal* signal)
2108421116
conf->highest_op_state = highest_op_state;
2108521117
conf->highest_op_impl_req_gsn = highest_op_impl_req_gsn;
2108621118
}
21087-
if (ending)
21119+
if (ending || restarting)
2108821120
{
2108921121
/*
2109021122
New master might already have released lowest operation found.
@@ -21751,27 +21783,13 @@ void Dbdict::check_takeover_replies(Signal* signal)
2175121783
*/
2175221784
if (trans_ptr.p->m_master_recovery_state == SchemaTrans::TRS_ROLLFORWARD)
2175321785
{
21754-
if (trans_ptr.p->m_rollforward_op == 0)
21755-
{
21756-
jam();
21757-
SchemaOpPtr first_op_ptr;
21758-
LocalSchemaOp_list list(c_schemaOpPool, trans_ptr.p->m_op_list);
21759-
list.first(first_op_ptr);
21760-
trans_ptr.p->m_curr_op_ptr_i = first_op_ptr.i;
21761-
#ifdef VM_TRACE
21762-
ndbout_c("execDICT_TAKEOVER_CONF: Transaction %u rolled forward, but nothing to do", trans_ptr.p->trans_key);
21763-
#endif
21764-
}
21765-
else
21766-
{
21767-
jam();
21768-
SchemaOpPtr rollforward_op_ptr;
21769-
ndbrequire(findSchemaOp(rollforward_op_ptr, trans_ptr.p->m_rollforward_op));
21770-
trans_ptr.p->m_curr_op_ptr_i = rollforward_op_ptr.i;
21786+
jam();
21787+
SchemaOpPtr rollforward_op_ptr;
21788+
ndbrequire(findSchemaOp(rollforward_op_ptr, trans_ptr.p->m_rollforward_op));
21789+
trans_ptr.p->m_curr_op_ptr_i = rollforward_op_ptr.i;
2177121790
#ifdef VM_TRACE
21772-
ndbout_c("execDICT_TAKEOVER_CONF: Transaction %u rolled forward starting at %u(%u)", trans_ptr.p->trans_key, trans_ptr.p->m_rollforward_op, trans_ptr.p->m_curr_op_ptr_i);
21791+
ndbout_c("execDICT_TAKEOVER_CONF: Transaction %u rolled forward starting at %u(%u)", trans_ptr.p->trans_key, trans_ptr.p->m_rollforward_op, trans_ptr.p->m_curr_op_ptr_i);
2177321792
#endif
21774-
}
2177521793
}
2177621794
else // if (trans_ptr.p->master_recovery_state == SchemaTrans::TRS_ROLLBACK)
2177721795
{

0 commit comments

Comments
 (0)