From dad4b3e93cd04cdb2f8a77e100510f4b7d800ea2 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Mon, 9 Feb 2026 18:40:42 +0530 Subject: [PATCH] MDEV-36436 Assertion "unexpected references" == 0 after ALTER IGNORE TABLE Problem: ========= An assertion failure occurs in InnoDB during consecutive ALTER TABLE operations using the COPY algorithm. The crash happens during the table rename phase because the source table (dict_table_t::n_ref_count) is non-zero, despite the thread holding an exclusive Metadata Lock (MDL). Reason: ======== When ALTER IGNORE TABLE is executed via the COPY algorithm, it generates undo logs for every row inserted into the intermediate table (e.g., #sql-alter-...). The background Purge Thread, responsible for cleaning up these undo logs, attempts to take an MDL on the table to prevent the table from being dropped while in use. Race condition: ================== First ALTER: Creates #sql-alter-, copies data, and renames it to t1. Purge Activation: The Purge thread picks up the undo logs from step 1. It takes an MDL on the temporary name (#sql-alter-) and increments the table's n_ref_count. Identity Shift: InnoDB renames the physical table object to t1, but the Purge thread still holds a reference to this object. Second ALTER: Starts a new copy process. When it attempts to rename the "new" t1 to a backup name, it checks if n_ref_count == 0. Because the Purge thread is still "pinning" the object to clean up logs from the first ALTER, the count is > 0, triggering the assertion failure. Solution: ======== ALTER IGNORE TABLE needs row-level undo logging to easily roll back the last inserted row in case of duplicate key errors. By discarding the last undo log record after inserting each row, purge will not process any log records generated by ALTER IGNORE TABLE, preventing unexpected access from the purge subsystem during subsequent DDL operations. Make skip_alter_undo (1-bit) to (2-bit enum) to support different ALTER operation modes: - NO_UNDO (0): Normal mode with standard undo logging - SKIP_UNDO (1): ALTER mode that skips undo logging - IGNORE_UNDO (2): ALTER IGNORE mode that rewrites undo blocks trx_undo_report_row_operation(): Add ALTER IGNORE undo rewriting logic. Store old undo record info before writing new records for IGNORE_UNDO mode. Reset undo top_offset to maintain only latest insert undo row_ins_clust_index_entry_low(): Prevent ALTER IGNORE from using bulk insert optimization. since it requires to write undo log for each row operation that's incompatible with existing bulk operation. --- include/my_base.h | 5 ++++- sql/ha_partition.cc | 1 + sql/sql_insert.cc | 6 ++++-- sql/sql_table.cc | 11 ++++++++--- storage/innobase/btr/btr0cur.cc | 2 +- storage/innobase/handler/ha_innodb.cc | 24 +++++++++++++++++++++--- storage/innobase/include/dict0mem.h | 23 ++++++++++++++++++----- storage/innobase/include/trx0undo.h | 4 ++++ storage/innobase/row/row0ins.cc | 11 ++++++++--- storage/innobase/row/row0uins.cc | 2 +- storage/innobase/row/row0undo.cc | 2 +- storage/innobase/trx/trx0rec.cc | 25 +++++++++++++++++++++++++ storage/mroonga/ha_mroonga.cpp | 3 +++ 13 files changed, 99 insertions(+), 20 deletions(-) diff --git a/include/my_base.h b/include/my_base.h index 2da0d2ef65115..cb9c0fc0b1a11 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -229,7 +229,10 @@ enum ha_extra_function { HA_EXTRA_END_ALTER_COPY, /** Abort of writing rows during ALTER TABLE..ALGORITHM=COPY or CREATE..SELCT */ - HA_EXTRA_ABORT_ALTER_COPY + HA_EXTRA_ABORT_ALTER_COPY, + /** Overwrite the old undo log for each insert for + ALTER IGNORE TABLE .. ALGORITHM= COPY */ + HA_EXTRA_BEGIN_ALTER_IGNORE_COPY }; /* Compatible option, to be deleted in 6.0 */ diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index dfb4b0e1804cb..0786354c1cfa7 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -9492,6 +9492,7 @@ int ha_partition::extra(enum ha_extra_function operation) case HA_EXTRA_BEGIN_ALTER_COPY: case HA_EXTRA_END_ALTER_COPY: case HA_EXTRA_ABORT_ALTER_COPY: + case HA_EXTRA_BEGIN_ALTER_IGNORE_COPY: DBUG_RETURN(loop_partitions(extra_cb, &operation)); default: { diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index ff8b6bf2ca042..a2a5f4cf085ff 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -5165,8 +5165,10 @@ select_create::prepare(List &_values, SELECT_LEX_UNIT *u) !table->s->long_unique_table) { table->file->ha_start_bulk_insert((ha_rows) 0); - if (thd->lex->duplicates == DUP_ERROR && !thd->lex->ignore) - table->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + if (thd->lex->duplicates == DUP_ERROR) + table->file->extra(thd->lex->ignore + ? HA_EXTRA_BEGIN_ALTER_IGNORE_COPY + : HA_EXTRA_BEGIN_ALTER_COPY); table->file->extra(HA_EXTRA_WRITE_CACHE); } thd->abort_on_warning= !info.ignore && thd->is_strict_mode(); diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 452ca09b31eef..5011618b64804 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -12351,8 +12351,9 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, bool ignore, thd->progress.max_counter= from->file->records(); time_to_report_progress= MY_HOW_OFTEN_TO_WRITE/10; - if (!ignore) /* for now, InnoDB needs the undo log for ALTER IGNORE */ - to->file->extra(HA_EXTRA_BEGIN_ALTER_COPY); + to->file->extra(ignore + ? HA_EXTRA_BEGIN_ALTER_IGNORE_COPY + : HA_EXTRA_BEGIN_ALTER_COPY); while (likely(!(error= info.read_record()))) { @@ -12455,6 +12456,10 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, bool ignore, if (error > 0 && !from->s->tmp_table) { + /* Abort the ALTER COPY operation. For ALTER IGNORE, this + prevents undo logs from being added to the purge thread, + allowing proper cleanup of the temporary undo state. */ + to->file->extra(HA_EXTRA_ABORT_ALTER_COPY); /* We are going to drop the temporary table */ to->file->extra(HA_EXTRA_PREPARE_FOR_DROP); } @@ -12466,7 +12471,7 @@ copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, bool ignore, error= 1; } bulk_insert_started= 0; - if (!ignore && error <= 0) + if (error <= 0) { int alt_error= to->file->extra(HA_EXTRA_END_ALTER_COPY); if (alt_error > 0) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 5348a67e8c923..d7ce6ae996ef9 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -2286,7 +2286,7 @@ btr_cur_ins_lock_and_undo( || dict_index_is_clust(index) || (flags & BTR_CREATE_FLAG)); ut_ad((flags & BTR_NO_UNDO_LOG_FLAG) - || !index->table->skip_alter_undo); + || index->table->skip_alter_undo != dict_table_t::NO_UNDO); ut_ad(mtr->is_named_space(index->table->space)); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index d90038d44a9e1..903306272f317 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -111,6 +111,7 @@ extern my_bool opt_readonly; #include "ut0mem.h" #include "row0ext.h" #include "mariadb_stats.h" +#include "trx0undo.h" simple_thread_local ha_handler_stats *mariadb_stats; #include "lz4.h" @@ -15884,9 +15885,13 @@ ha_innobase::extra( break; } goto stmt_boundary; + case HA_EXTRA_BEGIN_ALTER_IGNORE_COPY: case HA_EXTRA_BEGIN_ALTER_COPY: trx = check_trx_exists(thd); - m_prebuilt->table->skip_alter_undo = 1; + m_prebuilt->table->skip_alter_undo = + (operation == HA_EXTRA_BEGIN_ALTER_COPY) + ? dict_table_t::NO_UNDO + : dict_table_t::IGNORE_UNDO; if (m_prebuilt->table->is_temporary() || !m_prebuilt->table->versioned_by_id()) { break; @@ -15905,7 +15910,19 @@ ha_innobase::extra( they could cause a severe performance regression. */ break; } - m_prebuilt->table->skip_alter_undo = 0; + + if (m_prebuilt->table->skip_alter_undo == + dict_table_t::IGNORE_UNDO) { + /* Remove the last undo log from + transaction and does call commit_empty() + during commit process */ + trx->undo_no = 0; + trx_undo_try_truncate(*trx); + } + + m_prebuilt->table->skip_alter_undo = + dict_table_t::NORMAL_UNDO; + if (dberr_t err= trx->bulk_insert_apply()) { trx->rollback(); return convert_error_code_to_mysql( @@ -15938,7 +15955,8 @@ ha_innobase::extra( if (m_prebuilt->table->skip_alter_undo && !m_prebuilt->table->is_temporary()) { trx = check_trx_exists(ha_thd()); - m_prebuilt->table->skip_alter_undo = 0; + m_prebuilt->table->skip_alter_undo = + dict_table_t::NORMAL_UNDO; trx->rollback(); return(HA_ERR_ROLLBACK); } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 76dd0351a41d7..144a153cedc8f 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2222,12 +2222,25 @@ struct dict_table_t { Use DICT_TF2_FLAG_IS_SET() to parse this flag. */ unsigned flags2:DICT_TF2_BITS; - /** TRUE if the table is an intermediate table during copy alter - operation or a partition/subpartition which is required for copying - data and skip the undo log for insertion of row in the table. - This variable will be set and unset during extra(), or during the + /** Undo log handling modes for ALTER TABLE operations */ + enum skip_alter_undo_t { + /** Normal mode - standard undo logging */ + NORMAL_UNDO = 0, + /** ALTER mode - skip undo logging */ + NO_UNDO = 1, + /** ALTER IGNORE mode - rewrite undo blocks to + maintain only latest insert undo log */ + IGNORE_UNDO = 2 + }; + + /** Mode for handling undo logs during ALTER TABLE operations. + Set during copy alter operations or partition/subpartition operations. + When set, controls undo log behavior for row operations in the table. + For ALTER IGNORE, this enables rewriting old insert undo blocks + to maintain only the latest insert undo log. + This variable is set and unset during extra(), or during the process of altering partitions */ - unsigned skip_alter_undo:1; + unsigned skip_alter_undo:2; /*!< whether this is in a single-table tablespace and the .ibd file is missing or page decryption failed and page is corrupted */ diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 2ee2e8ac8a552..9caadb994643c 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -271,6 +271,10 @@ struct trx_undo_t { uint16_t top_offset; /*!< offset of the latest undo record, i.e., the topmost element in the undo log if we think of it as a stack */ + /** For ALTER IGNORE: store previous undo record info to + rewrite undo chain */ + uint16_t old_offset; /*!< previous undo record offset + for ALTER IGNORE */ undo_no_t top_undo_no; /*!< undo number of the latest record (IB_ID_MAX if the undo log is empty) */ buf_block_t* guess_block; /*!< guess for the buffer block where diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 9ece5dd156789..0a6215751882d 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -2781,9 +2781,14 @@ row_ins_clust_index_entry_low( if (!index->table->n_rec_locks && !index->table->versioned() && !index->table->is_temporary() - && !index->table->has_spatial_index()) { + && !index->table->has_spatial_index() + /* Prevent ALTER IGNORE from using bulk insert + optimization. since it requires to write undo log + for each row operation that's incompatible with + bulk operations */ + && index->table->skip_alter_undo != + dict_table_t::IGNORE_UNDO) { - ut_ad(!index->table->skip_alter_undo); trx->bulk_insert = TRX_DML_BULK; err = lock_table(index->table, NULL, LOCK_X, thr); if (err != DB_SUCCESS) { @@ -3341,7 +3346,7 @@ row_ins_clust_index_entry( skip the undo log and record lock checking for insertion operation. */ - if (index->table->skip_alter_undo) { + if (index->table->skip_alter_undo == dict_table_t::NO_UNDO) { flags |= BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG; } diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 50234e60e7b27..cf98d846bba99 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -452,7 +452,7 @@ static bool row_undo_ins_parse_undo_rec(undo_node_t* node, bool dict_locked) node->table = NULL; return false; } else { - ut_ad(!node->table->skip_alter_undo); + ut_ad(node->table->skip_alter_undo != dict_table_t::NO_UNDO); clust_index = dict_table_get_first_index(node->table); if (clust_index != NULL) { diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc index 610fcf58ad22c..4f194547b360b 100644 --- a/storage/innobase/row/row0undo.cc +++ b/storage/innobase/row/row0undo.cc @@ -171,7 +171,7 @@ row_undo_search_clust_to_pcur( rec_offs* offsets = offsets_; rec_offs_init(offsets_); - ut_ad(!node->table->skip_alter_undo); + ut_ad(node->table->skip_alter_undo != dict_table_t::NO_UNDO); mtr_start(&mtr); diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index fa632ad457c35..677200592b905 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -1918,6 +1918,8 @@ trx_undo_report_row_operation( } trx_undo_t* undo = *pundo; + bool ignore_ddl= + (index->table->skip_alter_undo == dict_table_t::IGNORE_UNDO); ut_ad((err == DB_SUCCESS) == (undo_block != NULL)); if (UNIV_UNLIKELY(undo_block == NULL)) { err_exit: @@ -1927,6 +1929,22 @@ trx_undo_report_row_operation( ut_ad(undo != NULL); + /* For ALTER IGNORE, implement undo log rewriting to maintain only + the latest insert undo record. This allows easy rollback of the + last inserted row on duplicate key errors. Before writing a new + undo record, rewind the undo log to the previous record position, + effectively discarding all intermediate + undo records and keeping only the most recent one. */ + if (ignore_ddl && !undo->empty() && + undo->old_offset <= undo->top_offset) { + + undo->top_offset = undo->old_offset; + undo->top_undo_no = 0; + mtr.write<2>(*undo_block, + undo_block->page.frame + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE, undo->old_offset); + } + do { uint16_t offset = !rec ? trx_undo_page_report_insert( @@ -2001,6 +2019,13 @@ trx_undo_report_row_operation( /* Success */ undo->top_page_no = undo_block->page.id().page_no(); mtr.commit(); + + /* For ALTER IGNORE, store the current record position + as the rollback point for the next operation */ + if (ignore_ddl && undo->empty()) { + undo->old_offset = offset; + } + undo->top_offset = offset; undo->top_undo_no = trx->undo_no++; undo->guess_block = undo_block; diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index fb279c5458eab..a4fa9e602aa4e 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -561,6 +561,9 @@ static const char *mrn_inspect_extra_function(enum ha_extra_function operation) case HA_EXTRA_ABORT_ALTER_COPY: inspected = "HA_EXTRA_ABORT_ALTER_COPY"; break; + case HA_EXTRA_BEGIN_ALTER_IGNORE_COPY: + inspected = "HA_EXTRA_BEGIN_ALTER_IGNORE_COPY"; + break; #ifdef MRN_HAVE_HA_EXTRA_EXPORT case HA_EXTRA_EXPORT: inspected = "HA_EXTRA_EXPORT";