diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 3071206b201fa..5d6b1ef0603af 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2698,9 +2698,14 @@ static bool innodb_init() } ut_ad(srv_force_recovery <= SRV_FORCE_IGNORE_CORRUPT); + mysql_mutex_lock(&recv_sys.mutex); ut_ad(recv_no_log_write); - buf_flush_sync(); + if (recv_sys.recovery_on) + recv_sys.apply(true); + mysql_mutex_unlock(&recv_sys.mutex); recv_sys.debug_free(); + + buf_flush_sync_batch(LSN_MAX); ut_ad(!os_aio_pending_reads()); ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex)); ut_ad(!buf_pool.get_oldest_modification(0)); diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 322f93357bd3a..d5d6f29479b66 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -3118,7 +3118,7 @@ buf_page_get_low( return(NULL); } - buf_flush_sync(); + buf_flush_sync_batch(LSN_MAX); state = block->page.state(); @@ -4090,15 +4090,15 @@ ATTRIBUTE_COLD void buf_pool_t::clear_hash_index() noexcept @retval nullptr if all freed */ void buf_pool_t::assert_all_freed() noexcept { - mysql_mutex_lock(&mutex); + mysql_mutex_assert_owner(&mutex); - for (char *extent= memory, - *end= memory + block_descriptors_in_bytes(n_blocks); - extent < end; extent+= innodb_buffer_pool_extent_size) - for (buf_block_t *block= reinterpret_cast(extent), - *extent_end= block + - pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; - block < extent_end && reinterpret_cast(block) < end; block++) + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) { if (!block->page.in_file()) continue; @@ -4128,8 +4128,6 @@ void buf_pool_t::assert_all_freed() noexcept fixed_or_dirty: ib::fatal() << "Page " << block->page.id() << " still fixed or dirty"; } - - mysql_mutex_unlock(&mutex); } #endif /* UNIV_DEBUG */ @@ -4140,33 +4138,6 @@ void buf_refresh_io_stats() noexcept buf_pool.old_stat = buf_pool.stat; } -/** Invalidate all pages in the buffer pool. -All pages must be in a replaceable state (not modified or latched). */ -void buf_pool_invalidate() noexcept -{ - /* It is possible that a write batch that has been posted - earlier is still not complete. For buffer pool invalidation to - proceed we must ensure there is NO write activity happening. */ - - os_aio_wait_until_no_pending_writes(false); - ut_d(buf_pool.assert_all_freed()); - mysql_mutex_lock(&buf_pool.mutex); - - while (UT_LIST_GET_LEN(buf_pool.LRU)) { - buf_LRU_scan_and_free_block(); - } - - ut_ad(UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0); - - buf_pool.freed_page_clock = 0; - buf_pool.LRU_old = NULL; - buf_pool.LRU_old_len = 0; - buf_pool.stat.init(); - - buf_refresh_io_stats(); - mysql_mutex_unlock(&buf_pool.mutex); -} - #ifdef UNIV_DEBUG /** Validate the buffer pool. */ void buf_pool_t::validate() noexcept diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 256e9d88ff414..e31cf8bb0fe11 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -173,9 +173,15 @@ bool buf_dblwr_t::create() noexcept ut_ad(init_mtr.get_savepoint() == 1); ut_ad(init_mtr.m_memo[0].object == new_block); ut_ad(init_mtr.m_memo[0].type == MTR_MEMO_PAGE_X_MODIFY); + new_block->page.fix(); init_mtr.m_memo[0].type= MTR_MEMO_PAGE_X_FIX; init_mtr.rollback_to_savepoint(0, 1); init_mtr.m_log.erase(); + mysql_mutex_lock(&buf_pool.mutex); + new_block->page.unfix(); + ut_d(bool freed=) buf_LRU_free_page(&new_block->page, true); + ut_ad(freed); + mysql_mutex_unlock(&buf_pool.mutex); if (i == size / 2) ut_a(id.page_no() == size); @@ -202,11 +208,6 @@ bool buf_dblwr_t::create() noexcept TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N); mtr.commit(); - buf_flush_wait_flushed(mtr.commit_lsn()); - - /* Remove doublewrite pages from LRU */ - buf_pool_invalidate(); - sql_print_information("InnoDB: Doublewrite buffer created"); goto start_again; } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 4e9f1e015bacf..4368220071042 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2057,8 +2057,7 @@ log file. Use log_make_checkpoint() to flush also the pool. @retval false if a checkpoint write was already running */ static bool log_checkpoint() noexcept { - if (recv_recovery_is_on()) - recv_sys.apply(true); + ut_ad(!recv_recovery_is_on()); #if defined HAVE_valgrind && !__has_feature(memory_sanitizer) /* The built-in scheduler in Valgrind may neglect some threads for a @@ -2130,9 +2129,6 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) noexcept ut_ad(sync_lsn < LSN_MAX); ut_ad(!srv_read_only_mode); - if (recv_recovery_is_on()) - recv_sys.apply(true); - mysql_mutex_lock(&buf_pool.flush_list_mutex); if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn) @@ -2189,9 +2185,6 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious) noexcept { ut_ad(!srv_read_only_mode); - if (recv_recovery_is_on()) - recv_sys.apply(true); - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", return;); Atomic_relaxed &limit= furious @@ -2861,7 +2854,7 @@ ATTRIBUTE_COLD void buf_flush_buffer_pool() noexcept /** Synchronously flush dirty blocks during recv_sys_t::apply(). NOTE: The calling thread is not allowed to hold any buffer page latches! */ -void buf_flush_sync_batch(lsn_t lsn) noexcept +ATTRIBUTE_COLD void buf_flush_sync_batch(lsn_t lsn) noexcept { lsn= std::max(lsn, log_get_lsn()); mysql_mutex_lock(&buf_pool.flush_list_mutex); @@ -2869,43 +2862,6 @@ void buf_flush_sync_batch(lsn_t lsn) noexcept mysql_mutex_unlock(&buf_pool.flush_list_mutex); } -/** Synchronously flush dirty blocks. -NOTE: The calling thread is not allowed to hold any buffer page latches! */ -void buf_flush_sync() noexcept -{ - if (recv_recovery_is_on()) - { - mysql_mutex_lock(&recv_sys.mutex); - recv_sys.apply(true); - mysql_mutex_unlock(&recv_sys.mutex); - } - - thd_wait_begin(nullptr, THD_WAIT_DISKIO); - tpool::tpool_wait_begin(); - log_sys.latch.wr_lock(SRW_LOCK_CALL); - - for (lsn_t lsn= log_sys.get_lsn();;) - { - log_sys.latch.wr_unlock(); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - buf_flush_wait(lsn); - /* Wait for the page cleaner to be idle (for log resizing at startup) */ - while (buf_flush_sync_lsn) - my_cond_wait(&buf_pool.done_flush_list, - &buf_pool.flush_list_mutex.m_mutex); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - log_sys.latch.wr_lock(SRW_LOCK_CALL); - lsn_t new_lsn= log_sys.get_lsn(); - if (lsn == new_lsn) - break; - lsn= new_lsn; - } - - log_sys.latch.wr_unlock(); - tpool::tpool_wait_end(); - thd_wait_end(nullptr); -} - ATTRIBUTE_COLD void buf_pool_t::print_flush_info() const noexcept { /* We do dirty read of UT_LIST count variable. */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index d90038d44a9e1..628bdeb2e7a0e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18440,7 +18440,7 @@ buf_flush_list_now_set(THD*, st_mysql_sys_var*, void*, const void* save) os_aio_wait_until_no_pending_writes(true); } else - buf_flush_sync(); + buf_flush_sync_batch(LSN_MAX); mysql_mutex_lock(&LOCK_global_system_variables); } diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 202771fcfe142..de456680747b6 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -400,10 +400,6 @@ buf_print_io( /** Refresh the statistics used to print per-second averages. */ void buf_refresh_io_stats() noexcept; -/** Invalidate all pages in the buffer pool. -All pages must be in a replaceable state (not modified or latched). */ -void buf_pool_invalidate() noexcept; - /*======================================================================== --------------------------- LOWER LEVEL ROUTINES ------------------------- =========================================================================*/ diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 5d01d38ba21a9..791792bc0983c 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -104,8 +104,4 @@ void buf_flush_validate() noexcept; /** Synchronously flush dirty blocks during recv_sys_t::apply(). NOTE: The calling thread is not allowed to hold any buffer page latches! */ -void buf_flush_sync_batch(lsn_t lsn) noexcept; - -/** Synchronously flush dirty blocks. -NOTE: The calling thread is not allowed to hold any buffer page latches! */ -void buf_flush_sync() noexcept; +ATTRIBUTE_COLD void buf_flush_sync_batch(lsn_t lsn) noexcept; diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 0d4ee7b505a20..5fba4aefdb4de 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -238,7 +238,10 @@ struct recv_sys_t /** whether we are applying redo log records during crash recovery. This can be cleared when holding mutex, or when pages.empty() and - we are holding exclusive log_sys.latch. */ + we are holding exclusive log_sys.latch. When this is set, + buf_flush_page_cleaner() will not invoke log_checkpoint_low(), + buf_pool.flush_list may be unsorted by buf_page_t::oldest_modification(), + and garbage_collect() replaces buf_pool_t::running_out(). */ Atomic_relaxed recovery_on= false; /** whether recv_recover_page(), invoked from buf_page_t::read_complete(), should apply log records*/ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 072e8696cf82c..3744068574d31 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1506,7 +1506,9 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, "Free innodb buffer pool"); + ut_d(mysql_mutex_lock(&buf_pool.mutex)); ut_d(buf_pool.assert_all_freed()); + ut_d(mysql_mutex_unlock(&buf_pool.mutex)); ut_a(lsn == log_get_lsn() || srv_force_recovery == SRV_FORCE_NO_LOG_REDO); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index a37ab1bd2b2c7..ccbb8faf4c727 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -4091,6 +4091,30 @@ static void log_sort_flush_list() noexcept mysql_mutex_unlock(&buf_pool.flush_list_mutex); } +/** Invalidate all pages in the buffer pool. +All pages must be replaceable (not modified, latched, or io-fixed). */ +ATTRIBUTE_COLD static void buf_pool_invalidate() noexcept +{ + mysql_mutex_lock(&buf_pool.mutex); + ut_ad(!os_aio_pending_reads()); + /* os_aio_pending_writes() may hold here if some write_io_callback() + did not release the slot yet. However, buf_flush_sync_batch() waited + for the page write itself to complete, which we will check below. */ + ut_d(buf_pool.assert_all_freed()); + + while (UT_LIST_GET_LEN(buf_pool.LRU)) + buf_LRU_scan_and_free_block(); + + ut_ad(UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0); + + buf_pool.freed_page_clock= 0; + buf_pool.LRU_old= nullptr; + buf_pool.LRU_old_len= 0; + buf_pool.stat.init(); + buf_refresh_io_stats(); + mysql_mutex_unlock(&buf_pool.mutex); +} + /** Apply buffered log to persistent data pages. @param last_batch whether it is possible to write more redo log */ void recv_sys_t::apply(bool last_batch) diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 7c2aafe6c7bf1..75141f33dc909 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -171,7 +171,9 @@ static void delete_log_files() @return DB_SUCCESS or error code */ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) { + ut_ad(log_sys.latch_have_wr()); ut_ad(!srv_read_only_mode); + ut_ad(!buf_pool.get_oldest_modification(0)); /* We will retain ib_logfile0 until we have written a new logically empty log as ib_logfile101 and atomically renamed it to @@ -179,9 +181,6 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) delete_log_files(); ut_ad(!os_aio_pending_reads()); - ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex)); - ut_ad(!buf_pool.get_oldest_modification(0)); - ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex)); /* os_aio_pending_writes() may hold here if some write_io_callback() did not release the slot yet. However, the page write itself must have completed, because the @@ -190,7 +189,6 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) assumption does not hold. */ ut_d(os_aio_wait_until_no_pending_writes(false)); - log_sys.latch.wr_lock(SRW_LOCK_CALL); log_sys.set_capacity(); std::string logfile0{get_log_file_path("ib_logfile101")}; @@ -206,6 +204,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) int(logfile0.size()), logfile0.data()); err_exit: log_sys.latch.wr_unlock(); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); return DB_ERROR; } @@ -223,14 +222,6 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) goto close_and_exit; } - mysql_mutex_lock(&recv_sys.mutex); - const bool all_opened = fil_system.sys_space->open(create_new_db); - mysql_mutex_unlock(&recv_sys.mutex); - - if (!all_opened) { - goto err_exit; - } - /* Create a log checkpoint. */ if (log_sys.is_encrypted() && !log_crypt_init()) { goto err_exit; @@ -255,6 +246,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) buf_page_t::read_complete(). */ recv_sys.recovery_on = false; log_sys.latch.wr_unlock(); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); log_make_checkpoint(); log_buffer_flush_to_disk(); @@ -1068,15 +1060,22 @@ srv_init_abort_low( /** Prepare to delete the redo log file. Flush the dirty pages from all the buffer pools. Flush the redo log buffer to the redo log file. -@return lsn upto which data pages have been flushed. */ -static lsn_t srv_prepare_to_delete_redo_log_file() noexcept +@return lsn upto which data pages have been flushed and log_sys.latch acquired +@retval 0 in case of error; log_sys.latch will not be acquired */ +static ATTRIBUTE_COLD lsn_t srv_prepare_to_delete_redo_log_file() noexcept { DBUG_ENTER("srv_prepare_to_delete_redo_log_file"); + mysql_mutex_lock(&recv_sys.mutex); ut_ad(recv_sys.recovery_on); + recv_sys.apply(true); + const bool all_opened= fil_system.sys_space->open(false); + mysql_mutex_unlock(&recv_sys.mutex); + if (!all_opened) + DBUG_RETURN(0); /* Clean the buffer pool. */ - buf_flush_sync(); + buf_flush_sync_batch(LSN_MAX); DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0);); DBUG_PRINT("ib_log", ("After innodb_log_abort_1")); @@ -1085,21 +1084,6 @@ static lsn_t srv_prepare_to_delete_redo_log_file() noexcept const bool latest_format{log_sys.is_latest()}; lsn_t flushed_lsn{log_sys.get_flushed_lsn(std::memory_order_relaxed)}; - if (latest_format && !(log_sys.file_size & 4095) && - flushed_lsn != log_sys.next_checkpoint_lsn + - (log_sys.is_encrypted() - ? SIZE_OF_FILE_CHECKPOINT + 8 - : SIZE_OF_FILE_CHECKPOINT)) - { -#ifdef HAVE_PMEM - if (!log_sys.is_opened()) - log_sys.buf_size= unsigned(std::min(log_sys.capacity(), - log_sys.buf_size_max)); -#endif - fil_names_clear(flushed_lsn); - flushed_lsn= log_sys.get_lsn(); - } - { const char *msg; if (!latest_format) @@ -1131,18 +1115,6 @@ static lsn_t srv_prepare_to_delete_redo_log_file() noexcept } } - log_sys.latch.wr_unlock(); - - if (latest_format) - log_write_up_to(flushed_lsn, false); - - ut_ad(flushed_lsn == log_get_lsn()); - ut_ad(!os_aio_pending_reads()); - ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex)); - ut_ad(!buf_pool.get_oldest_modification(0)); - ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex)); - ut_d(os_aio_wait_until_no_pending_writes(false)); - DBUG_RETURN(flushed_lsn); } @@ -1381,6 +1353,8 @@ dberr_t srv_start(bool create_new_db) if (create_new_db) { lsn_t flushed_lsn = log_sys.init_lsn(); + log_sys.latch.wr_lock(SRW_LOCK_CALL); + mysql_mutex_lock(&buf_pool.flush_list_mutex); err = create_log_file(true, flushed_lsn); @@ -1470,8 +1444,6 @@ dberr_t srv_start(bool create_new_db) return(srv_init_abort(err)); } - buf_flush_sync(); - ut_ad(!srv_log_file_created); ut_d(srv_log_file_created= true); @@ -1578,17 +1550,16 @@ dberr_t srv_start(bool create_new_db) } else { /* Prepare to delete the old redo log file */ const lsn_t lsn{srv_prepare_to_delete_redo_log_file()}; - - DBUG_EXECUTE_IF("innodb_log_abort_1", - return(srv_init_abort(DB_ERROR));); + if (!lsn) { + return srv_init_abort(DB_ERROR); + } + mysql_mutex_lock(&buf_pool.flush_list_mutex); + ut_ad(!buf_pool.get_oldest_modification(0)); /* Prohibit redo log writes from any other threads until creating a log checkpoint at the end of create_log_file(). */ ut_d(recv_no_log_write = true); ut_ad(!os_aio_pending_reads()); - ut_d(mysql_mutex_lock(&buf_pool.flush_list_mutex)); - ut_ad(!buf_pool.get_oldest_modification(0)); - ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex)); /* os_aio_pending_writes() may hold here if some write_io_callback() did not release the slot yet. However, the page write itself must @@ -1602,6 +1573,9 @@ dberr_t srv_start(bool create_new_db) log_sys.close_file(); DBUG_EXECUTE_IF("innodb_log_abort_5", + log_sys.latch.wr_unlock(); + mysql_mutex_unlock( + &buf_pool.flush_list_mutex); return(srv_init_abort(DB_ERROR));); DBUG_PRINT("ib_log", ("After innodb_log_abort_5"));