From 63cb471a447fcbca5c12ee100adfd68d84f2c40d Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Wed, 4 Feb 2026 08:44:15 -0500 Subject: [PATCH 1/2] DAOS-18531 vos: setup ts_set in vos_obj_incarnate() Replace the vos_obj_hold() call before vos_obj_incarnate() with vos_obj_acquire() to ensure the ts_set setup is done entirely within the transaction, which makes the code cleaner and avoids the following race: - Updater A called vos_obj_hold(), failed to find the OI, and marked the obj entry in ts_set as negative. - Updater A yielded. - Concurrent updater B created the OI and set up obj->obj_df. - Updater A resumed and called vos_obj_incarnate(), which found that obj->obj_df was non-NULL and so skipped OI creation. - The ts_set for updater A is now in an inconsistent state: it's negative but without 'se_create_idx' set. Signed-off-by: Niu Yawei --- src/vos/vos_io.c | 36 ++++++---------------------- src/vos/vos_obj.c | 5 ++-- src/vos/vos_obj_cache.c | 53 ++++++++++++++++++++++++++--------------- 3 files changed, 43 insertions(+), 51 deletions(-) diff --git a/src/vos/vos_io.c b/src/vos/vos_io.c index 4d105b91412..e2bcad16dc3 100644 --- a/src/vos/vos_io.c +++ b/src/vos/vos_io.c @@ -37,9 +37,7 @@ struct vos_io_context { daos_iod_t *ic_iods; struct dcs_iod_csums *ic_iod_csums; /** reference on the object */ - struct vos_object *ic_obj; - /** used only for md-on-ssd phase2 evictable pool */ - struct vos_object *ic_pinned_obj; + struct vos_object *ic_obj; /** BIO descriptor, has ic_iod_nr SGLs */ struct bio_desc *ic_biod; struct vos_ts_set *ic_ts_set; @@ -603,9 +601,6 @@ vos_ioc_destroy(struct vos_io_context *ioc, bool evict) if (ioc->ic_obj) vos_obj_release(ioc->ic_obj, 0, evict); - if (ioc->ic_pinned_obj) - vos_obj_release(ioc->ic_pinned_obj, 0, evict); - vos_ioc_reserve_fini(ioc); vos_ilog_fetch_finish(&ioc->ic_dkey_info); vos_ilog_fetch_finish(&ioc->ic_akey_info); @@ -2210,7 +2205,7 @@ reserve_space(struct vos_io_context *ioc, uint16_t media, daos_size_t size, if (media == DAOS_MEDIA_SCM) { umem_off_t 
umoff; - umoff = vos_reserve_scm(ioc->ic_cont, ioc->ic_rsrvd_scm, size, ioc->ic_pinned_obj); + umoff = vos_reserve_scm(ioc->ic_cont, ioc->ic_rsrvd_scm, size, ioc->ic_obj); if (!UMOFF_IS_NULL(umoff)) { ioc->ic_umoffs[ioc->ic_umoffs_cnt] = umoff; ioc->ic_umoffs_cnt++; @@ -2572,7 +2567,8 @@ vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err, if (err != 0) goto abort; - if (ioc->ic_pinned_obj != NULL && unlikely(vos_obj_is_evicted(ioc->ic_pinned_obj))) { + D_ASSERT(ioc->ic_obj != NULL); + if (unlikely(vos_obj_is_evicted(ioc->ic_obj))) { D_DEBUG(DB_IO, "Obj " DF_UOID " is evicted during update, need to restart TX.\n", DP_UOID(ioc->ic_oid)); @@ -2582,14 +2578,6 @@ vos_update_end(daos_handle_t ioh, uint32_t pm_ver, daos_key_t *dkey, int err, err = vos_ts_set_add(ioc->ic_ts_set, ioc->ic_cont->vc_ts_idx, NULL, 0); D_ASSERT(err == 0); - err = vos_obj_hold(ioc->ic_cont, ioc->ic_oid, &ioc->ic_epr, ioc->ic_bound, - flags, DAOS_INTENT_UPDATE, &ioc->ic_obj, ioc->ic_ts_set); - if (err != 0) - goto abort; - - if (ioc->ic_pinned_obj != NULL) - D_ASSERT(ioc->ic_pinned_obj == ioc->ic_obj); - err = vos_tx_begin(dth, umem, ioc->ic_cont->vc_pool->vp_sysdb, ioc->ic_obj); if (err != 0) goto abort; @@ -2767,19 +2755,9 @@ vos_update_begin(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch, goto error; } - /* Hold the object for the evictable md-on-ssd phase2 pool */ - if (vos_pool_is_evictable(vos_cont2pool(ioc->ic_cont))) { - /* - * FIXME: - * The same object will be referenced by vos_obj_acquire() and vos_obj_hold() - * (in vos_update_end()) twice, this is for avoiding the complication of adding - * object ilog to ts_set. We'll re-org vos_obj_hold() in the future to make the - * code look cleaner. 
- */ - rc = vos_obj_acquire(ioc->ic_cont, ioc->ic_oid, true, &ioc->ic_pinned_obj); - if (rc != 0) - goto error; - } + rc = vos_obj_acquire(ioc->ic_cont, ioc->ic_oid, true, &ioc->ic_obj); + if (rc != 0) + goto error; rc = dkey_update_begin(ioc); if (rc != 0) { diff --git a/src/vos/vos_obj.c b/src/vos/vos_obj.c index 117cf8baaab..210e71608cd 100644 --- a/src/vos/vos_obj.c +++ b/src/vos/vos_obj.c @@ -490,7 +490,7 @@ vos_obj_punch(daos_handle_t coh, daos_unit_oid_t oid, daos_epoch_t epoch, hold_flags = (flags & VOS_OF_COND_PUNCH) ? 0 : VOS_OBJ_CREATE; hold_flags |= VOS_OBJ_VISIBLE; - rc = vos_obj_hold(cont, oid, &epr, bound, hold_flags, DAOS_INTENT_PUNCH, &obj, ts_set); + rc = vos_obj_acquire(cont, oid, true, &obj); if (rc != 0) goto reset; @@ -842,8 +842,7 @@ vos_obj_mark_corruption(daos_handle_t coh, daos_epoch_t epoch, uint32_t pm_ver, } restart: - rc = vos_obj_hold(cont, oid, &epr, epoch, VOS_OBJ_VISIBLE | VOS_OBJ_CREATE, - DAOS_INTENT_MARK, &obj, NULL); + rc = vos_obj_acquire(cont, oid, true, &obj); if (rc != 0) goto log; diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index ba1898e0f25..8d92094cefe 100644 --- a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -535,23 +535,36 @@ vos_obj_incarnate(struct vos_object *obj, daos_epoch_range_t *epr, daos_epoch_t D_ASSERT(intent == DAOS_INTENT_PUNCH || intent == DAOS_INTENT_UPDATE || intent == DAOS_INTENT_MARK); + if (check_discard(obj, flags)) + return -DER_UPDATE_AGAIN; + + /* Lookup OI table if the cached object is negative */ if (obj->obj_df == NULL) { - rc = vos_oi_alloc(cont, obj->obj_id, epr->epr_hi, &obj->obj_df, ts_set); - if (rc) { - DL_ERROR(rc, DF_CONT": Failed to allocate OI "DF_UOID".", - DP_CONT(cont->vc_pool->vp_id, cont->vc_id), - DP_UOID(obj->obj_id)); + obj->obj_sync_epoch = 0; + rc = vos_oi_find(cont, obj->obj_id, &obj->obj_df, ts_set); + if (rc == 0) { + obj->obj_sync_epoch = obj->obj_df->vo_sync; + } else if (rc == -DER_NONEXIST) { + rc = vos_oi_alloc(cont, obj->obj_id, epr->epr_hi, &obj->obj_df, ts_set); + if (rc) { + DL_ERROR(rc, DF_CONT ": Failed to allocate OI " DF_UOID ".", + DP_CONT(cont->vc_pool->vp_id, cont->vc_id), + DP_UOID(obj->obj_id)); + return rc; + } + D_ASSERT(obj->obj_df); + } else if (rc) { + DL_ERROR(rc, DF_CONT ": Failed to find OI " DF_UOID ".", + DP_CONT(cont->vc_pool->vp_id, cont->vc_id), DP_UOID(obj->obj_id)); return rc; } - D_ASSERT(obj->obj_df); - } else { + } else if (likely(intent != DAOS_INTENT_MARK)) { vos_ilog_ts_ignore(vos_obj2umm(obj), &obj->obj_df->vo_ilog); + rc = vos_ilog_ts_add(ts_set, &obj->obj_df->vo_ilog, &obj->obj_id, + sizeof(obj->obj_id)); + D_ASSERT(rc == 0); /* Non-zero only valid for akey */ } - /* Check again since it could yield since vos_obj_hold() */ - if (check_discard(obj, flags)) - return -DER_UPDATE_AGAIN; - /* Check the sync epoch */ if (intent != DAOS_INTENT_MARK && epr->epr_hi <= obj->obj_sync_epoch && vos_dth_get(obj->obj_cont->vc_pool->vp_sysdb) != NULL) { @@ -979,14 +992,16 @@ vos_obj_acquire(struct 
vos_container *cont, daos_unit_oid_t oid, bool pin, } } - if (!obj->obj_bkt_alloted) - obj_allot_bkt(cont->vc_pool, obj); + if (vos_pool_is_evictable(cont->vc_pool)) { + if (!obj->obj_bkt_alloted) + obj_allot_bkt(cont->vc_pool, obj); - if (pin) { - rc = obj_pin_bkt(cont->vc_pool, obj); - if (rc) { - obj_put(occ, obj, false); - return rc; + if (pin) { + rc = obj_pin_bkt(cont->vc_pool, obj); + if (rc) { + obj_put(occ, obj, false); + return rc; + } } } From fff694b4b6027ae846fbd1f4cb7ffec6eaeac903 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Sat, 7 Feb 2026 02:02:48 -0500 Subject: [PATCH 2/2] DAOS-18531 vos: add comment for vos_obj_hold() Add comment for vos_obj_hold(). Signed-off-by: Niu Yawei --- src/vos/vos_obj_cache.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/vos/vos_obj_cache.c b/src/vos/vos_obj_cache.c index 8d92094cefe..c880dcfe919 100644 --- a/src/vos/vos_obj_cache.c +++ b/src/vos/vos_obj_cache.c @@ -623,6 +623,10 @@ vos_obj_incarnate(struct vos_object *obj, daos_epoch_range_t *epr, daos_epoch_t return rc; } +/* + * The legacy function is being phased out. It is currently used to hold an object for fetch and + * iteration operations. Update and punch operations use vos_obj_acquire() to hold an object. + */ int vos_obj_hold(struct vos_container *cont, daos_unit_oid_t oid, daos_epoch_range_t *epr, daos_epoch_t bound, uint64_t flags, uint32_t intent, struct vos_object **obj_p,