From a532f2302d9eac7a2ba52b9929b790c20347c9ba Mon Sep 17 00:00:00 2001
From: Bernd Edlinger
Date: Sun, 9 Mar 2025 11:20:43 +0100
Subject: [PATCH] Do some more cleanup in the RCU code

Only a minimum of 2 qp's are necessary: one for the readers, and at
least one that writers can wait on for retirement. There is no need
for one additional qp that is always unused.

Also only one ACQUIRE barrier is necessary in get_hold_current_qp,
so the ATOMIC_LOAD of the reader_idx can be changed to RELAXED.

And finally clarify some comments.

Reviewed-by: Paul Dale
Reviewed-by: Tomas Mraz
(Merged from https://github.com/openssl/openssl/pull/27012)
---
 crypto/threads_pthread.c | 15 +++++++--------
 crypto/threads_win.c     |  8 +++-----
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/crypto/threads_pthread.c b/crypto/threads_pthread.c
index f69b3d6f45..750ef20121 100644
--- a/crypto/threads_pthread.c
+++ b/crypto/threads_pthread.c
@@ -261,6 +261,8 @@ static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
 
     /* get the current qp index */
     for (;;) {
+        qp_idx = ATOMIC_LOAD_N(uint32_t, &lock->reader_idx, __ATOMIC_RELAXED);
+
         /*
          * Notes on use of __ATOMIC_ACQUIRE
          * We need to ensure the following:
@@ -271,10 +273,7 @@ static struct rcu_qp *get_hold_current_qp(struct rcu_lock_st *lock)
          * of the lock is flushed from a local cpu cache so that we see any
          * updates prior to the load. This is a non-issue on cache coherent
          * systems like x86, but is relevant on other arches
-         * Note: This applies to the reload below as well
          */
-        qp_idx = ATOMIC_LOAD_N(uint32_t, &lock->reader_idx, __ATOMIC_ACQUIRE);
-
         ATOMIC_ADD_FETCH(&lock->qp_group[qp_idx].users, (uint64_t)1,
                          __ATOMIC_ACQUIRE);
 
@@ -475,6 +474,8 @@ void ossl_synchronize_rcu(CRYPTO_RCU_LOCK *lock)
      * prior __ATOMIC_RELEASE write operation in ossl_rcu_read_unlock
      * is visible prior to our read
      * however this is likely just necessary to silence a tsan warning
+     * because the read side should not do any write operation
+     * outside the atomic itself
      */
     do {
         count = ATOMIC_LOAD_N(uint64_t, &qp->users, __ATOMIC_ACQUIRE);
@@ -531,10 +532,10 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
     struct rcu_lock_st *new;
 
     /*
-     * We need a minimum of 3 qp's
+     * We need a minimum of 2 qp's
      */
-    if (num_writers < 3)
-        num_writers = 3;
+    if (num_writers < 2)
+        num_writers = 2;
 
     ctx = ossl_lib_ctx_get_concrete(ctx);
     if (ctx == NULL)
@@ -550,8 +551,6 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
     pthread_mutex_init(&new->alloc_lock, NULL);
     pthread_cond_init(&new->prior_signal, NULL);
     pthread_cond_init(&new->alloc_signal, NULL);
-    /* By default our first writer is already alloced */
-    new->writers_alloced = 1;
 
     new->qp_group = allocate_new_qp_group(new, num_writers);
     if (new->qp_group == NULL) {
diff --git a/crypto/threads_win.c b/crypto/threads_win.c
index 72f54f118c..97b6d3eb73 100644
--- a/crypto/threads_win.c
+++ b/crypto/threads_win.c
@@ -138,10 +138,10 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
     struct rcu_lock_st *new;
 
     /*
-     * We need a minimum of 3 qps
+     * We need a minimum of 2 qps
      */
-    if (num_writers < 3)
-        num_writers = 3;
+    if (num_writers < 2)
+        num_writers = 2;
 
     ctx = ossl_lib_ctx_get_concrete(ctx);
     if (ctx == NULL)
@@ -160,8 +160,6 @@ CRYPTO_RCU_LOCK *ossl_rcu_lock_new(int num_writers, OSSL_LIB_CTX *ctx)
     new->alloc_lock = ossl_crypto_mutex_new();
     new->prior_lock = ossl_crypto_mutex_new();
     new->qp_group = allocate_new_qp_group(new, num_writers);
-    /* By default the first qp is already alloced */
-    new->writers_alloced = 1;
     if (new->qp_group == NULL
         || new->alloc_signal == NULL
         || new->prior_signal == NULL
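
As a footnote to the first hunk: the reader-side entry path it rearranges can
be sketched with plain C11 atomics, as below. This is a simplified
illustration under assumptions, not OpenSSL's actual code: the structure
layout, the NUM_QPS constant, the use of <stdatomic.h> instead of the
ATOMIC_* wrapper macros, and the retry policy are all stand-ins. What it
demonstrates is the point of the commit message: the initial load of
reader_idx needs no ordering of its own, because the ACQUIRE on the users
increment is the one barrier that orders the reader's later loads of
protected data against the writer's prior release stores.

    /* Illustrative sketch only -- not OpenSSL's implementation. */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NUM_QPS 2   /* the new minimum: one qp for readers, one to retire */

    struct rcu_qp {
        _Atomic uint64_t users;      /* readers currently inside this qp */
    };

    struct rcu_lock {
        _Atomic uint32_t reader_idx; /* which qp new readers should enter */
        struct rcu_qp qp_group[NUM_QPS];
    };

    /* Reader-side entry: pin the current qp so a writer must wait for us. */
    static struct rcu_qp *get_hold_current_qp(struct rcu_lock *lock)
    {
        uint32_t qp_idx;

        for (;;) {
            /* Relaxed suffices: this load publishes nothing by itself. */
            qp_idx = atomic_load_explicit(&lock->reader_idx,
                                          memory_order_relaxed);

            /*
             * The single acquire barrier: orders this reader's subsequent
             * loads of protected data after the writer's release stores.
             */
            atomic_fetch_add_explicit(&lock->qp_group[qp_idx].users, 1,
                                      memory_order_acquire);

            /* If the writer swapped qps meanwhile, back out and retry. */
            if (qp_idx == atomic_load_explicit(&lock->reader_idx,
                                               memory_order_relaxed))
                return &lock->qp_group[qp_idx];

            atomic_fetch_sub_explicit(&lock->qp_group[qp_idx].users, 1,
                                      memory_order_release);
        }
    }

    int main(void)
    {
        struct rcu_lock lock = {0};
        struct rcu_qp *qp = get_hold_current_qp(&lock);

        printf("entered qp %td, users now %llu\n", qp - lock.qp_group,
               (unsigned long long)atomic_load(&qp->users));
        return 0;
    }

On strongly ordered targets such as x86, the relaxed and acquire loads
compile to the same plain load instruction; the distinction only matters on
weakly ordered architectures, which is what the retained comment block in
the hunk is about.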