From fce514611fae9c3822d3502087d2e92bb7b63de7 Mon Sep 17 00:00:00 2001 From: Felix Moessbauer Date: Tue, 10 Sep 2024 19:11:56 +0200 Subject: [PATCH] io_uring/io-wq: do not allow pinning outside of cpuset commit 0997aa5497c714edbb349ca366d28bd550ba3408 upstream. The io worker threads are userland threads that just never exit to the userland. By that, they are also assigned to a cgroup (the group of the creating task). When changing the affinity of the io_wq thread via syscall, we must only allow cpumasks within the limits defined by the cpuset controller of the cgroup (if enabled). Fixes: da64d6db3bd3 ("io_uring: One wqe per wq") Signed-off-by: Felix Moessbauer Link: https://lore.kernel.org/r/20240910171157.166423-2-felix.moessbauer@siemens.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io-wq.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 139cd49b2c27..c74bcc8d2f06 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1362,22 +1363,34 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask) { + cpumask_var_t allowed_mask; + int ret = 0; int i; if (!tctx || !tctx->io_wq) return -EINVAL; + if (!alloc_cpumask_var(&allowed_mask, GFP_KERNEL)) + return -ENOMEM; + cpuset_cpus_allowed(tctx->io_wq->task, allowed_mask); + rcu_read_lock(); for_each_node(i) { struct io_wqe *wqe = tctx->io_wq->wqes[i]; - - if (mask) - cpumask_copy(wqe->cpu_mask, mask); - else - cpumask_copy(wqe->cpu_mask, cpumask_of_node(i)); + if (mask) { + if (cpumask_subset(mask, allowed_mask)) + cpumask_copy(wqe->cpu_mask, mask); + else + ret = -EINVAL; + } else { + if (!cpumask_and(wqe->cpu_mask, cpumask_of_node(i), allowed_mask)) + cpumask_copy(wqe->cpu_mask, allowed_mask); + } } rcu_read_unlock(); - return 0; + + free_cpumask_var(allowed_mask); + return ret; } /*