diff --git a/Cargo.lock b/Cargo.lock index 4cc5c378c..9426e08f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4696,6 +4696,13 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pbind" +version = "0.0.0" +dependencies = [ + "libc", +] + [[package]] name = "pem" version = "3.0.5" @@ -5510,6 +5517,7 @@ dependencies = [ "oximeter", "oximeter-instruments", "oximeter-producer", + "pbind", "propolis", "propolis-server-api", "propolis_api_types", @@ -5567,6 +5575,7 @@ dependencies = [ "futures", "libc", "oxide-tokio-rt", + "pbind", "propolis", "propolis_types", "serde", diff --git a/Cargo.toml b/Cargo.toml index d225dd945..ccda40ff9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -50,6 +50,7 @@ cpuid_profile_config = { path = "crates/cpuid-profile-config" } dladm = { path = "crates/dladm" } nvpair = { path = "crates/nvpair" } nvpair_sys = { path = "crates/nvpair/sys" } +pbind = { path = "crates/pbind" } propolis-config-toml = { path = "crates/propolis-config-toml" } propolis_api_types = { path = "crates/propolis-api-types" } propolis-server-api = { path = "crates/propolis-server-api" } diff --git a/bin/propolis-server/Cargo.toml b/bin/propolis-server/Cargo.toml index 7c8f4a5d1..8e2671ab1 100644 --- a/bin/propolis-server/Cargo.toml +++ b/bin/propolis-server/Cargo.toml @@ -43,6 +43,7 @@ oxide-tokio-rt.workspace = true oximeter-instruments.workspace = true oximeter-producer.workspace = true oximeter.workspace = true +pbind.workspace = true ron.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } diff --git a/bin/propolis-server/src/lib/vcpu_tasks.rs b/bin/propolis-server/src/lib/vcpu_tasks.rs index e74074e3e..e21ffc027 100644 --- a/bin/propolis-server/src/lib/vcpu_tasks.rs +++ b/bin/propolis-server/src/lib/vcpu_tasks.rs @@ -22,6 +22,8 @@ use thiserror::Error; pub enum VcpuTaskError { #[error("Failed to spawn a vCPU backing thread: {0}")] BackingThreadSpawnFailed(std::io::Error), + #[error("CPU bindings did not match vCPUs: {bindings} bindings for {vcpus} vCPUs")] + CpuBindingMismatch { bindings: usize, vcpus: usize }, } pub struct VcpuTasks { @@ -41,11 +43,30 @@ impl VcpuTasks { pub(crate) fn new( machine: &propolis::Machine, event_handler: Arc, + bind_cpus: Option>, log: slog::Logger, ) -> Result { let generation = Arc::new(AtomicUsize::new(0)); + + // We take in an `Option>` but a `Vec>` is more + // convenient for spawning below, so we have to shuffle values a bit.. + let mut bindings = vec![None; machine.vcpus.len()]; + if let Some(bind_cpus) = bind_cpus { + if bind_cpus.len() != machine.vcpus.len() { + return Err(VcpuTaskError::CpuBindingMismatch { + bindings: bind_cpus.len(), + vcpus: machine.vcpus.len(), + }); + } + for i in 0..machine.vcpus.len() { + bindings[i] = Some(bind_cpus[i]); + } + } + let mut tasks = Vec::new(); - for vcpu in machine.vcpus.iter().map(Arc::clone) { + for (vcpu, bind_cpu) in + machine.vcpus.iter().map(Arc::clone).zip(bindings.into_iter()) + { let (task, ctrl) = propolis::tasks::TaskHdl::new_held(Some(vcpu.barrier_fn())); let task_log = log.new(slog::o!("vcpu" => vcpu.id)); @@ -54,6 +75,10 @@ impl VcpuTasks { let thread = std::thread::Builder::new() .name(format!("vcpu-{}", vcpu.id)) .spawn(move || { + if let Some(bind_cpu) = bind_cpu { + pbind::bind_lwp(bind_cpu) + .expect("can bind to specified CPU"); + } Self::vcpu_loop( vcpu.as_ref(), task, diff --git a/bin/propolis-server/src/lib/vm/ensure.rs b/bin/propolis-server/src/lib/vm/ensure.rs index 20bc39ee5..127009b69 100644 --- a/bin/propolis-server/src/lib/vm/ensure.rs +++ b/bin/propolis-server/src/lib/vm/ensure.rs @@ -581,9 +581,28 @@ async fn initialize_vm_objects( init.register_guest_hv_interface(guest_hv_lifecycle); init.initialize_cpus().await?; + + let total_cpus = pbind::online_cpus()?; + let vcpu_count: i32 = machine + .vcpus + .len() + .try_into() + .map_err(|_| anyhow::anyhow!("more than 2^31 vCPUs"))?; + let bind_cpus = if vcpu_count > total_cpus / 2 { + let mut bind_cpus = Vec::new(); + for i in 0..vcpu_count { + // Bind to the upper range of CPUs, fairly arbitrary. + bind_cpus.push(total_cpus - vcpu_count + i); + } + Some(bind_cpus) + } else { + None + }; + let vcpu_tasks = Box::new(crate::vcpu_tasks::VcpuTasks::new( &machine, event_queue.clone() as Arc, + bind_cpus, log.new(slog::o!("component" => "vcpu_tasks")), )?); diff --git a/bin/propolis-standalone/Cargo.toml b/bin/propolis-standalone/Cargo.toml index 72627b802..137affeb2 100644 --- a/bin/propolis-standalone/Cargo.toml +++ b/bin/propolis-standalone/Cargo.toml @@ -39,6 +39,7 @@ slog-term.workspace = true strum = { workspace = true, features = ["derive"] } tar.workspace = true uuid.workspace = true +pbind.workspace = true [features] default = [] diff --git a/bin/propolis-standalone/src/config.rs b/bin/propolis-standalone/src/config.rs index 9bc9637d3..3f9797cdb 100644 --- a/bin/propolis-standalone/src/config.rs +++ b/bin/propolis-standalone/src/config.rs @@ -54,6 +54,9 @@ pub struct Main { pub memory: usize, pub use_reservoir: Option, pub cpuid_profile: Option, + /// How vCPUs should be bound to physical processors, if at all. If not + /// provided, vCPUs are not bound (equivalent to setting `any`). + pub cpu_binding: Option, /// Process exitcode to emit if/when instance halts /// /// Default: 0 @@ -69,6 +72,16 @@ pub struct Main { pub boot_order: Option>, } +#[derive(Copy, Clone, Debug, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub enum BindingStrategy { + /// vCPUs are not bound to any particular physical processor. + Any, + /// vCPUs are bound to the highest-numbered processors in the system, one + /// vCPU per CPU, with the last vCPU bound to the last physical processor. + FromLast, +} + /// A hard-coded device, either enabled by default or accessible locally /// on a machine. #[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index 507284faf..aff341782 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -286,7 +286,29 @@ impl Instance { let state = &mut *state_guard; let machine = state.machine.as_ref().unwrap(); - for vcpu in machine.vcpus.iter().map(Arc::clone) { + let bind_cpus = match this.0.config.main.cpu_binding { + Some(config::BindingStrategy::FromLast) => { + let mut bind_cpus = vec![None; machine.vcpus.len()]; + let total_cpus = + pbind::online_cpus().expect("can get processor count"); + let vcpu_count: i32 = + machine.vcpus.len().try_into().expect("<2^31 vCPUs"); + + let first_bound_cpu = total_cpus - vcpu_count; + for i in 0..vcpu_count { + // Bind to the upper range of CPUs. + bind_cpus[i as usize] = Some(first_bound_cpu + i); + } + bind_cpus + } + Some(config::BindingStrategy::Any) | None => { + vec![None; machine.vcpus.len()] + } + }; + + for (vcpu, bind_cpu) in + machine.vcpus.iter().map(Arc::clone).zip(bind_cpus.into_iter()) + { let (task, ctrl) = propolis::tasks::TaskHdl::new_held(Some(vcpu.barrier_fn())); @@ -295,6 +317,9 @@ impl Instance { let _ = std::thread::Builder::new() .name(format!("vcpu-{}", vcpu.id)) .spawn(move || { + if let Some(bind_cpu) = bind_cpu { + pbind::bind_lwp(bind_cpu).expect("can bind vcpu"); + } Instance::vcpu_loop(inner, vcpu.as_ref(), &task, task_log) }) .unwrap(); diff --git a/crates/pbind/Cargo.toml b/crates/pbind/Cargo.toml new file mode 100644 index 000000000..0583fba7d --- /dev/null +++ b/crates/pbind/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "pbind" +version = "0.0.0" +license = "MPL-2.0" +edition = "2021" + +[dependencies] +libc.workspace = true diff --git a/crates/pbind/src/lib.rs b/crates/pbind/src/lib.rs new file mode 100644 index 000000000..f92af5585 --- /dev/null +++ b/crates/pbind/src/lib.rs @@ -0,0 +1,100 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// C-style type names follow, opt out of warnings for using names from headers. +#![allow(non_camel_case_types)] + +//! Utility functions for binding LWPs to specific CPUs. +//! +//! This is generally a very light wrapper for illumos' `sysconf(3c)` and +//! `processor_bind(2)`, plus a few constants out of related headers. + +use std::io::Error; + +// From `` +pub type id_t = i32; + +// From `` +pub type processorid_t = i32; + +// From `` +pub type idtype_t = i32; + +/// The enum values `idtype_t` can be. This is separate to be more explicit that +/// idtype_t is the ABI type, but is `repr(i32)` to make casting to `idtype_t` +/// trivial. +#[allow(non_camel_case_types)] +#[repr(i32)] +pub enum IdType { + P_PID, + P_PPID, + P_PGID, + P_SID, + P_CID, + P_UID, + P_GID, + P_ALL, + P_LWPID, + P_TASKID, + P_PROJID, + P_POOLID, + P_ZONEID, + P_CTID, + P_CPUID, + P_PSETID, +} + +// Returns an `i32` to match `processorid_t`, so that `0..online_cpus()` +// produces a range of processor IDs without additional translation needed. +pub fn online_cpus() -> Result { + let res = unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) }; + + if res == -1 { + return Err(Error::last_os_error()); + } + + res.try_into().map_err(|_| { + // sysconf() reports more than 2^31 processors?! + Error::other(format!("too many processors: {}", res)) + }) +} + +#[cfg(target_os = "illumos")] +/// Bind the current LWP to the specified processor. +pub fn bind_lwp(bind_cpu: processorid_t) -> Result<(), Error> { + extern "C" { + fn processor_bind( + idtype: idtype_t, + id: id_t, + processorid: processorid_t, + obind: *mut processorid_t, + ) -> i32; + } + + // From ``. + const P_MYID: id_t = -1; + + let res = unsafe { + processor_bind( + IdType::P_LWPID as i32, + P_MYID, + bind_cpu, + std::ptr::null_mut(), + ) + }; + + if res != 0 { + return Err(Error::last_os_error()); + } + + Ok(()) +} + +#[cfg(not(target_os = "illumos"))] +/// On non-illumos targets, we're not actually running a VM. We do need the +/// crate to compile to be nicer for blanket `cargo test` invocations on other +/// platforms. So a no-op function will do. +pub fn bind_lwp(_bind_cpu: processorid_t) -> Result<(), Error> { + Ok(()) +}