diff --git a/Cargo.lock b/Cargo.lock index 366f001a6..f5526b267 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3392,6 +3392,23 @@ dependencies = [ "url", ] +[[package]] +name = "openshell-driver-apple-container" +version = "0.0.0" +dependencies = [ + "chrono", + "futures", + "openshell-core", + "prost-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tonic", + "tracing", +] + [[package]] name = "openshell-driver-docker" version = "0.0.0" @@ -3644,6 +3661,7 @@ dependencies = [ "miette", "openshell-bootstrap", "openshell-core", + "openshell-driver-apple-container", "openshell-driver-docker", "openshell-driver-kubernetes", "openshell-driver-podman", diff --git a/architecture/compute-runtimes.md b/architecture/compute-runtimes.md index c8ecd3e5c..95befef24 100644 --- a/architecture/compute-runtimes.md +++ b/architecture/compute-runtimes.md @@ -32,13 +32,14 @@ when a sandbox create request asks for GPU resources. |---|---|---|---| | Docker | Local development with Docker available. | Container plus nested sandbox namespace. | Uses host networking so loopback gateway endpoints work from the supervisor. | | Podman | Rootless or single-machine deployments. | Container plus nested sandbox namespace. | Uses the Podman REST API, OCI image volumes, and CDI GPU devices when available. | +| Apple Container | Local macOS development on Apple silicon. | One lightweight Linux VM per sandbox container, plus the nested OpenShell sandbox namespace. | Uses Apple's `container` CLI and a host-reachable supervisor callback endpoint for guest-to-gateway communication. | | Kubernetes | Cluster deployment through Helm. | Pod plus nested sandbox namespace. | Uses Kubernetes API objects, service accounts, secrets, PVC-backed workspace storage, and GPU resources. | | VM | Experimental microVM isolation. | Per-sandbox libkrun VM. | Gateway spawns `openshell-driver-vm` as a subprocess over a private, state-local Unix socket. 
The VM driver boots a cached bootstrap `rootfs.ext4`, prepares requested OCI images inside a bootstrap VM with `umoci`, attaches the prepared image disk read-only, and gives each sandbox a writable `overlay.ext4` for merged-root changes and runtime material. The driver persists each accepted launch request beside the overlay and restarts those VMs on driver startup without recreating the overlay. | Per-sandbox CPU and memory values currently enter the driver layer through -template resource limits. Docker and Podman apply them as runtime limits. -Kubernetes mirrors each limit into the matching request. VM accepts the fields -but currently ignores them. +template resource limits. Docker, Podman, and Apple Container apply them as +runtime limits. Kubernetes mirrors each limit into the matching request. VM +accepts the fields but currently ignores them. Docker and Podman also accept per-sandbox driver-config mounts for existing runtime-managed named volumes and tmpfs mounts. Podman additionally accepts @@ -66,6 +67,7 @@ Runtime-specific implementation notes belong in the driver crate README: - `crates/openshell-driver-docker/README.md` - `crates/openshell-driver-podman/README.md` - `crates/openshell-driver-kubernetes/README.md` +- `crates/openshell-driver-apple-container/README.md` - `crates/openshell-driver-vm/README.md` ## Supervisor Delivery @@ -77,6 +79,7 @@ The supervisor must be available inside each sandbox workload: | Docker | Bind-mounted local supervisor binary, or a binary extracted from the configured supervisor image. | | Podman | Read-only OCI image volume containing the supervisor binary. | | Kubernetes | Sandbox pod image or pod template configuration. | +| Apple Container | Host bind-mounted Linux `openshell-sandbox` binary from `supervisor_bin_dir`. | | VM | Embedded in the guest rootfs bundle. 
| Driver-controlled environment variables must override sandbox image or template diff --git a/crates/openshell-bootstrap/src/pki.rs b/crates/openshell-bootstrap/src/pki.rs index adc2c48f1..92338155e 100644 --- a/crates/openshell-bootstrap/src/pki.rs +++ b/crates/openshell-bootstrap/src/pki.rs @@ -28,7 +28,8 @@ pub struct PkiBundle { /// /// Covers the host aliases used by every supported runtime: Kubernetes service DNS, /// `host.docker.internal` for Docker Desktop and rootless Docker on Linux, -/// and `host.containers.internal` for Podman containers reaching their host. +/// `host.containers.internal` for Podman containers reaching their host, and +/// `host.container.internal` for Apple Container guests reaching their host. pub const DEFAULT_SERVER_SANS: &[&str] = &[ "openshell", "openshell.openshell.svc", @@ -38,6 +39,7 @@ pub const DEFAULT_SERVER_SANS: &[&str] = &[ "*.openshell.localhost", "host.docker.internal", "host.containers.internal", + "host.container.internal", "127.0.0.1", ]; @@ -190,5 +192,6 @@ mod tests { fn default_server_sans_include_local_container_hostnames() { assert!(DEFAULT_SERVER_SANS.contains(&"host.docker.internal")); assert!(DEFAULT_SERVER_SANS.contains(&"host.containers.internal")); + assert!(DEFAULT_SERVER_SANS.contains(&"host.container.internal")); } } diff --git a/crates/openshell-core/src/config.rs b/crates/openshell-core/src/config.rs index eaaf1e4a0..cf420fe15 100644 --- a/crates/openshell-core/src/config.rs +++ b/crates/openshell-core/src/config.rs @@ -55,6 +55,8 @@ pub enum ComputeDriverKind { Vm, Docker, Podman, + #[serde(rename = "apple-container")] + AppleContainer, } impl ComputeDriverKind { @@ -65,6 +67,7 @@ impl ComputeDriverKind { Self::Vm => "vm", Self::Docker => "docker", Self::Podman => "podman", + Self::AppleContainer => "apple-container", } } } @@ -84,8 +87,9 @@ impl FromStr for ComputeDriverKind { "vm" => Ok(Self::Vm), "docker" => Ok(Self::Docker), "podman" => Ok(Self::Podman), + "apple-container" => 
Ok(Self::AppleContainer), other => Err(format!( - "unsupported compute driver '{other}'. expected one of: kubernetes, vm, docker, podman" + "unsupported compute driver '{other}'. expected one of: kubernetes, vm, docker, podman, apple-container" )), } } @@ -94,7 +98,7 @@ impl FromStr for ComputeDriverKind { /// Auto-detect the appropriate compute driver based on the runtime environment. /// /// Priority order: Kubernetes → Podman → Docker. -/// VM is never auto-detected (requires explicit `--drivers vm`). +/// VM and Apple Container are never auto-detected and require explicit drivers. /// /// Returns the first driver where the environment check passes. /// Returns `None` if no compatible driver is found. @@ -795,12 +799,24 @@ mod tests { "docker".parse::().unwrap(), ComputeDriverKind::Docker ); + assert_eq!( + "apple-container".parse::().unwrap(), + ComputeDriverKind::AppleContainer + ); + assert_eq!( + " Apple-Container ".parse::().unwrap(), + ComputeDriverKind::AppleContainer + ); } #[test] fn compute_driver_kind_rejects_unknown_values() { let err = "firecracker".parse::().unwrap_err(); assert!(err.contains("unsupported compute driver 'firecracker'")); + let err = "apple_container".parse::().unwrap_err(); + assert!(err.contains("unsupported compute driver 'apple_container'")); + let err = "apple".parse::().unwrap_err(); + assert!(err.contains("unsupported compute driver 'apple'")); } #[test] diff --git a/crates/openshell-core/src/driver_utils.rs b/crates/openshell-core/src/driver_utils.rs index 9e4411b2a..dcc1c85c4 100644 --- a/crates/openshell-core/src/driver_utils.rs +++ b/crates/openshell-core/src/driver_utils.rs @@ -53,8 +53,9 @@ pub const SUPERVISOR_CONTAINER_BINARY: &str = "/opt/openshell/bin/openshell-sand // --------------------------------------------------------------------------- // In-container mount paths for guest TLS materials and the sandbox token. 
// -// All container-based drivers (Docker, Podman, Kubernetes) mount the gateway's -// mTLS client credentials at these fixed paths inside every sandbox container. +// All container-based drivers (Docker, Podman, Apple Container, Kubernetes) +// mount the gateway's mTLS client credentials at these fixed paths inside every +// sandbox container. // The supervisor reads these paths on startup to establish its gRPC-over-mTLS // connection back to the gateway. The paths must remain stable across driver // versions since the supervisor binary is built and packaged separately. diff --git a/crates/openshell-core/src/telemetry.rs b/crates/openshell-core/src/telemetry.rs index 96f68d35c..1aa0acb4c 100644 --- a/crates/openshell-core/src/telemetry.rs +++ b/crates/openshell-core/src/telemetry.rs @@ -161,6 +161,7 @@ pub enum TelemetryComputeDriver { Kubernetes, Podman, Vm, + AppleContainer, Unknown, } @@ -172,6 +173,7 @@ impl TelemetryComputeDriver { Self::Kubernetes => "kubernetes", Self::Podman => "podman", Self::Vm => "vm", + Self::AppleContainer => "apple-container", Self::Unknown => "unknown", } } @@ -183,6 +185,7 @@ impl TelemetryComputeDriver { "k8s" | "kubernetes" => Self::Kubernetes, "podman" => Self::Podman, "vm" => Self::Vm, + "apple-container" => Self::AppleContainer, _ => Self::Unknown, } } @@ -194,6 +197,7 @@ impl TelemetryComputeDriver { Some(crate::ComputeDriverKind::Kubernetes) => Self::Kubernetes, Some(crate::ComputeDriverKind::Podman) => Self::Podman, Some(crate::ComputeDriverKind::Vm) => Self::Vm, + Some(crate::ComputeDriverKind::AppleContainer) => Self::AppleContainer, None => Self::Unknown, } } @@ -695,6 +699,26 @@ mod tests { TelemetryComputeDriver::from_raw("podman").as_str(), "podman" ); + assert_eq!( + TelemetryComputeDriver::from_raw("apple-container").as_str(), + "apple-container" + ); + assert_eq!( + TelemetryComputeDriver::from_raw("apple_container").as_str(), + "unknown" + ); + assert_eq!( + TelemetryComputeDriver::from_raw("apple").as_str(), + 
"unknown" + ); + assert_eq!( + TelemetryComputeDriver::from_raw("Apple-Container").as_str(), + "apple-container" + ); + assert_eq!( + TelemetryComputeDriver::from_raw(" apple-container ").as_str(), + "apple-container" + ); assert_eq!( TelemetryComputeDriver::from_raw("private-driver").as_str(), "unknown" diff --git a/crates/openshell-driver-apple-container/Cargo.toml b/crates/openshell-driver-apple-container/Cargo.toml new file mode 100644 index 000000000..eea24017e --- /dev/null +++ b/crates/openshell-driver-apple-container/Cargo.toml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "openshell-driver-apple-container" +description = "Apple Container compute driver for OpenShell" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +openshell-core = { path = "../openshell-core", default-features = false } + +tokio = { workspace = true } +tonic = { workspace = true } +futures = { workspace = true } +tokio-stream = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +tracing = { workspace = true } +chrono = { version = "0.4", default-features = false, features = ["std"] } + +[dev-dependencies] +prost-types = { workspace = true } + +[lints] +workspace = true diff --git a/crates/openshell-driver-apple-container/README.md b/crates/openshell-driver-apple-container/README.md new file mode 100644 index 000000000..77b1b5c82 --- /dev/null +++ b/crates/openshell-driver-apple-container/README.md @@ -0,0 +1,28 @@ +# OpenShell Apple Container Driver + +This crate implements the OpenShell compute driver for Apple's `container` CLI. +It creates local macOS sandboxes as Linux containers inside Apple Container +lightweight VMs. 
+ +The driver intentionally shells out to the installed `container` CLI instead of +linking Swift or XPC APIs directly. Apple Container's public, supported operator +surface is the CLI, and the CLI exposes machine-readable JSON for the state that +OpenShell needs: + +- `container system status --format json` +- `container list --all --format json` +- `container network list --format json` + +The gateway must run on macOS with Apple Container installed and running. Set +`compute_drivers = ["apple-container"]` in `[openshell.gateway]`; the gateway +does not auto-detect this driver. + +When `grpc_endpoint` is empty, the driver builds the supervisor callback URL +from `host_callback_host` and the gateway bind port. The default callback host +is `host.container.internal`, which Apple Container resolves inside the guest +VM. The gateway also listens on the Apple Container default network gateway +address discovered from `container network list --format json`. + +Apple Container accepts integer CPU counts. OpenShell therefore rejects +per-sandbox CPU limits such as `500m` or `1.5` that cannot be passed to +`container run --cpus`. diff --git a/crates/openshell-driver-apple-container/src/cli.rs b/crates/openshell-driver-apple-container/src/cli.rs new file mode 100644 index 000000000..856fcfe95 --- /dev/null +++ b/crates/openshell-driver-apple-container/src/cli.rs @@ -0,0 +1,409 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! CLI backend for Apple's installed `container` command. + +use serde::Deserialize; +use std::collections::BTreeMap; +use std::net::IpAddr; +use std::path::{Path, PathBuf}; +use std::process::Stdio; +use tokio::process::Command; + +/// Error returned while running Apple's `container` CLI. +#[derive(Debug, thiserror::Error)] +pub enum AppleContainerCliError { + /// A command could not be started. 
+ #[error("failed to execute `{program}`: {source}")] + Spawn { + /// Program path attempted by the backend. + program: String, + /// Underlying I/O error. + source: std::io::Error, + }, + /// A command exited unsuccessfully. + #[error("`{program} {args}` failed with status {status}: {stderr}")] + Status { + /// Program path attempted by the backend. + program: String, + /// Command-line arguments, shell-escaped for diagnostics only. + args: String, + /// Exit status text. + status: String, + /// Standard error text. + stderr: String, + }, + /// Command output could not be decoded as UTF-8. + #[error("decode `{program}` stdout failed: {source}")] + Utf8 { + /// Program path attempted by the backend. + program: String, + /// Underlying UTF-8 error. + source: std::string::FromUtf8Error, + }, + /// Command output JSON did not match the expected schema. + #[error("parse `{program}` JSON failed: {source}")] + Json { + /// Program path attempted by the backend. + program: String, + /// Underlying JSON error. + source: serde_json::Error, + }, + /// Apple Container service is reachable but not ready to run containers. + #[error("`{program} system status --format json` reported status {status}")] + Unhealthy { + /// Program path attempted by the backend. + program: String, + /// Service status returned by Apple Container. + status: String, + }, +} + +/// JSON status produced by `container system status --format json`. +#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerSystemStatus { + /// Service state such as `running`. + pub status: String, +} + +/// JSON container entry produced by `container list --format json`. +#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerListEntry { + /// Apple container identifier. + pub id: String, + /// Static container configuration. + pub configuration: AppleContainerConfiguration, + /// Runtime status. + pub status: AppleContainerStatus, +} + +/// Static configuration fields used by the driver. 
+#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerConfiguration { + /// Creation timestamp emitted by Apple Container. + #[serde(default, rename = "creationDate")] + pub creation_date: Option, + /// Container labels. + #[serde(default)] + pub labels: BTreeMap, + /// Image metadata. + pub image: Option, +} + +/// Image metadata emitted by Apple container. +#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerImage { + /// Original image reference. + pub reference: String, +} + +/// Runtime status fields used by the driver. +#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerStatus { + /// Runtime state such as `running` or `stopped`. + pub state: String, +} + +/// JSON network entry produced by `container network list --format json`. +#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerNetworkEntry { + /// Apple network identifier. + pub id: String, + /// Static network configuration. + pub configuration: AppleContainerNetworkConfiguration, + /// Runtime network status. + pub status: AppleContainerNetworkStatus, +} + +/// Static network configuration fields used by the driver. +#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerNetworkConfiguration { + /// Network name. + pub name: String, +} + +/// Runtime network status fields used by the driver. +#[derive(Debug, Clone, Deserialize)] +pub struct AppleContainerNetworkStatus { + /// IPv4 host-side gateway address for the Apple vmnet network. + #[serde(default, rename = "ipv4Gateway")] + pub ipv4_gateway: Option, +} + +/// Apple Container CLI wrapper. +#[derive(Debug, Clone)] +pub struct AppleContainerCli { + program: PathBuf, +} + +impl AppleContainerCli { + /// Create a CLI backend using the configured executable path. + #[must_use] + pub fn new(program: PathBuf) -> Self { + Self { program } + } + + /// Run `container system status --format json` to verify the service is ready. 
+ pub async fn health(&self) -> Result<(), AppleContainerCliError> { + let status = self.system_status().await?; + if status.status.trim().eq_ignore_ascii_case("running") { + Ok(()) + } else { + Err(AppleContainerCliError::Unhealthy { + program: self.program.display().to_string(), + status: status.status, + }) + } + } + + /// Return the Apple Container service status as machine-readable JSON. + pub async fn system_status( + &self, + ) -> Result { + let text = self.run(["system", "status", "--format", "json"]).await?; + serde_json::from_str(&text).map_err(|source| AppleContainerCliError::Json { + program: self.program.display().to_string(), + source, + }) + } + + /// Create and start a detached container. + /// + /// Apple Container exposes `create` and `start` as separate commands, but + /// listing the container between those two calls reports a stopped state. + /// `run --detach` keeps normal sandbox provisioning as a single visible + /// lifecycle transition from absent to running. + pub async fn run_detached(&self, args: &[String]) -> Result<(), AppleContainerCliError> { + let mut command_args = vec!["run".to_string(), "--detach".to_string()]; + command_args.extend_from_slice(args); + self.run(command_args).await.map(|_| ()) + } + + /// Start a container. + pub async fn start(&self, id: &str) -> Result<(), AppleContainerCliError> { + self.run(["start", id]).await.map(|_| ()) + } + + /// Stop a container. + pub async fn stop(&self, id: &str, timeout_secs: u32) -> Result<(), AppleContainerCliError> { + self.run(["stop", "--time", &timeout_secs.to_string(), id]) + .await + .map(|_| ()) + } + + /// Delete a container. + pub async fn delete(&self, id: &str) -> Result { + match self.run(["delete", "--force", id]).await { + Ok(_) => Ok(true), + Err(AppleContainerCliError::Status { stderr, .. }) + if stderr.contains("not found") || stderr.contains("does not exist") => + { + Ok(false) + } + Err(err) => Err(err), + } + } + + /// Create a named Apple container volume. 
+ pub async fn create_volume( + &self, + name: &str, + labels: &[String], + ) -> Result<(), AppleContainerCliError> { + let mut argv = vec!["volume".to_string(), "create".to_string()]; + for label in labels { + argv.push("--label".to_string()); + argv.push(label.clone()); + } + argv.push(name.to_string()); + self.run(argv).await.map(|_| ()) + } + + /// Delete a named Apple container volume. + pub async fn delete_volume(&self, name: &str) -> Result { + match self.run(["volume", "delete", name]).await { + Ok(_) => Ok(true), + Err(AppleContainerCliError::Status { stderr, .. }) + if stderr.contains("not found") || stderr.contains("does not exist") => + { + Ok(false) + } + Err(err) => Err(err), + } + } + + /// List all containers. + pub async fn list(&self) -> Result, AppleContainerCliError> { + let text = self.run(["list", "--all", "--format", "json"]).await?; + serde_json::from_str(&text).map_err(|source| AppleContainerCliError::Json { + program: self.program.display().to_string(), + source, + }) + } + + /// List Apple Container networks as machine-readable JSON. 
+ pub async fn list_networks( + &self, + ) -> Result, AppleContainerCliError> { + let text = self.run(["network", "list", "--format", "json"]).await?; + serde_json::from_str(&text).map_err(|source| AppleContainerCliError::Json { + program: self.program.display().to_string(), + source, + }) + } + + async fn run(&self, args: I) -> Result + where + I: IntoIterator, + S: AsRef, + { + let args = args + .into_iter() + .map(|arg| arg.as_ref().to_string()) + .collect::>(); + let output = Command::new(&self.program) + .args(&args) + .stdin(Stdio::null()) + .output() + .await + .map_err(|source| AppleContainerCliError::Spawn { + program: self.program.display().to_string(), + source, + })?; + if !output.status.success() { + return Err(AppleContainerCliError::Status { + program: self.program.display().to_string(), + args: redact_args(&args), + status: output.status.to_string(), + stderr: String::from_utf8_lossy(&output.stderr).trim().to_string(), + }); + } + String::from_utf8(output.stdout).map_err(|source| AppleContainerCliError::Utf8 { + program: self.program.display().to_string(), + source, + }) + } +} + +/// Build an Apple `--mount` argument for a read-only bind mount. 
+#[must_use] +pub fn readonly_bind_mount(source: &Path, target: &str) -> String { + format!( + "type=bind,source={},target={target},readonly", + source.display() + ) +} + +fn redact_args(args: &[String]) -> String { + let mut redacted = Vec::with_capacity(args.len()); + let mut redact_next_env = false; + for arg in args { + if redact_next_env { + redacted.push(redact_env_arg(arg)); + redact_next_env = false; + } else { + redact_next_env = arg == "--env"; + redacted.push(arg.clone()); + } + } + redacted.join(" ") +} + +fn redact_env_arg(arg: &str) -> String { + arg.split_once('=').map_or_else( + || "".to_string(), + |(key, _)| format!("{key}="), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn status_args_redact_environment_values() { + let args = vec![ + "run".to_string(), + "--env".to_string(), + "TOKEN=secret".to_string(), + "--name".to_string(), + "demo".to_string(), + ]; + + assert_eq!(redact_args(&args), "run --env TOKEN= --name demo"); + } + + #[test] + fn parses_system_status_json() { + let status: AppleContainerSystemStatus = + serde_json::from_str(r#"{"status":"running"}"#).unwrap(); + + assert_eq!(status.status, "running"); + } + + #[test] + fn parses_container_list_json() { + let entries: Vec = serde_json::from_str( + r#"[ + { + "id": "openshell-demo", + "configuration": { + "creationDate": "2026-06-12T08:00:00Z", + "labels": { + "io.openshell.managed-by": "openshell", + "io.openshell.sandbox-id": "sandbox-1" + }, + "image": { + "reference": "ghcr.io/nvidia/openshell/sandbox:latest" + } + }, + "status": { + "state": "running" + } + } + ]"#, + ) + .unwrap(); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].id, "openshell-demo"); + assert_eq!( + entries[0].configuration.labels["io.openshell.sandbox-id"], + "sandbox-1" + ); + assert_eq!( + entries[0] + .configuration + .image + .as_ref() + .map(|image| image.reference.as_str()), + Some("ghcr.io/nvidia/openshell/sandbox:latest") + ); + assert_eq!(entries[0].status.state, 
"running"); + } + + #[test] + fn parses_network_list_json() { + let networks: Vec = serde_json::from_str( + r#"[ + { + "id": "default", + "configuration": { + "name": "default" + }, + "status": { + "ipv4Gateway": "192.168.64.1" + } + } + ]"#, + ) + .unwrap(); + + assert_eq!(networks.len(), 1); + assert_eq!(networks[0].configuration.name, "default"); + assert_eq!( + networks[0].status.ipv4_gateway.map(|ip| ip.to_string()), + Some("192.168.64.1".to_string()) + ); + } +} diff --git a/crates/openshell-driver-apple-container/src/config.rs b/crates/openshell-driver-apple-container/src/config.rs new file mode 100644 index 000000000..58782e8e5 --- /dev/null +++ b/crates/openshell-driver-apple-container/src/config.rs @@ -0,0 +1,88 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Configuration for the Apple Container compute driver. + +use openshell_core::config::{DEFAULT_SERVER_PORT, DEFAULT_STOP_TIMEOUT_SECS}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +const DEFAULT_APPLE_CONTAINER_HOST_CALLBACK_HOST: &str = "host.container.internal"; + +/// Runtime configuration for the Apple Container driver. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default, deny_unknown_fields)] +pub struct AppleContainerComputeConfig { + /// Path to the `container` CLI. + pub container_bin: PathBuf, + /// Default OCI image for sandboxes. + pub default_image: String, + /// Namespace label applied to Apple Container sandboxes. + pub sandbox_namespace: String, + /// Gateway gRPC endpoint the sandbox supervisor dials. + pub grpc_endpoint: String, + /// Gateway listener port used when `grpc_endpoint` is empty. + pub gateway_port: u16, + /// Hostname or IP address Apple container VMs use to call back to the gateway. + pub host_callback_host: String, + /// Host path to the CA certificate for sandbox mTLS. 
+ /// + /// When all three guest TLS paths are set, the driver bind-mounts them + /// into Apple Container sandboxes and the implicit supervisor endpoint + /// switches from `http://` to `https://`. + pub guest_tls_ca: Option, + /// Host path to the client certificate for sandbox mTLS. + pub guest_tls_cert: Option, + /// Host path to the client private key for sandbox mTLS. + pub guest_tls_key: Option, + /// Parent directory containing the Linux `openshell-sandbox` binary. + pub supervisor_bin_dir: PathBuf, + /// Unix socket path where the supervisor exposes SSH relay traffic. + pub sandbox_ssh_socket_path: String, + /// Container stop timeout in seconds. + pub stop_timeout_secs: u32, + /// Default log level injected into the sandbox supervisor. + pub log_level: String, +} + +impl AppleContainerComputeConfig { + /// Returns `true` when all three sandbox mTLS paths are configured. + #[must_use] + pub fn tls_enabled(&self) -> bool { + self.guest_tls_ca.is_some() && self.guest_tls_cert.is_some() && self.guest_tls_key.is_some() + } + + /// Return the endpoint used by sandbox supervisors. 
+ #[must_use] + pub fn effective_grpc_endpoint(&self) -> String { + if self.grpc_endpoint.trim().is_empty() { + let scheme = if self.tls_enabled() { "https" } else { "http" }; + format!( + "{scheme}://{}:{}", + self.host_callback_host, self.gateway_port + ) + } else { + self.grpc_endpoint.clone() + } + } +} + +impl Default for AppleContainerComputeConfig { + fn default() -> Self { + Self { + container_bin: PathBuf::from("container"), + default_image: openshell_core::image::default_sandbox_image(), + sandbox_namespace: "default".to_string(), + grpc_endpoint: String::new(), + gateway_port: DEFAULT_SERVER_PORT, + host_callback_host: DEFAULT_APPLE_CONTAINER_HOST_CALLBACK_HOST.to_string(), + guest_tls_ca: None, + guest_tls_cert: None, + guest_tls_key: None, + supervisor_bin_dir: PathBuf::new(), + sandbox_ssh_socket_path: "/run/openshell/ssh.sock".to_string(), + stop_timeout_secs: DEFAULT_STOP_TIMEOUT_SECS, + log_level: "warn".to_string(), + } + } +} diff --git a/crates/openshell-driver-apple-container/src/driver.rs b/crates/openshell-driver-apple-container/src/driver.rs new file mode 100644 index 000000000..f2053f744 --- /dev/null +++ b/crates/openshell-driver-apple-container/src/driver.rs @@ -0,0 +1,2293 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Apple Container compute-driver implementation. 
+ +use crate::cli::{ + AppleContainerCli, AppleContainerCliError, AppleContainerListEntry, AppleContainerNetworkEntry, +}; +use crate::config::AppleContainerComputeConfig; +use futures::Stream; +use openshell_core::driver_utils::{ + LABEL_MANAGED_BY, LABEL_MANAGED_BY_VALUE, LABEL_SANDBOX_ID, LABEL_SANDBOX_NAME, + LABEL_SANDBOX_NAMESPACE, TLS_CA_MOUNT_PATH, TLS_CERT_MOUNT_PATH, TLS_KEY_MOUNT_PATH, +}; +use openshell_core::proto::compute::v1::{ + DriverCondition, DriverSandbox, DriverSandboxStatus, GetCapabilitiesResponse, + WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesSandboxEvent, + watch_sandboxes_event, +}; +use std::collections::BTreeMap; +use std::io::Write as _; +use std::net::{IpAddr, SocketAddr}; +#[cfg(unix)] +use std::os::unix::fs::OpenOptionsExt; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::{broadcast, mpsc}; +use tokio_stream::wrappers::ReceiverStream; +use tonic::Status; +use tracing::warn; + +const CONTAINER_PREFIX: &str = "openshell-sandbox-"; +const VOLUME_PREFIX: &str = "openshell-sandbox-"; +const SUPERVISOR_DIR_MOUNT_PATH: &str = "/opt/openshell/bin"; +const AUTH_DIR_MOUNT_PATH: &str = "/etc/openshell/auth"; +const TLS_DIR_MOUNT_PATH: &str = "/etc/openshell/tls/client"; +const TLS_CA_FILE: &str = "ca.crt"; +const TLS_CERT_FILE: &str = "tls.crt"; +const TLS_KEY_FILE: &str = "tls.key"; +const SANDBOX_TOKEN_FILE: &str = "sandbox.jwt"; +const SANDBOX_WORKDIR: &str = "/sandbox"; +const SANDBOX_COMMAND: &str = "sleep infinity"; +const SUPERVISOR_PATH: &str = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; +const TRANSIENT_STOPPED_LAUNCH_GRACE_MS: i64 = 30_000; +const WATCH_BUFFER: usize = 64; + +#[derive(Debug, Clone)] +struct AppleGuestTlsPaths { + ca: PathBuf, + cert: PathBuf, + key: PathBuf, +} + +#[derive(Debug)] +struct AppleSecretStagingDirs { + auth_mount_dir: Option, + tls_mount_dir: Option, +} + +/// Stream type returned by the Apple 
Container driver watch API. +pub type WatchStream = + Pin> + Send + 'static>>; + +/// Queried by the driver to decide when a running sandbox is usable. +/// +/// Apple Container can report that the container process has started before +/// the `OpenShell` supervisor has connected back to the gateway. The compute +/// plane treats the sandbox as Ready only after this signal flips true. +pub trait SupervisorReadiness: Send + Sync + 'static { + /// Return true once the sandbox supervisor has an active gateway session. + fn is_supervisor_connected(&self, sandbox_id: &str) -> bool; +} + +/// Compute driver that manages sandboxes with Apple's container runtime. +#[derive(Clone)] +pub struct AppleContainerComputeDriver { + cli: AppleContainerCli, + config: AppleContainerComputeConfig, + gateway_bind_addresses: Vec, + supervisor_readiness: Arc, + events: broadcast::Sender, +} + +impl std::fmt::Debug for AppleContainerComputeDriver { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AppleContainerComputeDriver") + .field("cli", &self.cli) + .field("config", &self.config) + .finish_non_exhaustive() + } +} + +impl AppleContainerComputeDriver { + /// Create and validate a new Apple Container driver. + /// + /// # Errors + /// Returns an error when the configured `container` CLI is unavailable or + /// reports an unhealthy Apple Container service. + pub async fn new( + config: AppleContainerComputeConfig, + supervisor_readiness: Arc, + ) -> Result { + let _ = apple_guest_tls_paths(&config)?; + let cli = AppleContainerCli::new(config.container_bin.clone()); + cli.health().await.map_err(status_from_cli)?; + let gateway_bind_addresses = gateway_bind_addresses_from_networks(&cli, &config).await?; + Ok(Self { + cli, + config, + gateway_bind_addresses, + supervisor_readiness, + events: broadcast::channel(WATCH_BUFFER).0, + }) + } + + /// Return driver capability metadata. 
+ #[must_use] + pub fn capabilities(&self) -> GetCapabilitiesResponse { + openshell_core::driver_utils::build_capabilities_response( + "apple-container", + openshell_core::VERSION, + &self.config.default_image, + ) + } + + /// Return gateway listener addresses required by Apple container VMs. + #[must_use] + pub fn gateway_bind_addresses(&self) -> Vec { + self.gateway_bind_addresses.clone() + } + + /// Validate a sandbox before creation. + pub fn validate_sandbox_create(&self, sandbox: &DriverSandbox) -> Result<(), Status> { + if sandbox.name.trim().is_empty() { + return Err(Status::failed_precondition("sandbox name is required")); + } + if sandbox.id.trim().is_empty() { + return Err(Status::failed_precondition("sandbox id is required")); + } + if sandbox.spec.as_ref().is_some_and(|spec| spec.gpu) { + return Err(Status::failed_precondition( + "apple-container driver does not support GPU sandboxes", + )); + } + validate_container_name(&container_name_for_sandbox(sandbox))?; + validate_sandbox_template(sandbox)?; + if sandbox_image(sandbox, &self.config).trim().is_empty() { + return Err(Status::failed_precondition( + "no sandbox image configured: set default_image in [openshell.drivers.apple-container] or provide a template image", + )); + } + Ok(()) + } + + /// Create and start one sandbox. + pub async fn create_sandbox(&self, sandbox: &DriverSandbox) -> Result<(), Status> { + self.validate_sandbox_create(sandbox)?; + validate_sandbox_auth(sandbox)?; + if self + .find_managed_entry(&sandbox.id, &sandbox.name) + .await? 
+ .is_some() + { + return Err(Status::already_exists("sandbox already exists")); + } + let volume = volume_name(&sandbox.id); + self.cli + .create_volume(&volume, &managed_labels(sandbox, &self.config)) + .await + .map_err(status_from_cli)?; + let args = match self.create_args(sandbox).await { + Ok(args) => args, + Err(err) => { + self.cleanup_volume_with_warning(&volume, &sandbox.id, "create-args-failed") + .await; + cleanup_secret_staging_dir(&sandbox.id, &self.config); + return Err(err); + } + }; + if let Err(err) = self.cli.run_detached(&args).await { + self.cleanup_volume_with_warning(&volume, &sandbox.id, "container-run-failed") + .await; + cleanup_secret_staging_dir(&sandbox.id, &self.config); + return Err(status_from_cli(err)); + } + Ok(()) + } + + /// Stop a sandbox without deleting it. + pub async fn stop_sandbox(&self, sandbox_id: &str, sandbox_name: &str) -> Result<(), Status> { + require_sandbox_identifier(sandbox_id, sandbox_name)?; + let entry = self + .find_managed_entry(sandbox_id, sandbox_name) + .await? + .ok_or_else(|| Status::not_found("sandbox not found"))?; + self.cli + .stop(&entry.id, self.config.stop_timeout_secs) + .await + .map_err(status_from_cli) + } + + /// Start a managed sandbox that was stopped while the gateway was down. + pub async fn resume_sandbox( + &self, + sandbox_id: &str, + sandbox_name: &str, + ) -> Result { + let Some(entry) = self.find_managed_entry(sandbox_id, sandbox_name).await? else { + return Ok(false); + }; + if !apple_container_state_needs_resume(&entry.status.state) { + return Ok(true); + } + match self.cli.start(&entry.id).await { + Ok(()) => Ok(true), + Err(err) => { + let status = status_from_cli(err); + if status.code() == tonic::Code::NotFound { + Ok(false) + } else { + Err(status) + } + } + } + } + + /// Stop all running OpenShell-managed Apple containers during gateway shutdown. + pub async fn stop_managed_containers_on_shutdown(&self) -> Result { + let targets = self + .list_entries() + .await? 
+ .into_iter() + .filter(|entry| managed_entry(entry, &self.config)) + .filter(|entry| apple_container_state_needs_shutdown_stop(&entry.status.state)) + .map(|entry| entry.id) + .collect::>(); + let target_count = targets.len(); + let mut stopped = 0usize; + let mut failures = Vec::new(); + + for target in targets { + match self.cli.stop(&target, self.config.stop_timeout_secs).await { + Ok(()) => stopped += 1, + Err(err) => { + let status = status_from_cli(err); + if status.code() == tonic::Code::NotFound { + continue; + } + warn!( + container = %target, + error = %status, + "Failed to stop Apple sandbox container during shutdown" + ); + failures.push(target); + } + } + } + + if !failures.is_empty() { + return Err(Status::internal(format!( + "failed to stop {} of {target_count} Apple sandbox containers during shutdown", + failures.len() + ))); + } + + Ok(stopped) + } + + /// Delete a sandbox and its driver-owned secret staging directory. + pub async fn delete_sandbox( + &self, + sandbox_id: &str, + sandbox_name: &str, + ) -> Result { + require_sandbox_identifier(sandbox_id, sandbox_name)?; + let Some(entry) = self.find_managed_entry(sandbox_id, sandbox_name).await? 
else { + if !sandbox_id.is_empty() { + self.cleanup_volume_with_warning( + &volume_name(sandbox_id), + sandbox_id, + "container-not-found", + ) + .await; + cleanup_secret_staging_dir(sandbox_id, &self.config); + } + return Ok(false); + }; + let resolved_id = entry + .configuration + .labels + .get(LABEL_SANDBOX_ID) + .cloned() + .unwrap_or_else(|| sandbox_id.to_string()); + let deleted = self.cli.delete(&entry.id).await.map_err(status_from_cli)?; + if !resolved_id.is_empty() { + self.cleanup_volume_with_warning( + &volume_name(&resolved_id), + &resolved_id, + "container-deleted", + ) + .await; + cleanup_secret_staging_dir(&resolved_id, &self.config); + } + if deleted && !resolved_id.is_empty() { + self.emit_deleted_event(&resolved_id); + } + Ok(deleted) + } + + async fn find_managed_entry( + &self, + sandbox_id: &str, + sandbox_name: &str, + ) -> Result, Status> { + Ok(self + .list_entries() + .await? + .into_iter() + .filter(|entry| managed_entry(entry, &self.config)) + .find(|entry| entry_matches(entry, sandbox_id, sandbox_name))) + } + + /// Fetch one sandbox by name. + pub async fn get_sandbox( + &self, + sandbox_id: &str, + sandbox_name: &str, + ) -> Result, Status> { + let sandboxes = self.list_entries().await?; + Ok(sandboxes + .into_iter() + .filter(|entry| managed_entry(entry, &self.config)) + .find(|entry| entry_matches(entry, sandbox_id, sandbox_name)) + .and_then(|entry| driver_sandbox_from_entry(entry, self.supervisor_readiness.as_ref()))) + } + + /// List all OpenShell-managed Apple containers. + pub async fn list_sandboxes(&self) -> Result, Status> { + let mut sandboxes = self + .list_entries() + .await? 
+ .into_iter() + .filter(|entry| managed_entry(entry, &self.config)) + .filter_map(|entry| { + driver_sandbox_from_entry(entry, self.supervisor_readiness.as_ref()) + }) + .collect::>(); + sandboxes.sort_by(|left, right| left.name.cmp(&right.name).then(left.id.cmp(&right.id))); + Ok(sandboxes) + } + + /// Start a polling watch stream for sandbox snapshots. + pub fn watch_sandboxes(&self) -> Result { + let driver = self.clone(); + let mut events = self.events.subscribe(); + let (tx, rx) = mpsc::channel(WATCH_BUFFER); + tokio::spawn(async move { + let mut previous: BTreeMap = BTreeMap::new(); + let mut poll = tokio::time::interval(Duration::from_secs(2)); + poll.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + loop { + tokio::select! { + _ = poll.tick() => { + match driver.list_sandboxes().await { + Ok(sandboxes) => { + let current = sandboxes + .iter() + .map(|sandbox| (sandbox.id.clone(), sandbox.clone())) + .collect::>(); + if !send_snapshot_delta(&tx, &previous, ¤t).await { + return; + } + previous = current; + } + Err(err) => { + warn!( + error = %err, + "Apple sandbox watch poll failed" + ); + } + } + } + event = events.recv() => { + match event { + Ok(event) => { + apply_watch_event_to_cache(&mut previous, &event); + if tx.send(Ok(event)).await.is_err() { + return; + } + } + Err(broadcast::error::RecvError::Lagged(skipped)) => { + warn!( + skipped, + "Apple sandbox watch event receiver lagged; polling will resynchronize state" + ); + } + Err(broadcast::error::RecvError::Closed) => return, + } + } + } + } + }); + Ok(Box::pin(ReceiverStream::new(rx))) + } + + fn emit_deleted_event(&self, sandbox_id: &str) { + let _ = self + .events + .send(watch_deleted_event(sandbox_id.to_string())); + } + + async fn cleanup_volume_with_warning(&self, volume: &str, sandbox_id: &str, reason: &str) { + match self.cli.delete_volume(volume).await { + Ok(_) => {} + Err(err) => { + let status = status_from_cli(err); + warn!( + sandbox_id, + volume, + reason, + error 
= %status, + "Failed to delete Apple sandbox volume" + ); + } + } + } + + async fn list_entries(&self) -> Result, Status> { + self.cli.list().await.map_err(status_from_cli) + } + + async fn create_args(&self, sandbox: &DriverSandbox) -> Result, Status> { + self.create_args_with_secret_staging_base(sandbox, None) + .await + } + + async fn create_args_with_secret_staging_base( + &self, + sandbox: &DriverSandbox, + secret_staging_base: Option<&Path>, + ) -> Result, Status> { + let container_name = container_name_for_sandbox(sandbox); + let image = sandbox_image(sandbox, &self.config); + if image.trim().is_empty() { + return Err(Status::failed_precondition( + "no sandbox image configured: set default_image in [openshell.drivers.apple-container] or provide a template image", + )); + } + + let supervisor_dir = supervisor_bin_dir(&self.config.supervisor_bin_dir)?; + let guest_tls = apple_guest_tls_paths(&self.config)?; + let staging_dirs = write_secret_staging_materials( + sandbox, + &self.config, + guest_tls.as_ref(), + secret_staging_base, + ) + .await?; + let mut args = vec!["--name".to_string(), container_name]; + for label in managed_labels(sandbox, &self.config) { + args.push("--label".to_string()); + args.push(label); + } + args.extend([ + // Sandbox images may set USER sandbox for interactive shells. The + // supervisor itself must start as root so it can create the network + // namespace, prepare writable paths, and then drop to the policy user. 
+ "--user".to_string(), + "0:0".to_string(), + "--workdir".to_string(), + SANDBOX_WORKDIR.to_string(), + "--volume".to_string(), + format!("{}:{SANDBOX_WORKDIR}", volume_name(&sandbox.id)), + "--mount".to_string(), + crate::cli::readonly_bind_mount(&supervisor_dir, SUPERVISOR_DIR_MOUNT_PATH), + "--entrypoint".to_string(), + format!("{SUPERVISOR_DIR_MOUNT_PATH}/openshell-sandbox"), + ]); + + if let Some(auth_dir) = staging_dirs.auth_mount_dir { + args.push("--mount".to_string()); + args.push(crate::cli::readonly_bind_mount( + &auth_dir, + AUTH_DIR_MOUNT_PATH, + )); + } + + if let Some(tls_dir) = staging_dirs.tls_mount_dir { + args.push("--mount".to_string()); + args.push(crate::cli::readonly_bind_mount( + &tls_dir, + TLS_DIR_MOUNT_PATH, + )); + } + + for (key, value) in sandbox_environment(sandbox, &self.config) { + args.push("--env".to_string()); + args.push(format!("{key}={value}")); + } + + if let Some(memory) = sandbox_memory_limit(sandbox) { + args.push("--memory".to_string()); + args.push(memory); + } + if let Some(cpus) = sandbox_cpu_limit(sandbox)? { + args.push("--cpus".to_string()); + args.push(cpus); + } + + for cap in ["SYS_ADMIN", "NET_ADMIN", "SYS_PTRACE", "SYSLOG"] { + args.push("--cap-add".to_string()); + args.push(cap.to_string()); + } + args.push(image); + // The Apple CLI does not expose a Docker-style empty-CMD override. + // Pass the supervisor command explicitly after the image so image CMD + // defaults cannot become accidental sandbox workload arguments. + args.push("sleep".to_string()); + args.push("infinity".to_string()); + Ok(args) + } +} + +fn status_from_cli(err: AppleContainerCliError) -> Status { + if matches!(&err, AppleContainerCliError::Unhealthy { .. 
}) { + return Status::failed_precondition(err.to_string()); + } + let message = err.to_string(); + if message.contains("already exists") || message.contains("exists") { + Status::already_exists(message) + } else if message.contains("not found") || message.contains("does not exist") { + Status::not_found(message) + } else { + Status::internal(message) + } +} + +async fn send_snapshot_delta( + tx: &mpsc::Sender>, + previous: &BTreeMap, + current: &BTreeMap, +) -> bool { + for (sandbox_id, sandbox) in current { + if previous.get(sandbox_id) == Some(sandbox) { + continue; + } + if tx + .send(Ok(watch_sandbox_event(sandbox.clone()))) + .await + .is_err() + { + return false; + } + } + for sandbox_id in previous.keys() { + if current.contains_key(sandbox_id) { + continue; + } + if tx + .send(Ok(watch_deleted_event(sandbox_id.clone()))) + .await + .is_err() + { + return false; + } + } + true +} + +fn watch_sandbox_event(sandbox: DriverSandbox) -> WatchSandboxesEvent { + WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Sandbox( + WatchSandboxesSandboxEvent { + sandbox: Some(sandbox), + }, + )), + } +} + +fn watch_deleted_event(sandbox_id: String) -> WatchSandboxesEvent { + WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Deleted( + WatchSandboxesDeletedEvent { sandbox_id }, + )), + } +} + +fn apply_watch_event_to_cache( + previous: &mut BTreeMap, + event: &WatchSandboxesEvent, +) { + match event.payload.as_ref() { + Some(watch_sandboxes_event::Payload::Sandbox(WatchSandboxesSandboxEvent { + sandbox: Some(sandbox), + })) => { + previous.insert(sandbox.id.clone(), sandbox.clone()); + } + Some(watch_sandboxes_event::Payload::Deleted(WatchSandboxesDeletedEvent { + sandbox_id, + })) => { + previous.remove(sandbox_id); + } + _ => {} + } +} + +async fn gateway_bind_addresses_from_networks( + cli: &AppleContainerCli, + config: &AppleContainerComputeConfig, +) -> Result, Status> { + if !config.grpc_endpoint.trim().is_empty() { + return 
/// Report whether a container in `state` must be started to resume its sandbox.
///
/// The comparison is exact and case-sensitive.
fn apple_container_state_needs_resume(state: &str) -> bool {
    const RESUMABLE_STATES: [&str; 3] = ["created", "stopped", "exited"];
    RESUMABLE_STATES.contains(&state)
}
/// Replace every character outside `[A-Za-z0-9._-]` with `-`.
///
/// An empty input yields the placeholder `"sandbox"`; any other input keeps
/// its character count.
fn sanitize_name(value: &str) -> String {
    if value.is_empty() {
        return "sandbox".to_string();
    }
    value
        .chars()
        .map(|ch| match ch {
            '.' | '_' | '-' => ch,
            other if other.is_ascii_alphanumeric() => other,
            _ => '-',
        })
        .collect()
}
| '_')) + { + value.pop(); + } + value +} + +fn validate_container_name(name: &str) -> Result<(), Status> { + if name.starts_with('-') || name.ends_with('-') { + return Err(Status::failed_precondition( + "apple-container sandbox name cannot start or end with '-'", + )); + } + if !name + .chars() + .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '.' | '_' | '-')) + { + return Err(Status::failed_precondition( + "apple-container sandbox name contains unsupported characters", + )); + } + Ok(()) +} + +fn validate_sandbox_template(sandbox: &DriverSandbox) -> Result<(), Status> { + let spec = sandbox + .spec + .as_ref() + .ok_or_else(|| Status::invalid_argument("sandbox.spec is required"))?; + let template = spec + .template + .as_ref() + .ok_or_else(|| Status::invalid_argument("sandbox.spec.template is required"))?; + + if !template.agent_socket_path.trim().is_empty() { + return Err(Status::failed_precondition( + "apple-container compute driver does not support template.agent_socket_path", + )); + } + if template + .platform_config + .as_ref() + .is_some_and(|config| !config.fields.is_empty()) + { + return Err(Status::failed_precondition( + "apple-container compute driver does not support template.platform_config", + )); + } + if template + .driver_config + .as_ref() + .is_some_and(|config| !config.fields.is_empty()) + { + return Err(Status::failed_precondition( + "apple-container compute driver does not support template.driver_config", + )); + } + if let Some(resources) = template.resources.as_ref() { + validate_resources(resources)?; + } + Ok(()) +} + +fn validate_sandbox_auth(sandbox: &DriverSandbox) -> Result<(), Status> { + if sandbox + .spec + .as_ref() + .is_some_and(|spec| !spec.sandbox_token.trim().is_empty()) + { + return Ok(()); + } + + Err(Status::failed_precondition( + "apple-container sandboxes require gateway JWT auth; configure [openshell.gateway.gateway_jwt]", + )) +} + +fn validate_resources( + resources: 
&openshell_core::proto::compute::v1::DriverResourceRequirements, +) -> Result<(), Status> { + if !resources.cpu_request.trim().is_empty() { + return Err(Status::failed_precondition( + "apple-container compute driver does not support resources.requests.cpu", + )); + } + if !resources.memory_request.trim().is_empty() { + return Err(Status::failed_precondition( + "apple-container compute driver does not support resources.requests.memory", + )); + } + let _ = normalize_cpu_for_apple(&resources.cpu_limit)?; + Ok(()) +} + +fn sandbox_image(sandbox: &DriverSandbox, config: &AppleContainerComputeConfig) -> String { + sandbox + .spec + .as_ref() + .and_then(|spec| spec.template.as_ref()) + .map(|template| template.image.as_str()) + .filter(|image| !image.trim().is_empty()) + .unwrap_or(&config.default_image) + .to_string() +} + +fn sandbox_environment( + sandbox: &DriverSandbox, + config: &AppleContainerComputeConfig, +) -> BTreeMap { + let mut env = BTreeMap::new(); + if let Some(spec) = sandbox.spec.as_ref() { + let mut user_env = BTreeMap::new(); + if let Some(template) = spec.template.as_ref() { + user_env.extend(template.environment.clone()); + } + user_env.extend(spec.environment.clone()); + for key in driver_owned_environment_keys() { + user_env.remove(key); + } + user_env.remove(openshell_core::sandbox_env::SANDBOX_TOKEN); + user_env.remove(openshell_core::sandbox_env::USER_ENVIRONMENT); + if !user_env.is_empty() { + if let Ok(json) = serde_json::to_string(&user_env) { + env.insert( + openshell_core::sandbox_env::USER_ENVIRONMENT.to_string(), + json, + ); + } + env.extend(user_env); + } + } + for key in driver_owned_environment_keys() { + env.remove(key); + } + env.remove(openshell_core::sandbox_env::SANDBOX_TOKEN); + env.extend([ + ("HOME".to_string(), "/root".to_string()), + ("PATH".to_string(), SUPERVISOR_PATH.to_string()), + ("TERM".to_string(), "xterm".to_string()), + ( + openshell_core::sandbox_env::ENDPOINT.to_string(), + config.effective_grpc_endpoint(), + 
/// Environment variable names reserved for the driver.
///
/// `sandbox_environment` removes each of these keys from the user-supplied
/// environment before it applies its own values, so a template or spec
/// cannot override them.
fn driver_owned_environment_keys() -> [&'static str; 14] {
    [
        // Process basics the driver sets explicitly.
        "HOME",
        "PATH",
        "TERM",
        // Supervisor wiring.
        openshell_core::sandbox_env::ENDPOINT,
        openshell_core::sandbox_env::SANDBOX_ID,
        openshell_core::sandbox_env::SANDBOX,
        openshell_core::sandbox_env::SSH_SOCKET_PATH,
        openshell_core::sandbox_env::SANDBOX_COMMAND,
        openshell_core::sandbox_env::TELEMETRY_ENABLED,
        openshell_core::sandbox_env::LOG_LEVEL,
        // Locations of the mounted secrets.
        openshell_core::sandbox_env::SANDBOX_TOKEN_FILE,
        openshell_core::sandbox_env::TLS_CA,
        openshell_core::sandbox_env::TLS_CERT,
        openshell_core::sandbox_env::TLS_KEY,
    ]
}
config.guest_tls_cert.is_some(); + let has_key = config.guest_tls_key.is_some(); + let any_tls = has_ca || has_cert || has_key; + let all_tls = has_ca && has_cert && has_key; + + if any_tls && !all_tls { + return Err(Status::failed_precondition( + "apple-container compute driver requires guest_tls_ca, guest_tls_cert, and guest_tls_key when any guest TLS material is configured", + )); + } + + let endpoint = config.effective_grpc_endpoint(); + if !endpoint.starts_with("https://") { + if any_tls { + return Err(Status::failed_precondition(format!( + "guest_tls_ca/guest_tls_cert/guest_tls_key were provided but grpc_endpoint is '{endpoint}'; TLS materials require an https:// endpoint", + ))); + } + return Ok(None); + } + + if !all_tls { + return Err(Status::failed_precondition( + "apple-container compute driver requires guest_tls_ca, guest_tls_cert, and guest_tls_key when grpc_endpoint uses https://", + )); + } + + let ca = config + .guest_tls_ca + .as_deref() + .ok_or_else(|| Status::failed_precondition("guest_tls_ca is required"))?; + let cert = config + .guest_tls_cert + .as_deref() + .ok_or_else(|| Status::failed_precondition("guest_tls_cert is required"))?; + let key = config + .guest_tls_key + .as_deref() + .ok_or_else(|| Status::failed_precondition("guest_tls_key is required"))?; + + Ok(Some(AppleGuestTlsPaths { + ca: canonicalize_existing_file(ca, "apple-container TLS CA certificate")?, + cert: canonicalize_existing_file(cert, "apple-container TLS client certificate")?, + key: canonicalize_existing_file(key, "apple-container TLS client private key")?, + })) +} + +fn canonicalize_existing_file(path: &Path, description: &str) -> Result { + if !path.is_file() { + return Err(Status::failed_precondition(format!( + "{description} '{}' does not exist or is not a file", + path.display() + ))); + } + std::fs::canonicalize(path).map_err(|err| { + Status::failed_precondition(format!( + "failed to resolve {description} '{}': {err}", + path.display() + )) + }) +} + +async fn 
write_secret_staging_materials( + sandbox: &DriverSandbox, + config: &AppleContainerComputeConfig, + tls: Option<&AppleGuestTlsPaths>, + secret_staging_base: Option<&Path>, +) -> Result { + let token = sandbox + .spec + .as_ref() + .and_then(|spec| (!spec.sandbox_token.is_empty()).then_some(spec.sandbox_token.as_str())); + if token.is_none() && tls.is_none() { + return Ok(AppleSecretStagingDirs { + auth_mount_dir: None, + tls_mount_dir: None, + }); + } + let dir = secret_staging_dir_with_base( + &sandbox.id, + Some(&config.sandbox_namespace), + secret_staging_base, + )?; + openshell_core::paths::create_dir_restricted(&dir) + .map_err(|err| Status::internal(format!("create secret staging dir failed: {err}")))?; + let result = async { + if let Some(token) = token { + write_owner_only_file(&dir.join(SANDBOX_TOKEN_FILE), token).await?; + } + let tls_dir = if let Some(tls) = tls { + // Apple Container's virtiofs bind mounts are directory-oriented. + // Stage guest secrets under the per-sandbox secret directory so the + // same lifecycle cleanup removes both the JWT and TLS material. 
+ let tls_dir = dir.join("tls"); + openshell_core::paths::create_dir_restricted(&tls_dir) + .map_err(|err| Status::internal(format!("create TLS staging dir failed: {err}")))?; + copy_file_restricted(&tls.ca, &tls_dir.join(TLS_CA_FILE), false).await?; + copy_file_restricted(&tls.cert, &tls_dir.join(TLS_CERT_FILE), false).await?; + copy_file_restricted(&tls.key, &tls_dir.join(TLS_KEY_FILE), true).await?; + Some(tls_dir) + } else { + None + }; + Ok::, Status>(tls_dir) + } + .await; + let tls_dir = match result { + Ok(tls_dir) => tls_dir, + Err(err) => { + let _ = std::fs::remove_dir_all(&dir); + return Err(err); + } + }; + Ok(AppleSecretStagingDirs { + auth_mount_dir: token.is_some().then(|| dir.clone()), + tls_mount_dir: tls_dir, + }) +} + +async fn copy_file_restricted(source: &Path, dest: &Path, owner_only: bool) -> Result<(), Status> { + let source = source.to_path_buf(); + let dest = dest.to_path_buf(); + tokio::task::spawn_blocking(move || copy_file_restricted_blocking(&source, &dest, owner_only)) + .await + .map_err(|err| Status::internal(format!("copy TLS file task failed: {err}")))? 
+} + +fn copy_file_restricted_blocking( + source: &Path, + dest: &Path, + owner_only: bool, +) -> Result<(), Status> { + let bytes = std::fs::read(source) + .map_err(|err| Status::internal(format!("read {} failed: {err}", source.display())))?; + let mut options = std::fs::OpenOptions::new(); + options.write(true).create_new(true); + #[cfg(unix)] + { + let mode = if owner_only { 0o600 } else { 0o644 }; + options.mode(mode); + } + let mut file = options + .open(dest) + .map_err(|err| Status::internal(format!("create {} failed: {err}", dest.display())))?; + file.write_all(&bytes) + .map_err(|err| Status::internal(format!("write {} failed: {err}", dest.display())))?; + if owner_only { + openshell_core::paths::set_file_owner_only(dest).map_err(|err| { + Status::internal(format!("restrict {} failed: {err}", dest.display())) + })?; + } + Ok(()) +} + +async fn write_owner_only_file(path: &Path, contents: &str) -> Result<(), Status> { + let path = path.to_path_buf(); + let contents = format!("{contents}\n"); + tokio::task::spawn_blocking(move || write_owner_only_file_blocking(&path, &contents)) + .await + .map_err(|err| Status::internal(format!("write auth file task failed: {err}")))? +} + +fn write_owner_only_file_blocking(path: &Path, contents: &str) -> Result<(), Status> { + let mut options = std::fs::OpenOptions::new(); + options.write(true).create_new(true); + #[cfg(unix)] + { + // Token files must never exist with default umask permissions. Create + // them at owner-only mode, then re-apply the shared path helper so the + // invariant stays consistent with the rest of OpenShell's secret files. 
+ options.mode(0o600); + } + let mut file = options + .open(path) + .map_err(|err| Status::internal(format!("create {} failed: {err}", path.display())))?; + file.write_all(contents.as_bytes()) + .map_err(|err| Status::internal(format!("write {} failed: {err}", path.display())))?; + openshell_core::paths::set_file_owner_only(path) + .map_err(|err| Status::internal(format!("restrict {} failed: {err}", path.display()))) +} + +fn cleanup_secret_staging_dir(sandbox_id: &str, config: &AppleContainerComputeConfig) { + let Ok(dir) = secret_staging_dir(sandbox_id, Some(&config.sandbox_namespace)) else { + return; + }; + if let Err(err) = std::fs::remove_dir_all(&dir) + && err.kind() != std::io::ErrorKind::NotFound + { + warn!(path = %dir.display(), error = %err, "failed to remove Apple container secret staging dir"); + } +} + +fn secret_staging_dir(sandbox_id: &str, namespace: Option<&str>) -> Result { + secret_staging_dir_with_base(sandbox_id, namespace, None) +} + +fn secret_staging_dir_with_base( + sandbox_id: &str, + namespace: Option<&str>, + base: Option<&Path>, +) -> Result { + let mut path = if let Some(base) = base { + base.to_path_buf() + } else { + openshell_core::paths::xdg_state_dir() + .map_err(|err| Status::internal(format!("resolve state dir failed: {err}")))? + .join("openshell") + .join("apple-container-secrets") + }; + if let Some(namespace) = namespace { + path = path.join(namespace.replace(['/', '\\'], "-")); + } + Ok(path.join(sandbox_id)) +} + +fn supervisor_bin_dir(configured: &Path) -> Result { + let path = if configured.as_os_str().is_empty() { + default_supervisor_bin_dir().ok_or_else(|| { + Status::failed_precondition( + "apple-container driver requires supervisor_bin_dir or OPENSHELL_APPLE_CONTAINER_SUPERVISOR_BIN_DIR", + ) + })? 
+ } else { + configured.to_path_buf() + }; + let supervisor = path.join("openshell-sandbox"); + if !supervisor.is_file() { + return Err(Status::failed_precondition(format!( + "openshell-sandbox supervisor not found at {}", + supervisor.display() + ))); + } + Ok(path) +} + +fn default_supervisor_bin_dir() -> Option { + std::env::var_os("OPENSHELL_APPLE_CONTAINER_SUPERVISOR_BIN_DIR").map(PathBuf::from) +} + +fn sandbox_memory_limit(sandbox: &DriverSandbox) -> Option { + let value = sandbox + .spec + .as_ref()? + .template + .as_ref()? + .resources + .as_ref()? + .memory_limit + .trim() + .to_string(); + (!value.is_empty()).then(|| normalize_quantity_for_apple(&value)) +} + +fn sandbox_cpu_limit(sandbox: &DriverSandbox) -> Result, Status> { + let Some(resources) = sandbox + .spec + .as_ref() + .and_then(|spec| spec.template.as_ref()) + .and_then(|template| template.resources.as_ref()) + else { + return Ok(None); + }; + normalize_cpu_for_apple(&resources.cpu_limit) +} + +fn normalize_cpu_for_apple(value: &str) -> Result, Status> { + let value = value.trim(); + if value.is_empty() { + return Ok(None); + } + if let Some(millicores) = value.strip_suffix('m') { + let millicores = millicores.parse::().map_err(|_| { + Status::failed_precondition(format!( + "invalid apple-container cpu_limit '{value}'; expected a positive whole-core or whole-millicore quantity", + )) + })?; + if millicores == 0 { + return Err(Status::failed_precondition( + "apple-container cpu_limit must be greater than zero", + )); + } + if millicores % 1_000 != 0 { + return Err(Status::failed_precondition( + "apple-container cpu_limit must resolve to a whole CPU count because the Apple Container CLI expects an integer --cpus value", + )); + } + return Ok(Some((millicores / 1_000).to_string())); + } + + let cores = value.parse::().map_err(|_| { + Status::failed_precondition(format!( + "invalid apple-container cpu_limit '{value}'; expected a positive whole-core or whole-millicore quantity", + )) + })?; + if 
/// Rewrites a Kubernetes-style binary memory suffix into the single-letter
/// suffix form used for the Apple Container memory limit.
///
/// `Ki`, `Mi`, `Gi`, `Ti` and `Pi` become `K`, `M`, `G`, `T` and `P`. Any other
/// input, including plain numbers and values that already carry a
/// single-letter suffix, is returned unchanged. The numeric part is not
/// validated here.
///
/// NOTE(review): the single-letter set mirrors what Apple's `container` CLI
/// documents for `--memory`; confirm against the CLI version in use.
fn normalize_quantity_for_apple(value: &str) -> String {
    // (Kubernetes suffix, Apple suffix). The suffixes are mutually exclusive,
    // so lookup order does not matter.
    const SUFFIXES: [(&str, &str); 5] = [
        ("Ki", "K"),
        ("Mi", "M"),
        ("Gi", "G"),
        ("Ti", "T"),
        ("Pi", "P"),
    ];
    SUFFIXES
        .iter()
        .find_map(|(kubernetes, apple)| {
            value
                .strip_suffix(*kubernetes)
                .map(|number| format!("{number}{apple}"))
        })
        .unwrap_or_else(|| value.to_string())
}
+ .unwrap_or_default(); + let supervisor_connected = readiness.is_supervisor_connected(&id); + Some(DriverSandbox { + id, + name: name.clone(), + namespace, + spec: None, + status: Some(DriverSandboxStatus { + sandbox_name: name, + instance_id: entry.id, + agent_fd: String::new(), + sandbox_fd: String::new(), + conditions: vec![condition_from_state( + &entry.status.state, + &image, + supervisor_connected, + entry.configuration.creation_date.as_deref(), + )], + deleting: apple_container_is_deleting(&entry.status.state), + }), + }) +} + +fn condition_from_state( + state: &str, + image: &str, + supervisor_connected: bool, + creation_date: Option<&str>, +) -> DriverCondition { + let launch_age_ms = creation_date.and_then(creation_age_ms); + match state { + "running" if supervisor_connected => DriverCondition { + r#type: "Ready".to_string(), + status: "True".to_string(), + reason: "SupervisorConnected".to_string(), + message: "Supervisor relay is live".to_string(), + last_transition_time: String::new(), + }, + "running" => DriverCondition { + r#type: "Ready".to_string(), + status: "False".to_string(), + reason: "DependenciesNotReady".to_string(), + message: format!( + "Apple container is running from {image}; waiting for supervisor relay" + ), + last_transition_time: String::new(), + }, + "created" => DriverCondition { + r#type: "Ready".to_string(), + status: "False".to_string(), + reason: "Starting".to_string(), + message: "Apple container is created".to_string(), + last_transition_time: String::new(), + }, + "stopped" + if launch_age_ms.is_some_and(|age_ms| age_ms <= TRANSIENT_STOPPED_LAUNCH_GRACE_MS) => + { + DriverCondition { + r#type: "Ready".to_string(), + status: "False".to_string(), + reason: "Starting".to_string(), + message: "Apple container is starting".to_string(), + last_transition_time: String::new(), + } + } + "stopped" | "exited" => DriverCondition { + r#type: "Ready".to_string(), + status: "False".to_string(), + reason: "ContainerStopped".to_string(), + 
message: "Apple container is stopped".to_string(), + last_transition_time: String::new(), + }, + "deleting" | "removing" => DriverCondition { + r#type: "Ready".to_string(), + status: "False".to_string(), + reason: "Deleting".to_string(), + message: "Apple container is being removed".to_string(), + last_transition_time: String::new(), + }, + other => DriverCondition { + r#type: "Ready".to_string(), + status: "Unknown".to_string(), + reason: "ContainerStateUnknown".to_string(), + message: format!("Apple container state is {other}"), + last_transition_time: String::new(), + }, + } +} + +fn creation_age_ms(creation_date: &str) -> Option { + let created_at = chrono::DateTime::parse_from_rfc3339(creation_date).ok()?; + Some( + openshell_core::time::now_ms() + .saturating_sub(created_at.timestamp_millis()) + .max(0), + ) +} + +fn apple_container_is_deleting(state: &str) -> bool { + matches!(state, "deleting" | "removing") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cli::{AppleContainerConfiguration, AppleContainerImage, AppleContainerStatus}; + use openshell_core::proto::compute::v1::{ + DriverResourceRequirements, DriverSandboxSpec, DriverSandboxTemplate, + }; + + struct AlwaysReady; + + impl SupervisorReadiness for AlwaysReady { + fn is_supervisor_connected(&self, _sandbox_id: &str) -> bool { + true + } + } + + struct NeverReady; + + impl SupervisorReadiness for NeverReady { + fn is_supervisor_connected(&self, _sandbox_id: &str) -> bool { + false + } + } + + fn test_supervisor_dir(test_name: &str) -> PathBuf { + let nanos = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos(); + std::env::temp_dir().join(format!( + "openshell-apple-container-{test_name}-{}-{nanos}", + std::process::id() + )) + } + + #[test] + fn container_name_sanitizes_unsupported_characters() { + let sandbox = DriverSandbox { + id: "sbx/id".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: None, + status: None, + }; 
+ assert_eq!( + container_name_for_sandbox(&sandbox), + "openshell-sandbox-demo-sbx-id" + ); + } + + #[test] + fn container_name_preserves_id_suffix_with_apple_length_limit() { + let sandbox = DriverSandbox { + id: "d40fd9e4-39be-4182-b0bf-54c295292dca".to_string(), + name: "hermes-apple-e2e-mainbase".to_string(), + namespace: String::new(), + spec: None, + status: None, + }; + + let name = container_name_for_sandbox(&sandbox); + + assert_eq!(name.len(), MAX_CONTAINER_NAME_LEN); + assert!(name.ends_with("d40fd9e4-39be-4182-b0bf-54c295292dca")); + } + + #[test] + fn volume_name_uses_sandbox_id() { + assert_eq!( + volume_name("sandbox/id"), + "openshell-sandbox-sandbox-id".to_string() + ); + } + + #[test] + fn condition_maps_running_to_waiting_until_supervisor_connects() { + let condition = condition_from_state("running", "example:latest", false, None); + assert_eq!(condition.status, "False"); + assert_eq!(condition.reason, "DependenciesNotReady"); + } + + #[test] + fn condition_maps_connected_supervisor_to_ready() { + let condition = condition_from_state("running", "example:latest", true, None); + assert_eq!(condition.status, "True"); + assert_eq!(condition.reason, "SupervisorConnected"); + } + + #[test] + fn condition_maps_recent_stopped_container_to_starting() { + let recent_creation_date = rfc3339_from_unix_ms(openshell_core::time::now_ms() - 1_000); + + let condition = condition_from_state( + "stopped", + "example:latest", + false, + Some(&recent_creation_date), + ); + + assert_eq!(condition.status, "False"); + assert_eq!(condition.reason, "Starting"); + } + + #[test] + fn condition_maps_old_stopped_container_to_terminal_error() { + let old_creation_date = rfc3339_from_unix_ms( + openshell_core::time::now_ms() - TRANSIENT_STOPPED_LAUNCH_GRACE_MS - 1_000, + ); + + let condition = + condition_from_state("stopped", "example:latest", false, Some(&old_creation_date)); + + assert_eq!(condition.status, "False"); + assert_eq!(condition.reason, "ContainerStopped"); + } + 
/// Every binary suffix the normalizer maps is rewritten to its single-letter
/// form, and quantities without such a suffix pass through untouched.
#[test]
fn memory_quantity_maps_kubernetes_suffixes() {
    let mapped = [
        ("64Ki", "64K"),
        ("512Mi", "512M"),
        ("4Gi", "4G"),
        ("2Ti", "2T"),
    ];
    for (input, expected) in mapped {
        assert_eq!(normalize_quantity_for_apple(input), expected, "{input}");
    }

    // Plain byte counts and already-short suffixes must not be altered.
    for untouched in ["1024", "8G", ""] {
        assert_eq!(normalize_quantity_for_apple(untouched), untouched);
    }
}
crate::cli::AppleContainerNetworkConfiguration { + name: name.to_string(), + }, + status: crate::cli::AppleContainerNetworkStatus { + ipv4_gateway: gateway.map(|value| value.parse().unwrap()), + }, + } + } + + #[test] + fn apple_default_network_gateway_prefers_default_network() { + let networks = vec![ + network_entry("other", "other", Some("192.168.100.1")), + network_entry("default", "default", Some("192.168.64.1")), + ]; + + assert_eq!( + apple_default_network_gateway(&networks).map(|ip| ip.to_string()), + Some("192.168.64.1".to_string()) + ); + } + + #[test] + fn apple_default_network_gateway_falls_back_to_first_gateway() { + let networks = vec![ + network_entry("default", "default", None), + network_entry("custom", "custom", Some("192.168.127.1")), + ]; + + assert_eq!( + apple_default_network_gateway(&networks).map(|ip| ip.to_string()), + Some("192.168.127.1".to_string()) + ); + } + + #[test] + fn lifecycle_state_predicates_match_startable_and_stoppable_states() { + for state in ["created", "stopped", "exited"] { + assert!( + apple_container_state_needs_resume(state), + "{state} should be resumed" + ); + } + for state in ["running", "deleting", "removing", "unknown"] { + assert!( + !apple_container_state_needs_resume(state), + "{state} should not be resumed" + ); + } + for state in ["created", "running"] { + assert!( + apple_container_state_needs_shutdown_stop(state), + "{state} should be stopped on shutdown" + ); + } + for state in ["stopped", "exited", "deleting", "removing", "unknown"] { + assert!( + !apple_container_state_needs_shutdown_stop(state), + "{state} should not be stopped on shutdown" + ); + } + } + + #[test] + fn default_endpoint_uses_apple_host_dns_name() { + let config = AppleContainerComputeConfig { + gateway_port: 17686, + ..AppleContainerComputeConfig::default() + }; + + assert_eq!( + config.effective_grpc_endpoint(), + "http://host.container.internal:17686" + ); + } + + #[test] + fn 
default_endpoint_uses_https_when_guest_tls_is_configured() { + let config = AppleContainerComputeConfig { + gateway_port: 17686, + guest_tls_ca: Some(PathBuf::from("/host/ca.crt")), + guest_tls_cert: Some(PathBuf::from("/host/tls.crt")), + guest_tls_key: Some(PathBuf::from("/host/tls.key")), + ..AppleContainerComputeConfig::default() + }; + + assert_eq!( + config.effective_grpc_endpoint(), + "https://host.container.internal:17686" + ); + } + + #[test] + fn guest_tls_validation_rejects_partial_configuration() { + let config = AppleContainerComputeConfig { + guest_tls_ca: Some(PathBuf::from("/host/ca.crt")), + ..AppleContainerComputeConfig::default() + }; + + let err = apple_guest_tls_paths(&config).unwrap_err(); + + assert!( + err.message() + .contains("requires guest_tls_ca, guest_tls_cert, and guest_tls_key") + ); + } + + #[test] + fn guest_tls_validation_rejects_https_without_materials() { + let config = AppleContainerComputeConfig { + grpc_endpoint: "https://host.container.internal:17670".to_string(), + ..AppleContainerComputeConfig::default() + }; + + let err = apple_guest_tls_paths(&config).unwrap_err(); + + assert!(err.message().contains("when grpc_endpoint uses https://")); + } + + #[test] + fn sandbox_environment_preserves_driver_owned_values() { + let mut spec_env = std::collections::HashMap::new(); + spec_env.insert( + openshell_core::sandbox_env::ENDPOINT.to_string(), + "http://attacker.invalid".to_string(), + ); + spec_env.insert( + openshell_core::sandbox_env::SANDBOX_TOKEN_FILE.to_string(), + "/tmp/attacker-token".to_string(), + ); + spec_env.insert( + openshell_core::sandbox_env::SANDBOX_TOKEN.to_string(), + "inline-secret".to_string(), + ); + spec_env.insert( + openshell_core::sandbox_env::TLS_CA.to_string(), + "/tmp/user-ca.crt".to_string(), + ); + spec_env.insert("VISIBLE".to_string(), "value".to_string()); + spec_env.insert( + openshell_core::sandbox_env::USER_ENVIRONMENT.to_string(), + "{\"ATTACK\":\"1\"}".to_string(), + ); + let sandbox = 
DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec { + environment: spec_env, + sandbox_token: "gateway-token".to_string(), + ..DriverSandboxSpec::default() + }), + status: None, + }; + + let env = sandbox_environment(&sandbox, &AppleContainerComputeConfig::default()); + + assert_eq!( + env.get(openshell_core::sandbox_env::ENDPOINT) + .map(String::as_str), + Some("http://host.container.internal:17670") + ); + assert_eq!( + env.get(openshell_core::sandbox_env::SANDBOX_TOKEN_FILE) + .map(String::as_str), + Some("/etc/openshell/auth/sandbox.jwt") + ); + assert!(!env.contains_key(openshell_core::sandbox_env::SANDBOX_TOKEN)); + assert!(!env.contains_key(openshell_core::sandbox_env::TLS_CA)); + assert_eq!(env.get("VISIBLE").map(String::as_str), Some("value")); + let user_env_json = env + .get(openshell_core::sandbox_env::USER_ENVIRONMENT) + .expect("user environment JSON should be set"); + let user_env: BTreeMap = serde_json::from_str(user_env_json).unwrap(); + assert_eq!(user_env.get("VISIBLE").map(String::as_str), Some("value")); + assert!(!user_env.contains_key(openshell_core::sandbox_env::ENDPOINT)); + assert!(!user_env.contains_key(openshell_core::sandbox_env::USER_ENVIRONMENT)); + assert!(!user_env.contains_key(openshell_core::sandbox_env::TLS_CA)); + } + + #[test] + fn sandbox_environment_sets_tls_paths_when_configured() { + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec::default()), + status: None, + }; + let config = AppleContainerComputeConfig { + guest_tls_ca: Some(PathBuf::from("/host/ca.crt")), + guest_tls_cert: Some(PathBuf::from("/host/tls.crt")), + guest_tls_key: Some(PathBuf::from("/host/tls.key")), + ..AppleContainerComputeConfig::default() + }; + + let env = sandbox_environment(&sandbox, &config); + + assert_eq!( + env.get(openshell_core::sandbox_env::ENDPOINT) + 
.map(String::as_str), + Some("https://host.container.internal:17670") + ); + assert_eq!( + env.get(openshell_core::sandbox_env::TLS_CA) + .map(String::as_str), + Some(TLS_CA_MOUNT_PATH) + ); + assert_eq!( + env.get(openshell_core::sandbox_env::TLS_CERT) + .map(String::as_str), + Some(TLS_CERT_MOUNT_PATH) + ); + assert_eq!( + env.get(openshell_core::sandbox_env::TLS_KEY) + .map(String::as_str), + Some(TLS_KEY_MOUNT_PATH) + ); + } + + #[test] + fn require_sandbox_identifier_rejects_empty_target() { + let err = require_sandbox_identifier("", "").unwrap_err(); + assert_eq!(err.message(), "sandbox_id or sandbox_name is required"); + assert!(require_sandbox_identifier("sbx-1", "").is_ok()); + assert!(require_sandbox_identifier("", "demo").is_ok()); + } + + #[tokio::test] + async fn create_args_force_supervisor_to_run_as_root() { + let tempdir = test_supervisor_dir("root-supervisor"); + std::fs::create_dir_all(&tempdir).unwrap(); + std::fs::write(tempdir.join("openshell-sandbox"), b"fake supervisor").unwrap(); + let driver = AppleContainerComputeDriver { + cli: AppleContainerCli::new(PathBuf::from("container")), + config: AppleContainerComputeConfig { + supervisor_bin_dir: tempdir.clone(), + ..AppleContainerComputeConfig::default() + }, + gateway_bind_addresses: Vec::new(), + supervisor_readiness: Arc::new(NeverReady), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec::default()), + status: None, + }; + + let args = driver.create_args(&sandbox).await.unwrap(); + + assert_eq!(arg_value(&args, "--user"), Some("0:0")); + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[tokio::test] + async fn create_args_merges_template_labels_with_managed_labels() { + let tempdir = test_supervisor_dir("container-labels"); + std::fs::create_dir_all(&tempdir).unwrap(); + std::fs::write(tempdir.join("openshell-sandbox"), b"fake 
supervisor").unwrap(); + let driver = AppleContainerComputeDriver { + cli: AppleContainerCli::new(PathBuf::from("container")), + config: AppleContainerComputeConfig { + supervisor_bin_dir: tempdir.clone(), + sandbox_namespace: "team-a".to_string(), + ..AppleContainerComputeConfig::default() + }, + gateway_bind_addresses: Vec::new(), + supervisor_readiness: Arc::new(NeverReady), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec { + template: Some(DriverSandboxTemplate { + labels: std::collections::HashMap::from([ + ("custom.example/role".to_string(), "worker".to_string()), + (LABEL_SANDBOX_ID.to_string(), "spoofed".to_string()), + ]), + ..DriverSandboxTemplate::default() + }), + ..DriverSandboxSpec::default() + }), + status: None, + }; + + let args = driver.create_args(&sandbox).await.unwrap(); + let labels = arg_values(&args, "--label"); + + assert!(labels.contains(&"custom.example/role=worker")); + assert!(labels.contains(&"openshell.ai/sandbox-id=sbx-1")); + assert!(labels.contains(&"openshell.ai/sandbox-name=demo")); + assert!(labels.contains(&"openshell.ai/sandbox-namespace=team-a")); + assert!(!labels.contains(&"openshell.ai/sandbox-id=spoofed")); + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[cfg(unix)] + #[tokio::test] + async fn write_owner_only_file_creates_token_at_0600() { + use std::os::unix::fs::PermissionsExt; + + let tempdir = test_supervisor_dir("owner-only-token"); + std::fs::create_dir_all(&tempdir).unwrap(); + let token = tempdir.join(SANDBOX_TOKEN_FILE); + + write_owner_only_file(&token, "secret-token").await.unwrap(); + + let mode = std::fs::metadata(&token).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o600); + assert_eq!(std::fs::read_to_string(&token).unwrap(), "secret-token\n"); + + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[tokio::test] + async fn 
create_args_mounts_guest_tls_materials_without_sandbox_token() { + let tempdir = test_supervisor_dir("tls-mounts"); + std::fs::create_dir_all(&tempdir).unwrap(); + std::fs::write(tempdir.join("openshell-sandbox"), b"fake supervisor").unwrap(); + let ca = tempdir.join("ca.crt"); + let cert = tempdir.join("tls.crt"); + let key = tempdir.join("tls.key"); + std::fs::write(&ca, b"ca").unwrap(); + std::fs::write(&cert, b"cert").unwrap(); + std::fs::write(&key, b"key").unwrap(); + let driver = AppleContainerComputeDriver { + cli: AppleContainerCli::new(PathBuf::from("container")), + config: AppleContainerComputeConfig { + supervisor_bin_dir: tempdir.clone(), + guest_tls_ca: Some(ca.clone()), + guest_tls_cert: Some(cert.clone()), + guest_tls_key: Some(key.clone()), + ..AppleContainerComputeConfig::default() + }, + gateway_bind_addresses: Vec::new(), + supervisor_readiness: Arc::new(NeverReady), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: None, + status: None, + }; + let secret_staging_base = tempdir.join("apple-container-secrets"); + + let args = driver + .create_args_with_secret_staging_base(&sandbox, Some(&secret_staging_base)) + .await + .unwrap(); + + let staged_tls_dir = secret_staging_dir_with_base( + &sandbox.id, + Some(&driver.config.sandbox_namespace), + Some(&secret_staging_base), + ) + .unwrap() + .join("tls"); + assert!(args.contains(&crate::cli::readonly_bind_mount( + &staged_tls_dir, + TLS_DIR_MOUNT_PATH, + ))); + assert_eq!( + std::fs::read(staged_tls_dir.join(TLS_CA_FILE)).unwrap(), + b"ca" + ); + assert_eq!( + std::fs::read(staged_tls_dir.join(TLS_CERT_FILE)).unwrap(), + b"cert" + ); + assert_eq!( + std::fs::read(staged_tls_dir.join(TLS_KEY_FILE)).unwrap(), + b"key" + ); + assert!(args.contains(&format!( + "{}={TLS_CA_MOUNT_PATH}", + openshell_core::sandbox_env::TLS_CA + ))); + assert!(args.contains(&format!( + 
"{}={TLS_CERT_MOUNT_PATH}", + openshell_core::sandbox_env::TLS_CERT + ))); + assert!(args.contains(&format!( + "{}={TLS_KEY_MOUNT_PATH}", + openshell_core::sandbox_env::TLS_KEY + ))); + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[tokio::test] + async fn create_args_overrides_image_cmd_with_supervisor_command() { + let tempdir = test_supervisor_dir("supervisor-command"); + std::fs::create_dir_all(&tempdir).unwrap(); + std::fs::write(tempdir.join("openshell-sandbox"), b"fake supervisor").unwrap(); + let driver = AppleContainerComputeDriver { + cli: AppleContainerCli::new(PathBuf::from("container")), + config: AppleContainerComputeConfig { + supervisor_bin_dir: tempdir.clone(), + default_image: "example/image:latest".to_string(), + ..AppleContainerComputeConfig::default() + }, + gateway_bind_addresses: Vec::new(), + supervisor_readiness: Arc::new(NeverReady), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec::default()), + status: None, + }; + + let args = driver.create_args(&sandbox).await.unwrap(); + + assert_eq!( + args[args.len().saturating_sub(3)..] 
+ .iter() + .map(String::as_str) + .collect::>(), + vec!["example/image:latest", "sleep", "infinity"] + ); + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[test] + fn validate_sandbox_create_accepts_sanitized_runtime_names() { + let tempdir = test_supervisor_dir("validate-sanitized-name"); + std::fs::create_dir_all(&tempdir).unwrap(); + let driver = AppleContainerComputeDriver { + cli: AppleContainerCli::new(PathBuf::from("container")), + config: AppleContainerComputeConfig { + supervisor_bin_dir: tempdir.clone(), + ..AppleContainerComputeConfig::default() + }, + gateway_bind_addresses: Vec::new(), + supervisor_readiness: Arc::new(NeverReady), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo/name".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec { + template: Some(DriverSandboxTemplate::default()), + sandbox_token: "token".to_string(), + ..DriverSandboxSpec::default() + }), + status: None, + }; + + driver.validate_sandbox_create(&sandbox).unwrap(); + assert_eq!( + container_name_for_sandbox(&sandbox), + "openshell-sandbox-demo-name-sbx-1" + ); + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[test] + fn validate_sandbox_create_allows_missing_sandbox_token_for_preflight() { + let tempdir = test_supervisor_dir("validate-auth-token"); + std::fs::create_dir_all(&tempdir).unwrap(); + let driver = AppleContainerComputeDriver { + cli: AppleContainerCli::new(PathBuf::from("container")), + config: AppleContainerComputeConfig { + supervisor_bin_dir: tempdir.clone(), + ..AppleContainerComputeConfig::default() + }, + gateway_bind_addresses: Vec::new(), + supervisor_readiness: Arc::new(NeverReady), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec { + template: Some(DriverSandboxTemplate::default()), + 
..DriverSandboxSpec::default() + }), + status: None, + }; + + driver.validate_sandbox_create(&sandbox).unwrap(); + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[test] + fn validate_sandbox_auth_rejects_missing_sandbox_token() { + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec { + template: Some(DriverSandboxTemplate::default()), + ..DriverSandboxSpec::default() + }), + status: None, + }; + + let err = validate_sandbox_auth(&sandbox).unwrap_err(); + + assert_eq!( + err.message(), + "apple-container sandboxes require gateway JWT auth; configure [openshell.gateway.gateway_jwt]" + ); + } + + #[test] + fn validate_sandbox_create_rejects_missing_image_sources() { + let tempdir = test_supervisor_dir("validate-image"); + std::fs::create_dir_all(&tempdir).unwrap(); + let driver = AppleContainerComputeDriver { + cli: AppleContainerCli::new(PathBuf::from("container")), + config: AppleContainerComputeConfig { + supervisor_bin_dir: tempdir.clone(), + default_image: String::new(), + ..AppleContainerComputeConfig::default() + }, + gateway_bind_addresses: Vec::new(), + supervisor_readiness: Arc::new(NeverReady), + events: broadcast::channel(WATCH_BUFFER).0, + }; + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec { + template: Some(DriverSandboxTemplate::default()), + ..DriverSandboxSpec::default() + }), + status: None, + }; + + let err = driver.validate_sandbox_create(&sandbox).unwrap_err(); + + assert!( + err.message() + .contains("no sandbox image configured: set default_image") + ); + std::fs::remove_dir_all(tempdir).unwrap(); + } + + #[test] + fn validate_sandbox_template_rejects_driver_config() { + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: String::new(), + spec: Some(DriverSandboxSpec { + template: Some(DriverSandboxTemplate 
{ + driver_config: Some(prost_types::Struct { + fields: BTreeMap::from([( + "mounts".to_string(), + prost_types::Value { + kind: Some(prost_types::value::Kind::ListValue( + prost_types::ListValue { values: Vec::new() }, + )), + }, + )]), + }), + ..DriverSandboxTemplate::default() + }), + sandbox_token: "token".to_string(), + ..DriverSandboxSpec::default() + }), + status: None, + }; + + let err = validate_sandbox_template(&sandbox).unwrap_err(); + + assert_eq!( + err.message(), + "apple-container compute driver does not support template.driver_config" + ); + } + + #[test] + fn watch_event_cache_applies_sandbox_and_delete_events() { + let sandbox = DriverSandbox { + id: "sbx-1".to_string(), + name: "demo".to_string(), + namespace: "default".to_string(), + spec: None, + status: None, + }; + let mut cache = BTreeMap::new(); + + apply_watch_event_to_cache(&mut cache, &watch_sandbox_event(sandbox.clone())); + assert_eq!(cache.get("sbx-1"), Some(&sandbox)); + + apply_watch_event_to_cache(&mut cache, &watch_deleted_event("sbx-1".to_string())); + assert!(!cache.contains_key("sbx-1")); + } + + #[test] + fn managed_entry_requires_matching_namespace() { + let config = AppleContainerComputeConfig { + sandbox_namespace: "team-a".to_string(), + ..AppleContainerComputeConfig::default() + }; + let entry = list_entry("sbx-1", "demo", "team-a", "running"); + assert!(managed_entry(&entry, &config)); + + let other = list_entry("sbx-1", "demo", "team-b", "running"); + assert!(!managed_entry(&other, &config)); + } + + #[test] + fn entry_matches_accepts_id_or_name() { + let entry = list_entry("sbx-1", "demo", "default", "running"); + assert!(entry_matches(&entry, "sbx-1", "")); + assert!(entry_matches(&entry, "", "demo")); + assert!(entry_matches(&entry, "sbx-1", "demo")); + assert!(!entry_matches(&entry, "sbx-2", "")); + assert!(!entry_matches(&entry, "", "other")); + } + + #[test] + fn driver_sandbox_uses_supervisor_readiness() { + let waiting = driver_sandbox_from_entry( + 
/// Returns the argument that immediately follows the first occurrence of
/// `name` in `args`, or `None` when `name` is absent or is the last argument.
fn arg_value<'a>(args: &'a [String], name: &str) -> Option<&'a str> {
    let flag_index = args.iter().position(|arg| arg == name)?;
    args.get(flag_index + 1).map(String::as_str)
}
b/crates/openshell-driver-apple-container/src/grpc.rs
@@ -0,0 +1,137 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#![allow(clippy::result_large_err)] // gRPC handlers return Result<_, tonic::Status>
+
+//! gRPC service adapter for the Apple Container compute driver.
+
+use futures::{Stream, StreamExt};
+use openshell_core::proto::compute::v1::{
+    CreateSandboxRequest, CreateSandboxResponse, DeleteSandboxRequest, DeleteSandboxResponse,
+    GetCapabilitiesRequest, GetCapabilitiesResponse, GetSandboxRequest, GetSandboxResponse,
+    ListSandboxesRequest, ListSandboxesResponse, StopSandboxRequest, StopSandboxResponse,
+    ValidateSandboxCreateRequest, ValidateSandboxCreateResponse, WatchSandboxesEvent,
+    WatchSandboxesRequest, compute_driver_server::ComputeDriver,
+};
+use std::pin::Pin;
+use std::sync::Arc;
+use tonic::{Request, Response, Status};
+
+use crate::AppleContainerComputeDriver;
+
+/// Tonic service wrapper around [`AppleContainerComputeDriver`].
+#[derive(Debug, Clone)]
+pub struct ComputeDriverService {
+    driver: Arc<AppleContainerComputeDriver>,
+}
+
+impl ComputeDriverService {
+    /// Create a new gRPC service.
+    #[must_use]
+    pub fn new(driver: Arc<AppleContainerComputeDriver>) -> Self {
+        Self { driver }
+    }
+}
+
+#[tonic::async_trait]
+impl ComputeDriver for ComputeDriverService {
+    async fn get_capabilities(
+        &self,
+        _request: Request<GetCapabilitiesRequest>,
+    ) -> Result<Response<GetCapabilitiesResponse>, Status> {
+        Ok(Response::new(self.driver.capabilities()))
+    }
+
+    async fn validate_sandbox_create(
+        &self,
+        request: Request<ValidateSandboxCreateRequest>,
+    ) -> Result<Response<ValidateSandboxCreateResponse>, Status> {
+        let sandbox = request
+            .into_inner()
+            .sandbox
+            .ok_or_else(|| Status::invalid_argument("sandbox is required"))?;
+        self.driver.validate_sandbox_create(&sandbox)?;
+        Ok(Response::new(ValidateSandboxCreateResponse {}))
+    }
+
+    async fn get_sandbox(
+        &self,
+        request: Request<GetSandboxRequest>,
+    ) -> Result<Response<GetSandboxResponse>, Status> {
+        let request = request.into_inner();
+        if request.sandbox_id.is_empty() && request.sandbox_name.is_empty() {
+            return Err(Status::invalid_argument(
+                "sandbox_id or sandbox_name is required",
+            ));
+        }
+        let sandbox = self
+            .driver
+            .get_sandbox(&request.sandbox_id, &request.sandbox_name)
+            .await?
+            .ok_or_else(|| Status::not_found("sandbox not found"))?;
+        if !request.sandbox_id.is_empty() && request.sandbox_id != sandbox.id {
+            return Err(Status::failed_precondition(
+                "sandbox_id did not match the fetched sandbox",
+            ));
+        }
+        Ok(Response::new(GetSandboxResponse {
+            sandbox: Some(sandbox),
+        }))
+    }
+
+    async fn list_sandboxes(
+        &self,
+        _request: Request<ListSandboxesRequest>,
+    ) -> Result<Response<ListSandboxesResponse>, Status> {
+        Ok(Response::new(ListSandboxesResponse {
+            sandboxes: self.driver.list_sandboxes().await?,
+        }))
+    }
+
+    async fn create_sandbox(
+        &self,
+        request: Request<CreateSandboxRequest>,
+    ) -> Result<Response<CreateSandboxResponse>, Status> {
+        let sandbox = request
+            .into_inner()
+            .sandbox
+            .ok_or_else(|| Status::invalid_argument("sandbox is required"))?;
+        self.driver.create_sandbox(&sandbox).await?;
+        Ok(Response::new(CreateSandboxResponse {}))
+    }
+
+    async fn stop_sandbox(
+        &self,
+        request: Request<StopSandboxRequest>,
+    ) -> Result<Response<StopSandboxResponse>, Status> {
+        let request = request.into_inner();
+        self.driver
+            .stop_sandbox(&request.sandbox_id, &request.sandbox_name)
+            .await?;
+        Ok(Response::new(StopSandboxResponse {}))
+    }
+
+    async fn delete_sandbox(
+        &self,
+        request: Request<DeleteSandboxRequest>,
+    ) -> Result<Response<DeleteSandboxResponse>, Status> {
+        let request = request.into_inner();
+        let deleted = self
+            .driver
+            .delete_sandbox(&request.sandbox_id, &request.sandbox_name)
+            .await?;
+        Ok(Response::new(DeleteSandboxResponse { deleted }))
+    }
+
+    type WatchSandboxesStream =
+        Pin<Box<dyn Stream<Item = Result<WatchSandboxesEvent, Status>> + Send + 'static>>;
+
+    async fn watch_sandboxes(
+        &self,
+        _request: Request<WatchSandboxesRequest>,
+    ) -> Result<Response<Self::WatchSandboxesStream>, Status> {
+        let stream = self.driver.watch_sandboxes()?;
+        let stream = stream.map(|item| item.map_err(Status::internal));
+        Ok(Response::new(Box::pin(stream)))
+    }
+}
diff --git a/crates/openshell-driver-apple-container/src/lib.rs b/crates/openshell-driver-apple-container/src/lib.rs
new file mode 100644
index 000000000..a93257a27
--- /dev/null
+++ b/crates/openshell-driver-apple-container/src/lib.rs
@@ -0,0 +1,17 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// The compute-driver service boundary uses `tonic::Status` directly so driver
+// errors cross the in-process and gRPC implementations with the same shape.
+#![allow(clippy::result_large_err)]
+
+//! Apple Container compute driver.
+ +pub mod cli; +pub mod config; +pub mod driver; +pub mod grpc; + +pub use config::AppleContainerComputeConfig; +pub use driver::{AppleContainerComputeDriver, SupervisorReadiness}; +pub use grpc::ComputeDriverService; diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index cb19cb5f6..1216c316c 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -17,6 +17,7 @@ path = "src/main.rs" [dependencies] openshell-bootstrap = { path = "../openshell-bootstrap" } openshell-core = { path = "../openshell-core", default-features = false } +openshell-driver-apple-container = { path = "../openshell-driver-apple-container" } openshell-driver-docker = { path = "../openshell-driver-docker" } openshell-driver-kubernetes = { path = "../openshell-driver-kubernetes" } openshell-driver-podman = { path = "../openshell-driver-podman" } diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index ce7734262..aa90b4a80 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -14,7 +14,7 @@ use tracing::{info, warn}; use tracing_subscriber::EnvFilter; use crate::certgen; -use crate::compute::{DockerComputeConfig, VmComputeConfig}; +use crate::compute::{AppleContainerComputeConfig, DockerComputeConfig, VmComputeConfig}; use crate::config_file::{self, ConfigFile, GatewayFileSection}; use crate::defaults::{self, LocalTlsPaths}; use crate::{run_server, tracing_bus::TracingLogBus}; @@ -96,12 +96,14 @@ struct RunArgs { /// Compute drivers configured for this gateway. /// - /// Accepts a comma-delimited list such as `kubernetes` or - /// `kubernetes,podman`. The configuration format is future-proofed for - /// multiple drivers, but the gateway currently requires exactly one. + /// Accepts exactly one driver name today: `kubernetes`, `podman`, `docker`, + /// `vm`, or `apple-container`. 
The value is parsed as a list because the + /// configuration format reserves room for multiple drivers, but the gateway + /// currently rejects more than one. /// When unset, the gateway auto-detects the driver based on the runtime /// environment (Kubernetes → Podman → Docker CLI or socket). VM is never - /// auto-detected and requires explicit configuration. + /// auto-detected and requires explicit configuration. Apple Container is + /// also explicit because it is macOS-specific and not a package default. #[arg( long, alias = "driver", @@ -411,6 +413,7 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { args.port, )?; let docker_config = build_docker_config(file.as_ref(), local_tls.as_ref())?; + let apple_container_config = build_apple_container_config(file.as_ref(), local_tls.as_ref())?; if args.disable_tls { warn!("TLS disabled — listening on plaintext HTTP"); @@ -450,6 +453,7 @@ async fn run_from_args(mut args: RunArgs, matches: ArgMatches) -> Result<()> { config, vm_config, docker_config, + apple_container_config, file, tracing_log_bus, )) @@ -668,7 +672,12 @@ fn effective_single_driver(args: &RunArgs) -> Option { fn is_singleplayer_driver(args: &RunArgs) -> bool { matches!( effective_single_driver(args), - Some(ComputeDriverKind::Docker | ComputeDriverKind::Podman | ComputeDriverKind::Vm) + Some( + ComputeDriverKind::Docker + | ComputeDriverKind::Podman + | ComputeDriverKind::Vm + | ComputeDriverKind::AppleContainer, + ) ) } @@ -755,6 +764,34 @@ fn build_docker_config( Ok(cfg) } +/// Build [`AppleContainerComputeConfig`] using the same inheritance pattern as +/// [`build_vm_config`]. 
+fn build_apple_container_config( + file: Option<&ConfigFile>, + local_tls: Option<&LocalTlsPaths>, +) -> Result { + let mut cfg = file.map_or_else( + || Ok(AppleContainerComputeConfig::default()), + |file| { + let merged = config_file::driver_table( + ComputeDriverKind::AppleContainer, + &file.openshell.gateway, + file.openshell.drivers.get("apple-container"), + ); + merged.try_into().map_err(|e| { + miette::miette!("invalid [openshell.drivers.apple-container] table: {e}") + }) + }, + )?; + apply_guest_tls_defaults( + &mut cfg.guest_tls_ca, + &mut cfg.guest_tls_cert, + &mut cfg.guest_tls_key, + local_tls, + ); + Ok(cfg) +} + fn apply_guest_tls_defaults( ca: &mut Option, cert: &mut Option, @@ -1268,6 +1305,30 @@ mod tests { assert!(!super::resolve_mtls_auth_enabled(&args, &matches, None)); } + #[test] + fn mtls_auth_auto_defaults_for_apple_container_driver() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _guard = EnvVarGuard::remove("OPENSHELL_ENABLE_MTLS_AUTH"); + + let (args, matches) = parse_with_args(&[ + "openshell-gateway", + "--db-url", + "sqlite::memory:", + "--drivers", + "apple-container", + "--tls-cert", + "/tmp/server.crt", + "--tls-key", + "/tmp/server.key", + "--tls-client-ca", + "/tmp/ca.crt", + ]); + + assert!(super::resolve_mtls_auth_enabled(&args, &matches, None)); + } + #[test] fn file_mtls_auth_value_overrides_local_auto_default() { let _lock = ENV_LOCK @@ -1528,7 +1589,7 @@ ssh_session_ttl_secs = 1234 #[test] fn singleplayer_driver_matches_only_one_local_driver() { - for driver in ["docker", "podman", "vm"] { + for driver in ["docker", "podman", "vm", "apple-container"] { let (args, _) = parse_with_args(&[ "openshell-gateway", "--db-url", diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 6d687fb7c..3206f36c0 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -6,6 +6,7 @@ pub mod 
lease; pub mod vm; +pub use openshell_driver_apple_container::AppleContainerComputeConfig; pub use openshell_driver_docker::DockerComputeConfig; pub use vm::VmComputeConfig; @@ -29,6 +30,9 @@ use openshell_core::proto::{ PlatformEvent, Sandbox, SandboxCondition, SandboxPhase, SandboxSpec, SandboxStatus, SandboxTemplate, SshSession, }; +use openshell_driver_apple_container::{ + AppleContainerComputeDriver, ComputeDriverService as AppleContainerDriverService, +}; use openshell_driver_docker::DockerComputeDriver; use openshell_driver_kubernetes::{ ComputeDriverService, KubernetesComputeConfig, KubernetesComputeDriver, @@ -71,8 +75,23 @@ impl ShutdownCleanup for DockerComputeDriver { } } +#[tonic::async_trait] +impl ShutdownCleanup for AppleContainerComputeDriver { + async fn cleanup_on_shutdown(&self) -> Result<(), String> { + let stopped = self + .stop_managed_containers_on_shutdown() + .await + .map_err(|err| err.to_string())?; + info!( + stopped_containers = stopped, + "Stopped Apple Container sandboxes during gateway shutdown" + ); + Ok(()) + } +} + /// Resume a single sandbox whose store record indicates it should be -/// running. Implemented by drivers (currently only Docker) where compute +/// running. Implemented by local drivers where compute /// resources do not auto-restart with the gateway. Returns `Ok(true)` if /// the backend resource was found and resumed (or was already running), /// `Ok(false)` if no backend resource exists. @@ -89,6 +108,16 @@ impl StartupResume for DockerComputeDriver { .map_err(|err| err.to_string()) } } + +#[tonic::async_trait] +impl StartupResume for AppleContainerComputeDriver { + async fn resume_sandbox(&self, sandbox_id: &str, sandbox_name: &str) -> Result { + Self::resume_sandbox(self, sandbox_id, sandbox_name) + .await + .map_err(|err| err.to_string()) + } +} + /// Interval between store-vs-backend reconciliation sweeps. 
const RECONCILE_INTERVAL: Duration = Duration::from_secs(60); @@ -412,6 +441,41 @@ impl ComputeRuntime { .await } + /// Create a compute runtime backed by the local Apple Container service. + pub async fn new_apple_container( + config: AppleContainerComputeConfig, + store: Arc, + sandbox_index: SandboxIndex, + sandbox_watch_bus: SandboxWatchBus, + tracing_log_bus: TracingLogBus, + supervisor_sessions: Arc, + ) -> Result { + let driver = Arc::new( + AppleContainerComputeDriver::new(config, supervisor_sessions.clone()) + .await + .map_err(|err| ComputeError::Message(err.to_string()))?, + ); + let gateway_bind_addresses = driver.gateway_bind_addresses(); + let shutdown_cleanup: Arc = driver.clone(); + let startup_resume: Arc = driver.clone(); + let driver: SharedComputeDriver = Arc::new(AppleContainerDriverService::new(driver)); + Self::from_driver( + ComputeDriverKind::AppleContainer, + driver, + Some(shutdown_cleanup), + Some(startup_resume), + None, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions, + true, + gateway_bind_addresses, + ) + .await + } + #[must_use] pub fn default_image(&self) -> &str { &self.default_image @@ -443,6 +507,10 @@ impl ComputeRuntime { sandbox: Sandbox, sandbox_token: Option, ) -> Result { + // Keep store insertion and backend creation ordered against delete and + // watch reconciliation. Without this, a delete can remove the store + // record in the narrow window before the driver creates the backend. + let _guard = self.sync_lock.lock().await; let sandbox_id = sandbox.object_id().to_string(); let mut driver_sandbox = driver_sandbox_from_public(&sandbox, self.driver_kind).map_err(|status| *status)?; @@ -601,7 +669,7 @@ impl ComputeRuntime { /// Resume sandboxes whose store records say they should be running. /// Drivers that do not auto-restart compute resources across gateway - /// restarts (currently only Docker) implement `StartupResume`. 
For + /// restarts (Docker and Apple Container) implement `StartupResume`. For /// each sandbox in the store whose phase is not `Deleting` or /// `Error`, we ask the driver to resume the underlying resource. If /// the driver reports that the resource no longer exists or fails to diff --git a/crates/openshell-server/src/config_file.rs b/crates/openshell-server/src/config_file.rs index 39cf02bba..bd9f8cad6 100644 --- a/crates/openshell-server/src/config_file.rs +++ b/crates/openshell-server/src/config_file.rs @@ -119,6 +119,9 @@ pub struct GatewayFileSection { pub service_account_name: Option, #[serde(default)] pub host_gateway_ip: Option, + /// Hostname or IP copied into drivers whose guests call back through host DNS. + #[serde(default)] + pub host_callback_host: Option, #[serde(default)] pub enable_user_namespaces: Option, /// Lifetime (seconds) of the projected `ServiceAccount` token kubelet @@ -284,6 +287,14 @@ fn inheritable_keys(driver: ComputeDriverKind) -> &'static [&'static str] { "guest_tls_cert", "guest_tls_key", ], + ComputeDriverKind::AppleContainer => &[ + "sandbox_namespace", + "default_image", + "host_callback_host", + "guest_tls_ca", + "guest_tls_cert", + "guest_tls_key", + ], ComputeDriverKind::Vm => &[ "default_image", "guest_tls_ca", @@ -301,6 +312,7 @@ fn gateway_inherited_value(g: &GatewayFileSection, key: &str) -> Option g.client_tls_secret_name.as_deref().map(string_value), "service_account_name" => g.service_account_name.as_deref().map(string_value), "host_gateway_ip" => g.host_gateway_ip.as_deref().map(string_value), + "host_callback_host" => g.host_callback_host.as_deref().map(string_value), "enable_user_namespaces" => g.enable_user_namespaces.map(toml::Value::Boolean), "sa_token_ttl_secs" => g.sa_token_ttl_secs.map(toml::Value::Integer), "guest_tls_ca" => g.guest_tls_ca.as_deref().map(path_value), @@ -546,6 +558,45 @@ version = 2 ); } + #[test] + fn apple_container_driver_table_inherits_gateway_defaults() { + let gateway = 
GatewayFileSection { + sandbox_namespace: Some("agents".to_string()), + default_image: Some("ghcr.io/nvidia/openshell/sandbox:0.9".to_string()), + host_callback_host: Some("host.container.internal".to_string()), + guest_tls_ca: Some(PathBuf::from("/gateway/ca.crt")), + guest_tls_cert: Some(PathBuf::from("/gateway/tls.crt")), + guest_tls_key: Some(PathBuf::from("/gateway/tls.key")), + ..Default::default() + }; + let merged = driver_table(ComputeDriverKind::AppleContainer, &gateway, None); + let table = merged.as_table().expect("table"); + assert_eq!( + table.get("sandbox_namespace").and_then(|v| v.as_str()), + Some("agents") + ); + assert_eq!( + table.get("default_image").and_then(|v| v.as_str()), + Some("ghcr.io/nvidia/openshell/sandbox:0.9") + ); + assert_eq!( + table.get("host_callback_host").and_then(|v| v.as_str()), + Some("host.container.internal") + ); + assert_eq!( + table.get("guest_tls_ca").and_then(|v| v.as_str()), + Some("/gateway/ca.crt") + ); + assert_eq!( + table.get("guest_tls_cert").and_then(|v| v.as_str()), + Some("/gateway/tls.crt") + ); + assert_eq!( + table.get("guest_tls_key").and_then(|v| v.as_str()), + Some("/gateway/tls.key") + ); + } + #[test] fn driver_table_specific_value_overrides_gateway_default() { let gateway = GatewayFileSection { diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index e60ce3995..c3f3935d7 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -1961,6 +1961,10 @@ mod tests { telemetry_compute_driver(Some(openshell_core::ComputeDriverKind::Vm)), TelemetryComputeDriver::Vm ); + assert_eq!( + telemetry_compute_driver(Some(openshell_core::ComputeDriverKind::AppleContainer)), + TelemetryComputeDriver::AppleContainer + ); assert_eq!( telemetry_compute_driver(None), TelemetryComputeDriver::Unknown diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 9f1127d0e..4fc0e300b 100644 --- 
a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -60,7 +60,7 @@ use tracing::{debug, error, info, warn}; #[cfg(test)] pub(crate) static TEST_ENV_LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); -use compute::{ComputeRuntime, DockerComputeConfig, VmComputeConfig}; +use compute::{AppleContainerComputeConfig, ComputeRuntime, DockerComputeConfig, VmComputeConfig}; pub use grpc::OpenShellService; pub use http::{health_router, http_router, metrics_router, service_http_router}; pub use multiplex::{MultiplexService, MultiplexedService}; @@ -200,6 +200,7 @@ pub async fn run_server( config: Config, vm_config: VmComputeConfig, docker_config: DockerComputeConfig, + apple_container_config: AppleContainerComputeConfig, config_file: Option, tracing_log_bus: TracingLogBus, ) -> Result<()> { @@ -235,6 +236,7 @@ pub async fn run_server( &config, &vm_config, &docker_config, + &apple_container_config, config_file.as_ref(), store.clone(), sandbox_index.clone(), @@ -702,6 +704,7 @@ async fn build_compute_runtime( config: &Config, vm_config: &VmComputeConfig, docker_config: &DockerComputeConfig, + apple_container_config: &AppleContainerComputeConfig, file: Option<&config_file::ConfigFile>, store: Arc, sandbox_index: SandboxIndex, @@ -777,6 +780,31 @@ async fn build_compute_runtime( .await .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) } + ComputeDriverKind::AppleContainer => { + let mut apple_container = apple_container_config.clone(); + let gateway_port = config.bind_address.port(); + if gateway_port == 0 { + return Err(Error::config( + "apple-container compute driver requires a fixed non-zero gateway bind port", + )); + } + apple_container.gateway_port = gateway_port; + if let Ok(path) = std::env::var("OPENSHELL_APPLE_CONTAINER_SUPERVISOR_BIN_DIR") { + apple_container.supervisor_bin_dir = PathBuf::from(path); + } + apple_container.log_level.clone_from(&config.log_level); + + ComputeRuntime::new_apple_container( + 
apple_container, + store, + sandbox_index, + sandbox_watch_bus, + tracing_log_bus, + supervisor_sessions, + ) + .await + .map_err(|e| Error::execution(format!("failed to create compute runtime: {e}"))) + } } } @@ -865,14 +893,15 @@ fn configured_compute_driver(config: &Config) -> Result { Some(driver) => Ok(driver), None => Err(Error::config( "no compute driver configured and auto-detection found no suitable driver; \ - set --drivers or OPENSHELL_DRIVERS to kubernetes, podman, docker, or vm", + set --drivers or OPENSHELL_DRIVERS to kubernetes, podman, docker, vm, or apple-container", )), }, [ driver @ (ComputeDriverKind::Kubernetes | ComputeDriverKind::Vm | ComputeDriverKind::Docker - | ComputeDriverKind::Podman), + | ComputeDriverKind::Podman + | ComputeDriverKind::AppleContainer), ] => Ok(*driver), drivers => Err(Error::config(format!( "multiple compute drivers are not supported yet; configured drivers: {}", @@ -1312,6 +1341,15 @@ mod tests { ); } + #[test] + fn configured_compute_driver_accepts_apple_container() { + let config = Config::new(None).with_compute_drivers([ComputeDriverKind::AppleContainer]); + assert_eq!( + configured_compute_driver(&config).unwrap(), + ComputeDriverKind::AppleContainer + ); + } + #[test] fn kubernetes_sandbox_jwt_expiry_disabled_warns_only_for_kubernetes_zero_ttl() { fn config_with_jwt_ttl(ttl_secs: u64) -> Config { diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index 4adf9e8b6..9bf27cce1 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -69,6 +69,12 @@ impl openshell_driver_docker::SupervisorReadiness for SupervisorSessionRegistry } } +impl openshell_driver_apple_container::SupervisorReadiness for SupervisorSessionRegistry { + fn is_supervisor_connected(&self, sandbox_id: &str) -> bool { + Self::is_connected(self, sandbox_id) + } +} + /// Registry of active supervisor sessions and pending 
relay channels. #[derive(Default)] pub struct SupervisorSessionRegistry { diff --git a/docs/about/installation.mdx b/docs/about/installation.mdx index f0ad72455..87ccbcc1d 100644 --- a/docs/about/installation.mdx +++ b/docs/about/installation.mdx @@ -30,6 +30,7 @@ OpenShell supports several local compute drivers. Package-managed gateways leave |---|---|---| | Podman | The gateway is configured to create rootless Podman containers through the Podman API socket. | Linux with Podman 5.x, cgroups v2, rootless networking, and an active Podman user socket. | | Docker | The gateway is configured to create containers through Docker Desktop or Docker Engine. | Docker Desktop or Docker Engine 28.04 or later on the gateway host. | +| Apple Container | The gateway is configured to create Apple Container lightweight VM sandboxes through Apple's `container` CLI. | macOS on Apple silicon with Apple Container installed and running. | | MicroVM | The gateway is configured to create VM-backed sandboxes. | Host virtualization support. MicroVM uses Hypervisor.framework on macOS, KVM on Linux, and QEMU for GPU-backed sandboxes on Linux. | For detailed driver behavior, refer to [Sandbox Compute Drivers](/reference/sandbox-compute-drivers). For gateway and sandbox operations, refer to [Gateways](/sandboxes/manage-gateways) and [Sandboxes](/sandboxes/manage-sandboxes). diff --git a/docs/get-started/quickstart.mdx b/docs/get-started/quickstart.mdx index ebd7a9880..82f9e54bd 100644 --- a/docs/get-started/quickstart.mdx +++ b/docs/get-started/quickstart.mdx @@ -14,7 +14,7 @@ This page gets you from a reachable OpenShell gateway to a running, policy-enfor Before you begin, make sure you have: - A reachable OpenShell gateway. -- At least one compute driver configured for the gateway: Kubernetes, Docker, Podman, or MicroVM. +- At least one compute driver configured for the gateway: Kubernetes, Docker, Podman, Apple Container, or MicroVM. - The OpenShell CLI installed on your workstation. 
For a complete list of requirements, refer to [Support Matrix](/reference/support-matrix). diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index ff4542136..2ab72c2a0 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -69,7 +69,7 @@ metrics_bind_address = "0.0.0.0:9090" log_level = "info" # When empty, the gateway auto-detects Kubernetes, then Podman, then Docker. -# VM is never auto-detected and requires an explicit entry here. +# VM and Apple Container are never auto-detected and require an explicit entry here. compute_drivers = ["kubernetes"] sandbox_namespace = "openshell" @@ -134,9 +134,9 @@ user_role = "openshell-user" scopes_claim = "" ``` -Local Docker, Podman, and VM gateways can also set `[openshell.gateway.mtls_auth] enabled = true` to authenticate CLI callers from verified client certificates. Kubernetes deployments must leave this unset and use OIDC or a trusted access proxy; the Helm chart does not render this table. +Local Docker, Podman, Apple Container, and VM gateways can also set `[openshell.gateway.mtls_auth] enabled = true` to authenticate CLI callers from verified client certificates. Kubernetes deployments must leave this unset and use OIDC or a trusted access proxy; the Helm chart does not render this table. -`[openshell.gateway.gateway_jwt] ttl_secs` controls gateway-minted sandbox JWT lifetime. When omitted, it defaults to `0`: the token `exp` claim and `expires_at_ms` response field become `0`, and the sandbox JWT does not expire. Use that default only for local single-player Docker, Podman, or VM gateways. Kubernetes and other shared deployments should set a positive TTL; Helm renders `3600` seconds by default, and the gateway logs a warning when a Kubernetes gateway uses `0`. +`[openshell.gateway.gateway_jwt] ttl_secs` controls gateway-minted sandbox JWT lifetime. 
When omitted, it defaults to `0`: the token `exp` claim and `expires_at_ms` response field become `0`, and the sandbox JWT does not expire. Use that default only for local single-player Docker, Podman, Apple Container, or VM gateways. Kubernetes and other shared deployments should set a positive TTL; Helm renders `3600` seconds by default, and the gateway logs a warning when a Kubernetes gateway uses `0`. `[openshell.gateway.auth] allow_unauthenticated_users = true` is an unsafe local-development and trusted-proxy escape hatch. It accepts user-facing CLI/API calls without OIDC or mTLS credentials while sandbox supervisors still authenticate with gateway-minted sandbox JWTs. Leave it false for shared and production gateways. @@ -252,7 +252,7 @@ socket_path = "/run/user/1000/podman/podman.sock" default_image = "ghcr.io/nvidia/openshell/sandbox:latest" image_pull_policy = "missing" # always | missing | never | newer grpc_endpoint = "https://host.containers.internal:17670" -# The gateway overwrites gateway_port from bind_address at runtime. +# The gateway replaces this with bind_address.port at runtime. gateway_port = 17670 network_name = "openshell" # Omit for the platform default: empty on Linux, 192.168.127.254 on macOS Podman machine. @@ -276,6 +276,43 @@ sandbox_pids_limit = 2048 health_check_interval_secs = 10 ``` +### Apple Container + +Sandboxes run as Linux containers inside Apple Container lightweight VMs on macOS. The gateway invokes Apple's `container` CLI and uses JSON output for service, network, and container status. + +When `grpc_endpoint` is empty, the driver builds `http://<host_callback_host>:<gateway_port>` for the sandbox supervisor callback. If `guest_tls_ca`, `guest_tls_cert`, and `guest_tls_key` are all configured, the implicit callback switches to `https://<host_callback_host>:<gateway_port>` and the driver bind-mounts those files into the sandbox at the standard supervisor TLS paths. The gateway replaces `gateway_port` with `bind_address.port` at runtime and rejects port `0`, matching the other local drivers.
The default `host_callback_host` is `host.container.internal`, which Apple Container resolves inside the sandbox VM. The gateway also inspects `container network list --format json` and registers an extra listener on the default network `ipv4Gateway` address so loopback-only gateway configs remain reachable from Apple Container VMs. + +Apple Container is treated as a local single-player driver for local mTLS auto-defaults, matching Docker, Podman, and VM. Partial guest TLS configuration is rejected so a typo cannot silently downgrade the supervisor callback to plaintext. + +```toml +[openshell] +version = 1 + +[openshell.gateway] +bind_address = "127.0.0.1:17670" +log_level = "info" +compute_drivers = ["apple-container"] + +[openshell.drivers.apple-container] +container_bin = "container" +default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +sandbox_namespace = "default" +# Empty uses http://host.container.internal:<gateway_port>, or https:// when +# all guest_tls_* fields are set. +grpc_endpoint = "" +# The gateway replaces this with bind_address.port at runtime. +gateway_port = 17670 +host_callback_host = "host.container.internal" +# Directory containing a Linux openshell-sandbox binary. Can also be set with +# OPENSHELL_APPLE_CONTAINER_SUPERVISOR_BIN_DIR. +supervisor_bin_dir = "/usr/local/libexec/openshell" +sandbox_ssh_socket_path = "/run/openshell/ssh.sock" +stop_timeout_secs = 10 +guest_tls_ca = "/etc/openshell/certs/ca.pem" +guest_tls_cert = "/etc/openshell/certs/client.pem" +guest_tls_key = "/etc/openshell/certs/client-key.pem" +``` + ### MicroVM Each sandbox runs inside its own libkrun microVM managed by the standalone `openshell-driver-vm` subprocess. Use this driver when you want stronger isolation than container namespaces alone.
diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx index 95a319c37..d8fc85af5 100644 --- a/docs/reference/sandbox-compute-drivers.mdx +++ b/docs/reference/sandbox-compute-drivers.mdx @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 title: "Sandbox Compute Drivers" sidebar-title: "Compute Drivers" -description: "Reference for Docker, Podman, MicroVM, and Kubernetes sandbox compute drivers." -keywords: "Generative AI, Cybersecurity, AI Agents, Sandboxing, Docker, Podman, MicroVM, Kubernetes, Reference" +description: "Reference for Docker, Podman, Apple Container, MicroVM, and Kubernetes sandbox compute drivers." +keywords: "Generative AI, Cybersecurity, AI Agents, Sandboxing, Docker, Podman, Apple Container, MicroVM, Kubernetes, Reference" position: 4 --- @@ -21,22 +21,24 @@ Configure the compute driver on the gateway. Current releases accept one driver compute_drivers = ["docker"] ``` -Supported values are `docker`, `podman`, `kubernetes`, and `vm`. +Supported values are `docker`, `podman`, `apple-container`, `kubernetes`, and `vm`. -When `compute_drivers` is unset, the gateway auto-detects Kubernetes, then Podman, then Docker by CLI availability or a local Unix socket. The VM driver is never auto-detected; configure it explicitly with `compute_drivers = ["vm"]` or set `OPENSHELL_DRIVERS=vm` in the launch environment. +When `compute_drivers` is unset, the gateway auto-detects Kubernetes, then Podman, then Docker by CLI availability or a local Unix socket. The VM and Apple Container drivers are never auto-detected; configure them explicitly with `compute_drivers = ["vm"]`, `compute_drivers = ["apple-container"]`, or the matching `OPENSHELL_DRIVERS` value in the launch environment. Common gateway options: | Gateway TOML option | Description | |---|---| -| `compute_drivers = [""]` | Select the compute driver. Supported values are `docker`, `podman`, `kubernetes`, and `vm`. 
| +| `compute_drivers = [""]` | Select the compute driver. Supported values are `docker`, `podman`, `apple-container`, `kubernetes`, and `vm`. | Set driver-specific values such as sandbox images, callback endpoints, network names, TLS material, and VM sizing in the gateway TOML file. See the [Gateway Configuration File](./gateway-config) reference for the full `[openshell.drivers.]` schema. Sandbox create supports `--cpu` and `--memory` for per-sandbox compute sizing. -Docker and Podman apply them as runtime limits. Kubernetes applies them as both -container requests and limits. The VM driver accepts the fields but currently -ignores them. +Docker, Podman, and Apple Container apply them as runtime limits. Kubernetes +applies them as both container requests and limits. The VM driver accepts the +fields but currently ignores them. Apple Container requires `--cpu` values that +resolve to a whole CPU count because Apple's `container` CLI accepts integer +CPU counts. Sandbox create also accepts experimental driver-owned config through `--driver-config-json`. The value is a JSON object keyed by driver name. The @@ -198,6 +200,22 @@ container root, supervisor files, `/etc/openshell`, `/etc/openshell-tls`, authentication material, or network namespace paths. These checks do not make host bind mounts safe. +## Apple Container Driver + +[Apple Container](https://opensource.apple.com/projects/container/)-backed sandboxes run as Linux containers inside lightweight VMs on macOS. Use Apple Container for local macOS workflows on Apple silicon when you want a native Apple container runtime instead of Docker Desktop or a Podman machine. + +The gateway invokes Apple's `container` CLI and reads JSON output for service, network, and container state. The driver requires the `container` CLI to be installed on the gateway host. 
+ +For maintainer-level implementation details, refer to the [Apple Container driver README](https://github.com/NVIDIA/OpenShell/blob/main/crates/openshell-driver-apple-container/README.md). + +Select Apple Container with `compute_drivers = ["apple-container"]` in `[openshell.gateway]`. Configure Apple Container driver values such as `container_bin`, `default_image`, `sandbox_namespace`, `grpc_endpoint`, `gateway_port`, `host_callback_host`, `supervisor_bin_dir`, `sandbox_ssh_socket_path`, `stop_timeout_secs`, and `guest_tls_*` in `[openshell.drivers.apple-container]`. + +Apple Container guests call back from inside a lightweight VM, so `127.0.0.1:<port>` points at the guest rather than the macOS host. When `grpc_endpoint` is empty, the driver builds a callback URL from `host_callback_host` and the gateway bind port. The default host callback name is `host.container.internal`. The gateway also registers the Apple Container default network gateway address as an extra listener when the driver discovers it. + +The Apple Container driver requires gateway JWT auth because the supervisor token is delivered through a host-owned auth directory mounted read-only into the sandbox. + +Partial guest TLS configuration is rejected. Set all of `guest_tls_ca`, `guest_tls_cert`, and `guest_tls_key` for an HTTPS supervisor callback, or leave all three unset for plaintext local development. + ## MicroVM Driver MicroVM-backed sandboxes run inside VM-backed isolation instead of a container boundary. Use MicroVM when workloads need a VM boundary instead of a local container boundary. diff --git a/docs/reference/support-matrix.mdx b/docs/reference/support-matrix.mdx index 6fe54921a..18e2b5549 100644 --- a/docs/reference/support-matrix.mdx +++ b/docs/reference/support-matrix.mdx @@ -43,6 +43,7 @@ The gateway can manage sandboxes through several compute drivers. |---|---|---| | Docker | Supported for local development and single-machine gateways. 
| Requires Docker Desktop or Docker Engine on the gateway host. | | Podman | Supported for rootless local and workstation workflows. | Requires a Podman-compatible socket and rootless networking setup. | +| Apple Container | Supported for local macOS workflows on Apple silicon. | Requires Apple's `container` CLI on the gateway host. | | Kubernetes | Supported through the [OpenShell Helm chart](https://github.com/NVIDIA/OpenShell/blob/main/deploy/helm/openshell/README.md). | Requires a Kubernetes cluster supplied by the operator. | | MicroVM | Supported for VM-backed sandboxes. | Uses the VM compute driver and libkrun-based runtime. | @@ -54,6 +55,7 @@ Install the software for the compute driver you use: |---|---|---| | Docker Desktop or Docker Engine | 28.04 | Required for Docker-backed gateways, local image builds, and Docker development workflows. | | Podman | 5.x | Required for Podman-backed gateways. | +| Apple Container CLI | 1.0.0 tested | Required for Apple Container-backed gateways. The driver uses `container system status --format json`, `container list --all --format json`, and `container network list --format json`; no older minimum version is declared yet. | | Kubernetes | 1.29 | Required for Helm deployments and Kubernetes sandbox scheduling. | | Helm | 3.x | Required to install `deploy/helm/openshell`. | | kubectl | Compatible with your cluster | Required for Kubernetes operational inspection and secret creation. | diff --git a/docs/sandboxes/manage-sandboxes.mdx b/docs/sandboxes/manage-sandboxes.mdx index 1a54d0a06..e4567ec97 100644 --- a/docs/sandboxes/manage-sandboxes.mdx +++ b/docs/sandboxes/manage-sandboxes.mdx @@ -399,7 +399,7 @@ Every sandbox moves through a defined set of phases: The gateway's configured compute driver determines how OpenShell creates each sandbox. The CLI workflow stays the same across drivers: you create, connect to, inspect, and delete sandboxes through the gateway API. 
-For Docker, Podman, MicroVM, and Kubernetes behavior, refer to [Sandbox Compute Drivers](/reference/sandbox-compute-drivers). +For Docker, Podman, Apple Container, MicroVM, and Kubernetes behavior, refer to [Sandbox Compute Drivers](/reference/sandbox-compute-drivers). ## Next Steps