diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index 5a43eb980..2f64825c9 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -330,6 +330,11 @@ impl KubernetesComputeDriver { enable_user_namespaces: self.config.enable_user_namespaces, workspace_default_storage_size: &self.config.workspace_default_storage_size, sa_token_ttl_secs: self.config.effective_sa_token_ttl_secs(), + is_platform_mode: sandbox + .spec + .as_ref() + .and_then(|s| s.policy.as_ref()) + .map_or(false, |p| p.network_enforcement == 1), }; obj.data = sandbox_to_k8s_spec(sandbox.spec.as_ref(), &params); let api = self.api(); @@ -823,6 +828,7 @@ fn apply_supervisor_sideload( supervisor_image: &str, supervisor_image_pull_policy: &str, method: SupervisorSideloadMethod, + is_platform_mode: bool, ) { let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { return; @@ -882,16 +888,16 @@ fn apply_supervisor_sideload( serde_json::json!([format!("{}/openshell-sandbox", SUPERVISOR_MOUNT_PATH)]), ); - // Force the supervisor to run as root (UID 0). Sandbox images may set - // a non-root USER directive (e.g. `USER sandbox`), but the supervisor - // needs root to create network namespaces, set up the proxy, and - // configure Landlock/seccomp. The supervisor itself drops privileges - // for child processes via the policy's `run_as_user`/`run_as_group`. - let security_context = container - .entry("securityContext") - .or_insert_with(|| serde_json::json!({})); - if let Some(sc) = security_context.as_object_mut() { - sc.insert("runAsUser".to_string(), serde_json::json!(0)); + // In namespace mode, force root (UID 0) so the supervisor can create + // network namespaces and drop privileges for child processes. + // In platform mode, keep the image's default non-root user.
+ if !is_platform_mode { + let security_context = container + .entry("securityContext") + .or_insert_with(|| serde_json::json!({})); + if let Some(sc) = security_context.as_object_mut() { + sc.insert("runAsUser".to_string(), serde_json::json!(0)); + } } // Add volume mount @@ -1044,6 +1050,10 @@ struct SandboxPodParams<'a> { /// Lifetime (seconds) of the projected `ServiceAccount` token used /// for the bootstrap `IssueSandboxToken` exchange. sa_token_ttl_secs: i64, + /// Platform network enforcement mode (Issue #899). When true, sandbox + /// pods are emitted without elevated capabilities, compatible with + /// restricted-v2 SCC and restricted Pod Security Standard. + is_platform_mode: bool, } impl Default for SandboxPodParams<'_> { @@ -1065,6 +1075,7 @@ impl Default for SandboxPodParams<'_> { enable_user_namespaces: false, workspace_default_storage_size: DEFAULT_WORKSPACE_STORAGE_SIZE, sa_token_ttl_secs: 3600, + is_platform_mode: false, } } } @@ -1265,22 +1276,32 @@ fn sandbox_template_to_k8s( container.insert("env".to_string(), serde_json::Value::Array(env)); - let mut capabilities: Vec<&str> = vec!["SYS_ADMIN", "NET_ADMIN", "SYS_PTRACE", "SYSLOG"]; - if use_user_namespaces { - // In a user namespace the bounding set is reset. SETUID/SETGID are - // needed for the supervisor to drop privileges to the sandbox user. - // DAC_READ_SEARCH is needed for cross-UID /proc/<pid>/fd/ access - // for process identity resolution in network policy enforcement. - capabilities.extend(["SETUID", "SETGID", "DAC_READ_SEARCH"]); + if params.is_platform_mode { + // Platform mode: zero elevated capabilities. Compatible with + // restricted-v2 SCC and restricted Pod Security Standard.
+ container.insert( + "securityContext".to_string(), + serde_json::json!({ + "allowPrivilegeEscalation": false, + "capabilities": { + "drop": ["ALL"] + } + }), + ); + } else { + let mut capabilities: Vec<&str> = vec!["SYS_ADMIN", "NET_ADMIN", "SYS_PTRACE", "SYSLOG"]; + if use_user_namespaces { + capabilities.extend(["SETUID", "SETGID", "DAC_READ_SEARCH"]); + } + container.insert( + "securityContext".to_string(), + serde_json::json!({ + "capabilities": { + "add": capabilities + } + }), + ); } - container.insert( - "securityContext".to_string(), - serde_json::json!({ - "capabilities": { - "add": capabilities - } - }), - ); // Mount client TLS secret for mTLS to the server, plus the projected // ServiceAccount token used to bootstrap the sandbox's gateway JWT @@ -1363,6 +1384,7 @@ fn sandbox_template_to_k8s( params.supervisor_image, params.supervisor_image_pull_policy, params.supervisor_sideload_method, + params.is_platform_mode, ); // Inject workspace persistence (init container + PVC volume mount) so @@ -1750,6 +1772,7 @@ mod tests { "custom-image:latest", "IfNotPresent", SupervisorSideloadMethod::InitContainer, + false, ); let sc = &pod_template["spec"]["containers"][0]["securityContext"]; @@ -1779,6 +1802,7 @@ mod tests { "supervisor-image:latest", "IfNotPresent", SupervisorSideloadMethod::InitContainer, + false, ); let sc = &pod_template["spec"]["containers"][0]["securityContext"]; @@ -1804,6 +1828,7 @@ mod tests { "supervisor-image:latest", "IfNotPresent", SupervisorSideloadMethod::InitContainer, + false, ); // Volume should be an emptyDir @@ -1878,6 +1903,7 @@ mod tests { "supervisor-image:latest", "IfNotPresent", SupervisorSideloadMethod::ImageVolume, + false, ); let volumes = pod_template["spec"]["volumes"] @@ -1932,6 +1958,7 @@ mod tests { "supervisor-image:latest", "", SupervisorSideloadMethod::ImageVolume, + false, ); let volume = &pod_template["spec"]["volumes"][0]; diff --git a/crates/openshell-policy/src/lib.rs b/crates/openshell-policy/src/lib.rs index 
26c8fc9d3..315692e53 100644 --- a/crates/openshell-policy/src/lib.rs +++ b/crates/openshell-policy/src/lib.rs @@ -1821,4 +1821,24 @@ network_policies: "port >65535 should fail to parse" ); } + + #[test] + fn platform_mode_round_trip() { + let mut policy = restrictive_default_policy(); + policy.network_enforcement = 1; // PLATFORM + let yaml = serialize_sandbox_policy(&policy).unwrap(); + let parsed = parse_sandbox_policy(&yaml).unwrap(); + assert_eq!(parsed.network_enforcement, 1); + } + + #[test] + fn platform_mode_passes_validation() { + let mut policy = restrictive_default_policy(); + policy.network_enforcement = 1; // PLATFORM + let result = validate_sandbox_policy(&policy); + assert!( + result.is_ok(), + "Platform mode should pass validation: {result:?}" + ); + } } diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 126416546..b1513b5d7 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -487,7 +487,10 @@ pub async fn run_sandbox( // Generate ephemeral CA and TLS state for HTTPS L7 inspection. // The CA cert is written to disk so sandbox processes can trust it. 
- let (tls_state, ca_file_paths) = if matches!(policy.network.mode, NetworkMode::Proxy) { + let (tls_state, ca_file_paths) = if matches!( + policy.network.mode, + NetworkMode::Proxy | NetworkMode::Platform + ) { match SandboxCa::generate() { Ok(ca) => { let tls_dir = std::path::Path::new("/etc/openshell-tls"); @@ -601,7 +604,10 @@ pub async fn run_sandbox( let entrypoint_pid = Arc::new(AtomicU32::new(0)); let (_proxy, denial_rx, bypass_denial_tx, activity_rx, bypass_activity_tx) = - if matches!(policy.network.mode, NetworkMode::Proxy) { + if matches!( + policy.network.mode, + NetworkMode::Proxy | NetworkMode::Platform + ) { let proxy_policy = policy.network.proxy.as_ref().ok_or_else(|| { miette::miette!( "Network mode is set to proxy but no proxy configuration was provided" @@ -626,6 +632,20 @@ pub async fn run_sandbox( SocketAddr::new(ns.host_ip(), port) }); + // Platform mode: no netns, bind proxy to loopback. + #[cfg(target_os = "linux")] + let bind_addr = bind_addr.or_else(|| { + if matches!(policy.network.mode, NetworkMode::Platform) { + let port = proxy_policy.http_addr.map_or(3128, |addr| addr.port()); + Some(SocketAddr::new( + std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST), + port, + )) + } else { + None + } + }); + #[cfg(not(target_os = "linux"))] let bind_addr: Option<SocketAddr> = None; @@ -705,18 +725,30 @@ pub async fn run_sandbox( #[cfg(not(target_os = "linux"))] let ssh_netns_fd: Option = None; - let ssh_proxy_url = if matches!(policy.network.mode, NetworkMode::Proxy) { + let ssh_proxy_url = if matches!( + policy.network.mode, + NetworkMode::Proxy | NetworkMode::Platform + ) { #[cfg(target_os = "linux")] { - netns.as_ref().map(|ns| { + if let Some(ns) = netns.as_ref() { let port = policy .network .proxy .as_ref() .and_then(|p| p.http_addr) .map_or(3128, |addr| addr.port()); - format!("http://{}:{port}", ns.host_ip()) - }) + Some(format!("http://{}:{port}", ns.host_ip())) + } else { + // Platform mode: proxy on loopback + let port = policy + .network
+ .proxy + .as_ref() + .and_then(|p| p.http_addr) + .map_or(3128, |addr| addr.port()); + Some(format!("http://127.0.0.1:{port}")) + } } #[cfg(not(target_os = "linux"))] { @@ -1730,7 +1762,10 @@ where fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) { let (ro, rw) = - active_baseline_enrichment_paths(matches!(policy.network.mode, NetworkMode::Proxy)); + active_baseline_enrichment_paths(matches!( + policy.network.mode, + NetworkMode::Proxy | NetworkMode::Platform + )); let modified = enrich_sandbox_baseline_paths_with(policy, &ro, &rw, std::path::Path::exists); if modified { diff --git a/crates/openshell-sandbox/src/policy.rs b/crates/openshell-sandbox/src/policy.rs index 0827fa0d0..ce6ee0c43 100644 --- a/crates/openshell-sandbox/src/policy.rs +++ b/crates/openshell-sandbox/src/policy.rs @@ -5,7 +5,8 @@ use openshell_core::proto::{ FilesystemPolicy as ProtoFilesystemPolicy, LandlockPolicy as ProtoLandlockPolicy, - ProcessPolicy as ProtoProcessPolicy, SandboxPolicy as ProtoSandboxPolicy, + NetworkEnforcementMode, ProcessPolicy as ProtoProcessPolicy, + SandboxPolicy as ProtoSandboxPolicy, }; use std::net::SocketAddr; use std::path::PathBuf; @@ -62,6 +63,9 @@ pub enum NetworkMode { Block, Proxy, Allow, + /// Platform mode: Landlock + seccomp + loopback proxy, no network namespace. + /// Compatible with restricted-v2 SCC and restricted Pod Security Standard. + Platform, } #[derive(Debug, Clone)] @@ -99,10 +103,13 @@ impl TryFrom<ProtoSandboxPolicy> for SandboxPolicy { type Error = miette::Report; fn try_from(proto: ProtoSandboxPolicy) -> Result<Self, Self::Error> { - // In cluster mode we always run with proxy networking so all egress - // can be evaluated by OPA and `inference.local` is always addressable.
+ let mode = match proto.network_enforcement() { + NetworkEnforcementMode::Namespace => NetworkMode::Proxy, + NetworkEnforcementMode::Platform => NetworkMode::Platform, + }; + let network = NetworkPolicy { - mode: NetworkMode::Proxy, + mode, proxy: Some(ProxyPolicy { http_addr: None }), }; diff --git a/crates/openshell-sandbox/src/process.rs b/crates/openshell-sandbox/src/process.rs index d004bb7d4..f059584ce 100644 --- a/crates/openshell-sandbox/src/process.rs +++ b/crates/openshell-sandbox/src/process.rs @@ -226,27 +226,25 @@ impl ProcessHandle { cmd.current_dir(dir); } - if matches!(policy.network.mode, NetworkMode::Proxy) { + if matches!( + policy.network.mode, + NetworkMode::Proxy | NetworkMode::Platform + ) { let proxy = policy.network.proxy.as_ref().ok_or_else(|| { miette::miette!( "Network mode is set to proxy but no proxy configuration was provided" ) })?; - // When using network namespace, set proxy URL to the veth host IP - if netns_fd.is_some() { - // The proxy is on 10.200.0.1:3128 (or configured port) - let port = proxy.http_addr.map_or(3128, |addr| addr.port()); - let proxy_url = format!("http://10.200.0.1:{port}"); - // Both uppercase and lowercase variants: curl/wget use uppercase, - // gRPC C-core (libgrpc) checks lowercase http_proxy/https_proxy. 
- for (key, value) in child_env::proxy_env_vars(&proxy_url) { - cmd.env(key, value); - } - } else if let Some(http_addr) = proxy.http_addr { - let proxy_url = format!("http://{http_addr}"); - for (key, value) in child_env::proxy_env_vars(&proxy_url) { - cmd.env(key, value); - } + let port = proxy.http_addr.map_or(3128, |addr| addr.port()); + let proxy_url = if netns_fd.is_some() { + // Namespace mode: proxy on veth host IP + format!("http://10.200.0.1:{port}") + } else { + // Platform mode (or non-Linux): proxy on loopback + format!("http://127.0.0.1:{port}") + }; + for (key, value) in child_env::proxy_env_vars(&proxy_url) { + cmd.env(key, value); } } @@ -368,7 +366,10 @@ impl ProcessHandle { cmd.current_dir(dir); } - if matches!(policy.network.mode, NetworkMode::Proxy) { + if matches!( + policy.network.mode, + NetworkMode::Proxy | NetworkMode::Platform + ) { let proxy = policy.network.proxy.as_ref().ok_or_else(|| { miette::miette!( "Network mode is set to proxy but no proxy configuration was provided" diff --git a/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs b/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs index 675b60b24..a708b4b97 100644 --- a/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs +++ b/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs @@ -70,7 +70,10 @@ pub fn apply_supervisor_prelude() -> Result<()> { } pub fn apply(policy: &SandboxPolicy) -> Result<()> { - let allow_inet = matches!(policy.network.mode, NetworkMode::Proxy | NetworkMode::Allow); + let allow_inet = matches!( + policy.network.mode, + NetworkMode::Proxy | NetworkMode::Allow | NetworkMode::Platform + ); let main_filter = build_filter(allow_inet)?; let clone3_filter = build_clone3_filter()?; diff --git a/crates/openshell-sandbox/tests/platform_mode.rs b/crates/openshell-sandbox/tests/platform_mode.rs new file mode 100644 index 000000000..fe0bb9b0a --- /dev/null +++ b/crates/openshell-sandbox/tests/platform_mode.rs @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: 
Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Tests for NetworkMode::Platform (Issue #899). + +use openshell_sandbox::policy::{NetworkMode, SandboxPolicy}; + +#[test] +fn platform_mode_from_proto() { + use openshell_core::proto::{NetworkEnforcementMode, SandboxPolicy as ProtoSandboxPolicy}; + + let mut proto = ProtoSandboxPolicy::default(); + proto.network_enforcement = NetworkEnforcementMode::Platform as i32; + + let policy: SandboxPolicy = proto.try_into().expect("conversion should succeed"); + assert!( + matches!(policy.network.mode, NetworkMode::Platform), + "expected Platform, got {:?}", + policy.network.mode + ); +} + +#[test] +fn namespace_mode_from_proto_default() { + use openshell_core::proto::SandboxPolicy as ProtoSandboxPolicy; + + let proto = ProtoSandboxPolicy::default(); + let policy: SandboxPolicy = proto.try_into().expect("conversion should succeed"); + assert!( + matches!(policy.network.mode, NetworkMode::Proxy), + "default (zero) should map to Proxy, got {:?}", + policy.network.mode + ); +} + +#[test] +fn platform_mode_allows_proxy_config() { + use openshell_core::proto::{NetworkEnforcementMode, SandboxPolicy as ProtoSandboxPolicy}; + + let mut proto = ProtoSandboxPolicy::default(); + proto.network_enforcement = NetworkEnforcementMode::Platform as i32; + + let policy: SandboxPolicy = proto.try_into().expect("conversion should succeed"); + assert!( + policy.network.proxy.is_some(), + "Platform mode should still have proxy config for loopback CONNECT proxy" + ); +} diff --git a/proto/sandbox.proto b/proto/sandbox.proto index ef0b0540f..28a0f6a1e 100644 --- a/proto/sandbox.proto +++ b/proto/sandbox.proto @@ -13,6 +13,18 @@ package openshell.sandbox.v1; // - Public sandbox resource types live in `openshell.proto`. // - Internal compute-driver sandbox observation types live in `compute_driver.proto`. +// Network enforcement strategy for sandbox isolation. 
+enum NetworkEnforcementMode { + // Use a dedicated network namespace with veth pair and iptables bypass + // rules. Requires CAP_SYS_ADMIN and CAP_NET_ADMIN. Default. + NETWORK_ENFORCEMENT_NAMESPACE = 0; + // Rely on Kubernetes NetworkPolicy for L3/L4 egress control. The + // supervisor binds the CONNECT proxy to loopback instead of veth. No + // elevated capabilities required -- compatible with restricted-v2 SCC + // and restricted Pod Security Standard. + NETWORK_ENFORCEMENT_PLATFORM = 1; +} + // Sandbox security policy configuration. message SandboxPolicy { // Policy version. @@ -25,6 +37,9 @@ message SandboxPolicy { ProcessPolicy process = 4; // Network access policies keyed by name (e.g. "claude_code", "gitlab"). map network_policies = 5; + // Network enforcement mode. Default (0) preserves current namespace-based + // isolation for backward compatibility. + NetworkEnforcementMode network_enforcement = 6; } // Filesystem access policy.