refactor(naming): A1 — ente→arje, vista→revista, pluma→fana

Rename batch de la Fase A del PLAN_MACRO:
- 25 crates ente-* → arje-* (protocol/init/runtime/compat). El linaje
  arje (init Linux) queda con prefijo coherente.
- vista → revista (revista-core + revista-web).
- pluma → fana (fana-md + fana-md-reader-web). fana absorbe el linaje
  markdown de pluma; será el writer DAG editor (prioridad alta).

Cambios:
- git mv de 29 crate dirs + 2 SDDs
- package/lib/bin names + path refs + imports .rs reescritos
- workspace Cargo.toml + comentarios de sección
- SDDs de init/runtime/compat/protocol actualizados a arje-
- SDD de revista + SDD de fana (reescrito: writer DAG editor)
- docs/STATUS.md, ROADMAP.md, PLAN_MACRO.md, arje-boot.md,
  arje-replace-systemd.md actualizados
- docs/changelog/akasha.md → chasqui.md

scripts/rename-fase-a.py idempotente (--dry-run soportado).
cargo check --workspace verde.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
sergio
2026-05-20 00:10:14 +00:00
parent 3fc6dcfa72
commit b83d40a833
159 changed files with 2384 additions and 1111 deletions
+20
View File
@@ -0,0 +1,20 @@
[package]
name = "arje-incarnate"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
publish.workspace = true
description = "Rutina extraída del Init para encarnar Cards en procesos aislados (clone+ns+cgroup+rlimits). Reusable por cualquier supervisor — no implica ser PID 1."
[dependencies]
brahman-card = { path = "../../protocol/brahman-card" }
nix = { workspace = true }
libc = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
+214
View File
@@ -0,0 +1,214 @@
//! Detección runtime de capacidades del kernel/proceso para aislamiento.
//!
//! Esto NO se cachea entre instancias — sysctls pueden cambiar entre boot, y
//! cgroup delegation depende del proceso concreto. Cada `Incarnator::new`
//! hace su detección al construirse.
use std::path::{Path, PathBuf};
/// Snapshot of the isolation-related capabilities detected for the running
/// kernel and the current process. Built by [`CapabilitySet::detect`].
#[derive(Debug, Clone)]
pub struct CapabilitySet {
/// Kernel version as `(major, minor, patch)`; `detect` stores `(0, 0, 0)`
/// when the version cannot be read.
pub kernel_version: (u32, u32, u32),
/// Whether the process is considered to hold `CAP_SYS_ADMIN`.
pub has_cap_sys_admin: bool,
/// Availability of user namespaces as probed from sysctls.
pub user_ns: UserNsStatus,
/// Which cgroup hierarchy layout was found in the mount table.
pub cgroup_v2: CgroupStatus,
/// Whether the directory of our own cgroup is writable by this process.
pub cgroup_delegated: bool,
/// Contents of `/proc/sys/user/max_user_namespaces`, when readable.
pub max_user_namespaces: Option<u64>,
/// Path under `/sys/fs/cgroup` of the cgroup this process belongs to,
/// when it could be determined.
pub our_cgroup: Option<PathBuf>,
}
/// Result of probing whether user namespaces can be created.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum UserNsStatus {
    /// No restriction was detected.
    Allowed,
    /// A sysctl switches user namespaces off.
    DisabledBySysctl,
    /// A Linux Security Module restricts them.
    RestrictedByLsm,
    /// Availability could not be determined.
    Unknown,
}

impl UserNsStatus {
    /// Returns `true` only for [`UserNsStatus::Allowed`].
    pub fn is_allowed(&self) -> bool {
        *self == Self::Allowed
    }
}
/// Layout of the cgroup hierarchies visible in this process' mount table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CgroupStatus {
/// Only `cgroup2` mounts were found.
Unified,
/// Both `cgroup2` and v1 `cgroup` mounts were found.
Hybrid,
/// Only v1 `cgroup` mounts were found.
Legacy,
/// No cgroup mount was found (also reported when the mount table cannot be read).
NotMounted,
}
impl CapabilitySet {
    /// Probes the running kernel and process and returns a fresh snapshot.
    ///
    /// An unreadable kernel version is recorded as `(0, 0, 0)`.
    pub fn detect() -> Self {
        let kernel_version = detect_kernel_version().unwrap_or((0, 0, 0));
        let has_cap_sys_admin = detect_cap_sys_admin();
        let user_ns = detect_user_ns();
        let cgroup_v2 = detect_cgroup_status();
        let cgroup_delegated = detect_cgroup_delegated();
        let max_user_namespaces = read_u64("/proc/sys/user/max_user_namespaces");
        let our_cgroup = detect_our_cgroup();
        Self {
            kernel_version,
            has_cap_sys_admin,
            user_ns,
            cgroup_v2,
            cgroup_delegated,
            max_user_namespaces,
            our_cgroup,
        }
    }

    /// Can a namespace of the given `kind` be created?
    ///
    /// Rules:
    /// - with `CAP_SYS_ADMIN` every kind is allowed;
    /// - `User` otherwise needs user namespaces to be `Allowed`;
    /// - any other kind otherwise needs user namespaces to be `Allowed`
    ///   and a non-zero `max_user_namespaces`.
    pub fn can_create_ns(&self, kind: NsKind) -> bool {
        if self.has_cap_sys_admin {
            return true;
        }
        let userns_allowed = self.user_ns.is_allowed();
        match kind {
            NsKind::User => userns_allowed,
            NsKind::Mount
            | NsKind::Pid
            | NsKind::Net
            | NsKind::Uts
            | NsKind::Ipc
            | NsKind::Cgroup => userns_allowed && self.max_user_namespaces.unwrap_or(0) > 0,
        }
    }
}
/// The namespace kinds this crate knows how to request.
#[derive(Debug, Clone, Copy)]
pub enum NsKind {
    Mount,
    Pid,
    Net,
    Uts,
    Ipc,
    User,
    Cgroup,
}

impl NsKind {
    /// Lower-case name of the namespace kind, as used in messages.
    pub fn name(self) -> &'static str {
        match self {
            Self::Mount => "mount",
            Self::Pid => "pid",
            Self::Net => "net",
            Self::Uts => "uts",
            Self::Ipc => "ipc",
            Self::User => "user",
            Self::Cgroup => "cgroup",
        }
    }
}
/// Parses `/proc/sys/kernel/osrelease` into `(major, minor, patch)`.
///
/// Only the leading run of digits and dots is considered. Major and minor
/// are required; a missing or unparsable patch component becomes `0`.
/// Returns `None` when the file cannot be read or major/minor do not parse.
fn detect_kernel_version() -> Option<(u32, u32, u32)> {
    let release = std::fs::read_to_string("/proc/sys/kernel/osrelease").ok()?;
    // Cut at the first character that is neither a digit nor a dot.
    let numeric_len = release
        .find(|c: char| !(c.is_ascii_digit() || c == '.'))
        .unwrap_or(release.len());
    let mut components = release[..numeric_len].split('.');
    let major = components.next()?.parse().ok()?;
    let minor = components.next()?.parse().ok()?;
    let patch = components
        .next()
        .and_then(|p| p.parse().ok())
        .unwrap_or(0);
    Some((major, minor, patch))
}
/// Reports whether this process effectively holds `CAP_SYS_ADMIN`.
///
/// The effective capability mask is read from the `CapEff:` line of
/// `/proc/self/status` and bit 21 (`CAP_SYS_ADMIN`) is tested. This is more
/// accurate than checking for euid 0: a root process may run with a reduced
/// capability set (e.g. inside a container), and a non-root process may have
/// been granted the capability.
///
/// When the mask cannot be read or parsed, falls back to the previous
/// heuristic: euid 0 is assumed to imply the capability.
fn detect_cap_sys_admin() -> bool {
    /// Bit index of `CAP_SYS_ADMIN` in the kernel capability masks.
    const CAP_SYS_ADMIN_BIT: u32 = 21;

    let effective_mask = std::fs::read_to_string("/proc/self/status")
        .ok()
        .and_then(|status| {
            status.lines().find_map(|line| {
                let hex = line.strip_prefix("CapEff:")?.trim();
                u64::from_str_radix(hex, 16).ok()
            })
        });

    match effective_mask {
        Some(mask) => mask & (1u64 << CAP_SYS_ADMIN_BIT) != 0,
        None => nix::unistd::geteuid().is_root(),
    }
}
/// Probes the sysctls that can switch user namespaces off.
///
/// Checked in order; the first hit wins:
/// 1. `kernel.unprivileged_userns_clone == 0` → `DisabledBySysctl`
///    (traditional Debian/Ubuntu switch),
/// 2. `kernel.apparmor_restrict_unprivileged_userns >= 1` → `RestrictedByLsm`,
/// 3. `user.max_user_namespaces == 0` → `DisabledBySysctl`.
///
/// Missing or unparsable files are treated as "no restriction".
fn detect_user_ns() -> UserNsStatus {
    if read_u64("/proc/sys/kernel/unprivileged_userns_clone") == Some(0) {
        return UserNsStatus::DisabledBySysctl;
    }
    let apparmor = read_u64("/proc/sys/kernel/apparmor_restrict_unprivileged_userns");
    if matches!(apparmor, Some(level) if level >= 1) {
        return UserNsStatus::RestrictedByLsm;
    }
    match read_u64("/proc/sys/user/max_user_namespaces") {
        Some(0) => UserNsStatus::DisabledBySysctl,
        _ => UserNsStatus::Allowed,
    }
}
/// Classifies the cgroup layout by scanning `/proc/self/mountinfo`.
///
/// Each line has the shape `... - <fstype> <source> <opts>`; only the
/// filesystem type right after the `" - "` separator is inspected. An
/// unreadable mount table is reported as `NotMounted`.
fn detect_cgroup_status() -> CgroupStatus {
    let Ok(mountinfo) = std::fs::read_to_string("/proc/self/mountinfo") else {
        return CgroupStatus::NotMounted;
    };

    let fstypes = mountinfo
        .lines()
        .filter_map(|entry| entry.split_once(" - "))
        .filter_map(|(_, after_separator)| after_separator.split_whitespace().next());

    let (mut saw_v2, mut saw_v1) = (false, false);
    for fstype in fstypes {
        match fstype {
            "cgroup2" => saw_v2 = true,
            "cgroup" => saw_v1 = true,
            _ => {}
        }
    }

    if saw_v2 && saw_v1 {
        CgroupStatus::Hybrid
    } else if saw_v2 {
        CgroupStatus::Unified
    } else if saw_v1 {
        CgroupStatus::Legacy
    } else {
        CgroupStatus::NotMounted
    }
}
/// Locates this process' cgroup under `/sys/fs/cgroup`.
///
/// Uses the `0::<path>` entry of `/proc/self/cgroup`. Returns `None` when
/// the file is unreadable or has no such entry.
fn detect_our_cgroup() -> Option<PathBuf> {
    let contents = std::fs::read_to_string("/proc/self/cgroup").ok()?;
    let relative = contents
        .lines()
        .find_map(|entry| entry.strip_prefix("0::"))?
        .trim();
    let absolute = match relative {
        "/" => PathBuf::from("/sys/fs/cgroup"),
        nested => PathBuf::from(format!("/sys/fs/cgroup{nested}")),
    };
    Some(absolute)
}
/// Heuristic for cgroup delegation: is our own cgroup directory writable?
///
/// With cgroup v2 and `Delegate=yes` the directory is owned by the user's
/// uid, so `access(W_OK)` on it succeeds. Returns `false` when our cgroup
/// cannot be determined.
fn detect_cgroup_delegated() -> bool {
    use nix::unistd::{access, AccessFlags};
    match detect_our_cgroup() {
        Some(dir) => access(&dir, AccessFlags::W_OK).is_ok(),
        None => false,
    }
}
/// Reads `path` and parses its trimmed contents as a decimal `u64`.
///
/// Returns `None` if the file cannot be read or does not hold a number.
fn read_u64(path: &str) -> Option<u64> {
    std::fs::read_to_string(Path::new(path))
        .ok()
        .and_then(|raw| raw.trim().parse().ok())
}
#[cfg(test)]
mod tests {
use super::*;
// Detection must cope with whatever the host exposes (missing /proc
// entries, no cgroups, ...) without panicking.
#[test]
fn detect_does_not_panic() {
let _ = CapabilitySet::detect();
}
// Every namespace kind must map to a distinct name.
#[test]
fn ns_kind_names_unique() {
let names = [
NsKind::Mount.name(),
NsKind::Pid.name(),
NsKind::Net.name(),
NsKind::Uts.name(),
NsKind::Ipc.name(),
NsKind::User.name(),
NsKind::Cgroup.name(),
];
// Sort + dedup: any duplicate would shrink the vector.
let mut sorted = names.to_vec();
sorted.sort();
sorted.dedup();
assert_eq!(sorted.len(), names.len());
}
}
+116
View File
@@ -0,0 +1,116 @@
//! Resolución y creación de cgroups v2 para el hijo.
use crate::error::IncarnateError;
use brahman_card::{CgroupSpec, ResourceLimits};
use std::path::{Path, PathBuf};
/// Cgroup of the calling process, taken from the `0::` entry of
/// `/proc/self/cgroup`. Used as the prefix for relative paths declared in
/// `CgroupSpec.path`.
///
/// Returns `None` when the file is unreadable or has no `0::` entry.
pub fn current_cgroup() -> Option<String> {
    let contents = std::fs::read_to_string("/proc/self/cgroup").ok()?;
    for entry in contents.lines() {
        if let Some(path) = entry.strip_prefix("0::") {
            return Some(path.trim().to_string());
        }
    }
    None
}
/// Resolves a declared cgroup path against the real hierarchy.
///
/// - empty input yields an empty string,
/// - an absolute path (leading `/`) is returned untouched,
/// - a relative path is appended to the caller's current cgroup, or to the
///   root when the current cgroup is `/` or unknown.
pub fn resolve_cgroup_path(spec_path: &str) -> String {
    if spec_path.is_empty() || spec_path.starts_with('/') {
        return spec_path.to_string();
    }
    // From here on `spec_path` is known not to start with '/'.
    match current_cgroup() {
        Some(own) if own != "/" => {
            let parent = own.trim_end_matches('/');
            format!("{parent}/{spec_path}")
        }
        _ => format!("/{spec_path}"),
    }
}
/// Crea el cgroup declarado y aplica weights. Devuelve el path absoluto
/// resultante bajo `/sys/fs/cgroup`.
pub fn ensure_cgroup(spec: &CgroupSpec) -> Result<PathBuf, IncarnateError> {
let rel = resolve_cgroup_path(&spec.path);
if rel.is_empty() {
return Err(IncarnateError::CgroupNotWritable {
path: PathBuf::from("(empty)"),
});
}
let abs = PathBuf::from(format!("/sys/fs/cgroup{}", rel));
std::fs::create_dir_all(&abs).map_err(|e| match e.kind() {
std::io::ErrorKind::PermissionDenied => IncarnateError::CgroupNotWritable { path: abs.clone() },
_ => IncarnateError::Io(e),
})?;
if let Some(w) = spec.cpu_weight {
let _ = std::fs::write(abs.join("cpu.weight"), format!("{w}\n"));
}
if let Some(w) = spec.io_weight {
// io.weight requiere "default <n>" en cgroup v2.
let _ = std::fs::write(abs.join("io.weight"), format!("default {w}\n"));
}
Ok(abs)
}
/// Writes `memory.max` and `pids.max` into the cgroup according to
/// `rlimits`, skipping any limit that is `None`.
///
/// A failed write (e.g. the cgroup is not delegated) is logged as a warning
/// and otherwise ignored. The kernel OOM-kills when `memory.max` is
/// exceeded and blocks forks once `pids.max` is reached.
///
/// Returns one `"<file>=<value>"` entry per limit that was actually written.
pub fn apply_rlimits_to_cgroup(cgroup_abs: &Path, rlimits: &ResourceLimits) -> Vec<String> {
    let mut applied = Vec::new();

    if let Some(mem) = rlimits.mem_bytes {
        let path = cgroup_abs.join("memory.max");
        if let Err(e) = std::fs::write(&path, format!("{mem}\n")) {
            tracing::warn!(?e, path = %path.display(), "memory.max write failed");
        } else {
            applied.push(format!("memory.max={mem}"));
        }
    }

    if let Some(np) = rlimits.nproc {
        let path = cgroup_abs.join("pids.max");
        if let Err(e) = std::fs::write(&path, format!("{np}\n")) {
            tracing::warn!(?e, path = %path.display(), "pids.max write failed");
        } else {
            applied.push(format!("pids.max={np}"));
        }
    }

    applied
}
/// Moves `pid` into the cgroup by writing it to `cgroup_abs/cgroup.procs`.
///
/// # Errors
///
/// [`IncarnateError::CgroupNotWritable`] on permission denied, otherwise
/// [`IncarnateError::Io`].
pub fn move_to_cgroup(cgroup_abs: &Path, pid: nix::unistd::Pid) -> Result<(), IncarnateError> {
    let procs = cgroup_abs.join("cgroup.procs");
    let line = format!("{}\n", pid.as_raw());
    match std::fs::write(&procs, line) {
        Ok(()) => Ok(()),
        Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
            Err(IncarnateError::CgroupNotWritable { path: procs })
        }
        Err(e) => Err(IncarnateError::Io(e)),
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Absolute paths are returned untouched.
#[test]
fn absolute_path_passthrough() {
assert_eq!(resolve_cgroup_path("/foo/bar"), "/foo/bar");
}
// An empty declaration resolves to an empty string.
#[test]
fn empty_returns_empty() {
assert_eq!(resolve_cgroup_path(""), "");
}
// Relative paths get prefixed; the exact prefix depends on the cgroup the
// test process runs in, so only the suffix is checked.
#[test]
fn relative_path_prefixed() {
let r = resolve_cgroup_path("shuma/ws-1");
assert!(r.ends_with("/shuma/ws-1") || r == "/shuma/ws-1");
}
}
+47
View File
@@ -0,0 +1,47 @@
//! Helpers que corren EN el hijo post-clone, antes de execve.
//!
//! Reglas inviolables (la clausura de clone(2) corre en stack nuevo, COW):
//! - sólo syscalls async-signal-safe
//! - no `println!`/`tracing!`/cualquier I/O del runtime
//! - no allocator (vec/box/string)
//! - no Drop con efectos
//! - capturar sólo Copy o datos pre-construidos
use brahman_card::ResourceLimits;
/// Applies the resource limits in `rl` to the calling process through
/// `setrlimit(2)`.
///
/// `mem_bytes` maps to `RLIMIT_AS`, `nproc` to `RLIMIT_NPROC` and `nofile`
/// to `RLIMIT_NOFILE`; a limit that is `None` is left untouched. Soft and
/// hard limits are set to the same value. The result of each `setrlimit`
/// call is not checked.
///
/// # Safety
///
/// Invoked in the child post-clone; it only calls into libc.
pub unsafe fn apply_rlimits(rl: &ResourceLimits) {
if let Some(mem) = rl.mem_bytes {
let lim = libc::rlimit {
rlim_cur: mem,
rlim_max: mem,
};
libc::setrlimit(libc::RLIMIT_AS, &lim);
}
if let Some(np) = rl.nproc {
let lim = libc::rlimit {
rlim_cur: np as u64,
rlim_max: np as u64,
};
libc::setrlimit(libc::RLIMIT_NPROC, &lim);
}
if let Some(nf) = rl.nofile {
let lim = libc::rlimit {
rlim_cur: nf as u64,
rlim_max: nf as u64,
};
libc::setrlimit(libc::RLIMIT_NOFILE, &lim);
}
}
/// Remounts `/` with `MS_PRIVATE | MS_REC` so that mounts made by the child
/// do not leak to the host — a typical trap when delegating a mount
/// namespace. The result of `mount(2)` is not checked.
///
/// # Safety
///
/// Invoked in the child post-clone; it only calls into libc.
pub unsafe fn make_root_private() {
libc::mount(
std::ptr::null(),
b"/\0".as_ptr() as *const _,
std::ptr::null(),
libc::MS_PRIVATE | libc::MS_REC,
std::ptr::null(),
);
}
+95
View File
@@ -0,0 +1,95 @@
//! Construcción del entorno del hijo. Sin globals — toma EnvSpec por valor.
use brahman_card::Card;
use std::path::PathBuf;
/// Env var carrying the path of the internal bus socket (when applicable).
/// Same name ente-bus uses, so that existing clients
/// (`BusClient::from_env`) keep working unchanged.
pub const ENV_BUS_SOCK: &str = "ENTE_BUS_SOCK";
/// Env var carrying the ULID of the incarnated Card.
pub const ENV_ENTE_ID: &str = "ENTE_ID";
/// Supervisor-provided variables that `build_env` injects into the child's
/// environment.
#[derive(Debug, Clone, Default)]
pub struct EnvSpec {
/// If `Some`, injected as `ENTE_BUS_SOCK`.
pub bus_sock: Option<PathBuf>,
/// If `Some`, injected as `NOTIFY_SOCKET` (legacy sd_notify).
pub notify_socket: Option<PathBuf>,
/// Additional variables the caller wants to force.
pub extra: Vec<(String, String)>,
}
/// Builds the child's environment.
///
/// Starts from the parent's environment, applies the Card's explicit
/// `base_envp`, and finally injects the variables described by `spec`.
/// Later layers override earlier ones, in this order:
///
/// 1. inherited environment,
/// 2. `base_envp`,
/// 3. `ENTE_BUS_SOCK` (only when `spec.bus_sock` is set),
/// 4. `ENTE_ID` (always, from `card.id`),
/// 5. `NOTIFY_SOCKET` (only when `spec.notify_socket` is set),
/// 6. `spec.extra`.
///
/// Inherited variables whose name or value is not valid Unicode are
/// skipped: they cannot be represented in the returned `String` pairs, and
/// `std::env::vars()` would panic on them.
pub fn build_env(card: &Card, base_envp: &[(String, String)], spec: &EnvSpec) -> Vec<(String, String)> {
    /// Replaces any existing entry for `key` with a fresh one at the end.
    fn set(env: &mut Vec<(String, String)>, key: &str, value: String) {
        env.retain(|(existing, _)| existing != key);
        env.push((key.to_owned(), value));
    }

    let mut env: Vec<(String, String)> = std::env::vars_os()
        .filter_map(|(k, v)| Some((k.into_string().ok()?, v.into_string().ok()?)))
        .collect();

    for (k, v) in base_envp {
        set(&mut env, k, v.clone());
    }
    if let Some(p) = &spec.bus_sock {
        set(&mut env, ENV_BUS_SOCK, p.to_string_lossy().into_owned());
    }
    set(&mut env, ENV_ENTE_ID, card.id.to_string());
    if let Some(p) = &spec.notify_socket {
        set(&mut env, "NOTIFY_SOCKET", p.to_string_lossy().into_owned());
    }
    for (k, v) in &spec.extra {
        set(&mut env, k, v.clone());
    }
    env
}
#[cfg(test)]
mod tests {
use super::*;
use brahman_card::Card;
// The card id and the bus socket path must both show up in the result.
#[test]
fn env_id_and_bus_injected() {
let card = Card::new("test");
let spec = EnvSpec {
bus_sock: Some(PathBuf::from("/tmp/bus.sock")),
notify_socket: None,
extra: vec![],
};
let env = build_env(&card, &[], &spec);
assert!(env.iter().any(|(k, v)| k == ENV_ENTE_ID && v == &card.id.to_string()));
assert!(env.iter().any(|(k, v)| k == ENV_BUS_SOCK && v == "/tmp/bus.sock"));
}
// A variable forced through `extra` replaces the inherited one instead of
// being added next to it.
#[test]
fn extra_overrides_inherited() {
let card = Card::new("test");
let spec = EnvSpec {
bus_sock: None,
notify_socket: None,
extra: vec![("PATH".into(), "/sandbox/bin".into())],
};
let env = build_env(&card, &[], &spec);
let path_count = env.iter().filter(|(k, _)| k == "PATH").count();
assert_eq!(path_count, 1);
assert_eq!(env.iter().find(|(k, _)| k == "PATH").unwrap().1, "/sandbox/bin");
}
// With a default spec NOTIFY_SOCKET may only be present if the test
// process itself inherited it.
#[test]
fn notify_socket_only_when_set() {
let card = Card::new("test");
let spec = EnvSpec::default();
let env = build_env(&card, &[], &spec);
assert!(!env.iter().any(|(k, _)| k == "NOTIFY_SOCKET"
&& std::env::var("NOTIFY_SOCKET").is_err()));
}
}
+44
View File
@@ -0,0 +1,44 @@
use std::path::PathBuf;
/// Errors that abort an incarnation.
#[derive(Debug, thiserror::Error)]
pub enum IncarnateError {
/// The namespace named by `ns` cannot be created with the available privileges.
#[error("namespace `{ns}` requires CAP_SYS_ADMIN or CLONE_NEWUSER (neither available)")]
NamespaceCapMissing { ns: &'static str },
/// User namespaces are switched off by sysctl.
#[error("user namespaces blocked by sysctl kernel.unprivileged_userns_clone=0")]
UserNsDisabledBySysctl,
/// User namespaces are restricted by a Linux Security Module.
#[error("user namespaces restricted by LSM (apparmor/selinux)")]
UserNsRestrictedByLsm,
/// The cgroup at `path` could not be written to.
#[error("cgroup path `{path}` is not writable (delegation missing?)")]
CgroupNotWritable { path: PathBuf },
/// The Card's payload cannot be executed as a process by this crate.
#[error("payload is not executable in this incarnation path (Wasm/Virtual not supported here)")]
NonExecutablePayload,
/// `clone(2)` returned an error.
#[error("clone(2) failed: {0}")]
Clone(#[source] nix::errno::Errno),
/// `pipe2(2)` returned an error.
#[error("pipe2(2) failed: {0}")]
Pipe(#[source] nix::errno::Errno),
/// A post-clone setup step failed.
#[error("post-clone setup: {0}")]
PostClone(#[source] anyhow::Error),
/// Any other I/O error, forwarded as-is.
#[error(transparent)]
Io(#[from] std::io::Error),
/// The executable path or an argument contains a NUL byte.
#[error("invalid argv: contains NUL byte")]
InvalidArgv,
}
/// When `strict_caps = false`, non-fatal errors are reported as a
/// `Degradation` and the incarnation continues with less isolation than
/// requested.
#[derive(Debug, Clone)]
pub enum Degradation {
/// The namespace named by `ns` was not set up.
NamespaceSkipped { ns: &'static str },
/// The cgroup at `path` was not set up; `reason` says why.
CgroupSkipped { path: PathBuf, reason: String },
/// CPU affinity was not applied; `reason` says why.
CpuAffinitySkipped { reason: String },
/// Writing the uid mapping failed; `reason` says why.
UidMapFailed { reason: String },
}
+419
View File
@@ -0,0 +1,419 @@
//! `arje-incarnate` — rutina extraída del Init para encarnar Cards en
//! procesos aislados (clone(2) + namespaces + cgroup + rlimits + cpu affinity).
//!
//! El núcleo histórico vivía en `ente-soma` con globals dependientes de PID 1.
//! Este crate elimina esos globals: se construye un [`Incarnator`] por
//! supervisor (Init, shuma, etc.), cada uno con su propio bus socket y su
//! propia política de capacidades.
//!
//! ## Limitaciones que NO desaparecen al extraer
//!
//! 1. `mount/pid/net/uts/ipc/cgroup` namespaces requieren `CAP_SYS_ADMIN`
//! o estar combinados con `CLONE_NEWUSER` en el mismo `clone(2)`.
//! 2. `user` namespace puede estar bloqueado por
//! `kernel.unprivileged_userns_clone=0` o por LSM (apparmor/selinux).
//! 3. cgroups v2 requieren delegación (sistemas modernos: systemd
//! `Delegate=yes`). Sin delegación, escribir en `/sys/fs/cgroup` falla.
//! 4. El primer proceso de un PID namespace es PID 1 *de ese ns*; si muere
//! el kernel mata el namespace entero.
//!
//! [`CapabilitySet::detect`] reporta lo que está disponible runtime;
//! [`Incarnator::dry_run`] valida un [`Card`] antes de ejecutar.
#![doc(html_no_source)]
pub mod caps;
pub mod cgroup;
pub mod child;
pub mod env;
pub mod error;
pub mod namespaced;
pub mod plain;
pub mod pre_exec;
pub use brahman_card::Card;
pub use caps::{CapabilitySet, CgroupStatus, NsKind, UserNsStatus};
pub use env::{EnvSpec, ENV_BUS_SOCK, ENV_ENTE_ID};
pub use error::{Degradation, IncarnateError};
pub use pre_exec::{ChildPreExec, ChildSetup};
use std::os::fd::RawFd;
/// Declarative stdio redirection for the child. Every `Some(fd)` gets
/// `dup2`-ed onto stdin/stdout/stderr in the child.
///
/// **Ownership contract**: the caller hands ownership of the FDs over to
/// the `Incarnator` (just as it would to
/// `Command::stdio(Stdio::from_raw_fd)`). The `Incarnator` closes them in
/// the parent after `incarnate` (namespaced path) or lets
/// `std::process::Command` absorb them (plain path). **Do not close them in
/// the caller** — that would be a double close.
///
/// Useful to wire pipes between the processes of a shuma pipeline without
/// breaking the async-signal-safe rule of the clone(2) callback.
#[derive(Debug, Clone, Copy, Default)]
pub struct ChildStdio {
    /// Descriptor to install as the child's stdin.
    pub stdin_fd: Option<RawFd>,
    /// Descriptor to install as the child's stdout.
    pub stdout_fd: Option<RawFd>,
    /// Descriptor to install as the child's stderr.
    pub stderr_fd: Option<RawFd>,
}

impl ChildStdio {
    /// `true` when at least one of the three streams is redirected.
    pub fn is_some(&self) -> bool {
        [self.stdin_fd, self.stdout_fd, self.stderr_fd]
            .iter()
            .any(Option::is_some)
    }
}
use nix::unistd::Pid;
use std::path::PathBuf;
/// Per-supervisor configuration of an [`Incarnator`].
#[derive(Debug, Clone, Default)]
pub struct IncarnatorConfig {
/// Path of the internal bus' Unix socket (injected as `ENTE_BUS_SOCK`).
/// `None` = do not inject.
pub bus_sock: Option<PathBuf>,
/// Inject `NOTIFY_SOCKET` (legacy sd_notify). Defaults to `None`.
/// `ente-zero` passes `Some("/run/systemd/notify")`.
pub notify_socket: Option<PathBuf>,
/// Additional variables the caller forces into every child.
pub extra_env: Vec<(String, String)>,
/// If `true`, missing capabilities abort `incarnate()` with an error.
/// If `false`, they are reported as `Degradation` and the incarnation
/// continues with less isolation (historical Init semantics).
pub strict_caps: bool,
}
/// Incarnates Cards as processes, using a fixed configuration and the
/// capability snapshot it was constructed with.
pub struct Incarnator {
// Supervisor-provided configuration.
cfg: IncarnatorConfig,
// Capabilities detected (or injected) at construction time.
caps: CapabilitySet,
}
/// Result of a successful incarnation.
#[derive(Debug, Clone)]
pub struct IncarnateOutcome {
/// Pid of the spawned child.
pub pid: Pid,
/// Isolation features that were skipped while incarnating.
pub degradations: Vec<Degradation>,
}
/// Outcome of `Incarnator::dry_run`.
#[derive(Debug, Default, Clone)]
pub struct ValidationReport {
/// `false` as soon as at least one blocking problem was found.
pub will_work: bool,
/// Problems that prevent the Card from being incarnated as declared.
pub blocking: Vec<String>,
/// Problems that do not prevent incarnation.
pub warnings: Vec<String>,
}
impl Incarnator {
    /// Builds an incarnator, detecting kernel/process capabilities right away.
    pub fn new(cfg: IncarnatorConfig) -> Self {
        Self {
            caps: CapabilitySet::detect(),
            cfg,
        }
    }

    /// Constructor for testing / injecting pre-computed capabilities.
    pub fn with_caps(cfg: IncarnatorConfig, caps: CapabilitySet) -> Self {
        Self { cfg, caps }
    }

    /// Capability snapshot this incarnator works with.
    pub fn capabilities(&self) -> &CapabilitySet {
        &self.caps
    }

    /// Configuration this incarnator was built with.
    pub fn config(&self) -> &IncarnatorConfig {
        &self.cfg
    }

    /// Validates a Card without executing anything. Lets the caller (shuma,
    /// admin, tests) know beforehand whether the Card can be incarnated as
    /// declared or whether its SomaSpec has to be relaxed.
    ///
    /// Rules applied:
    /// - a requested user namespace is blocked by the sysctl/LSM switches
    ///   only for callers without `CAP_SYS_ADMIN`; a
    ///   `max_user_namespaces` of `0` blocks it for everyone;
    /// - every other namespace needs `CAP_SYS_ADMIN`, or must be requested
    ///   *together with* a usable user namespace, because an unprivileged
    ///   `clone(2)` can only create it alongside `CLONE_NEWUSER`;
    /// - a requested cgroup without delegation is only a warning;
    /// - only `Native` and `Legacy` payloads are executable here.
    pub fn dry_run(&self, card: &Card) -> ValidationReport {
        let mut r = ValidationReport {
            will_work: true,
            ..Default::default()
        };
        let ns = &card.soma.namespaces;
        let privileged = self.caps.has_cap_sys_admin;

        // If a user namespace is requested, evaluate its availability.
        if ns.user {
            // A zero namespace budget applies to privileged callers too; the
            // other switches only restrict unprivileged ones.
            let budget_exhausted = self.caps.max_user_namespaces == Some(0);
            if !privileged || budget_exhausted {
                match self.caps.user_ns {
                    UserNsStatus::DisabledBySysctl => {
                        r.blocking.push(
                            "user namespace requested but kernel.unprivileged_userns_clone=0".into(),
                        );
                        r.will_work = false;
                    }
                    UserNsStatus::RestrictedByLsm => {
                        r.blocking.push(
                            "user namespace restricted by LSM (apparmor/selinux)".into(),
                        );
                        r.will_work = false;
                    }
                    _ => {}
                }
            }
        }

        // The remaining namespaces need CAP_SYS_ADMIN or a user namespace
        // created in the same clone(2).
        let needs_priv = [
            (ns.mount, NsKind::Mount),
            (ns.pid, NsKind::Pid),
            (ns.net, NsKind::Net),
            (ns.uts, NsKind::Uts),
            (ns.ipc, NsKind::Ipc),
            (ns.cgroup, NsKind::Cgroup),
        ];
        for (wanted, kind) in needs_priv {
            if !wanted {
                continue;
            }
            // The unprivileged route only exists when the Card also asks for
            // a user namespace; availability alone is not enough.
            let creatable = privileged || (ns.user && self.caps.can_create_ns(kind));
            if !creatable {
                r.blocking.push(format!(
                    "{} namespace requires CAP_SYS_ADMIN or user ns (neither available)",
                    kind.name()
                ));
                r.will_work = false;
            }
        }

        // Cgroup: if the card asks for a path, check that we have delegation.
        if !card.soma.cgroup.path.is_empty() && !self.caps.cgroup_delegated {
            r.warnings.push(format!(
                "cgroup `{}` requested but our cgroup is not writable (delegation missing)",
                card.soma.cgroup.path
            ));
        }

        // Executable payload.
        use brahman_card::Payload;
        if !matches!(card.payload, Payload::Native { .. } | Payload::Legacy { .. }) {
            r.blocking
                .push("payload is not Native/Legacy (use ente-wasm for Wasm)".into());
            r.will_work = false;
        }
        r
    }

    /// Incarnates the Card. With `strict_caps` it validates first and aborts
    /// on a blocking problem; otherwise it runs and lets degradations
    /// accumulate.
    pub fn incarnate(&self, card: &Card) -> Result<IncarnateOutcome, IncarnateError> {
        self.incarnate_with(card, ChildStdio::default())
    }

    /// Variant with declarative stdio redirection. Useful to wire pipes
    /// between processes (case: isolated pipeline).
    pub fn incarnate_with(
        &self,
        card: &Card,
        stdio: ChildStdio,
    ) -> Result<IncarnateOutcome, IncarnateError> {
        self.incarnate_full(card, stdio, ChildSetup::default())
    }

    /// Full variant: stdio + pre-execve setup.
    ///
    /// # Errors
    ///
    /// With `strict_caps`, the first blocking entry of `dry_run` is mapped
    /// to a typed [`IncarnateError`]. Any error from the namespaced or plain
    /// incarnation path is propagated unchanged.
    pub fn incarnate_full(
        &self,
        card: &Card,
        stdio: ChildStdio,
        setup: ChildSetup,
    ) -> Result<IncarnateOutcome, IncarnateError> {
        if self.cfg.strict_caps {
            let v = self.dry_run(card);
            if !v.will_work {
                // Map the first blocking entry to a typed IncarnateError.
                // NOTE(review): the mapping matches on the message text
                // produced by `dry_run`; a message that matches none of the
                // branches lets the incarnation proceed — keep both in sync.
                if let Some(first) = v.blocking.first() {
                    if first.contains("unprivileged_userns_clone") {
                        return Err(IncarnateError::UserNsDisabledBySysctl);
                    }
                    if first.contains("LSM") {
                        return Err(IncarnateError::UserNsRestrictedByLsm);
                    }
                    if let Some(ns) = which_ns_blocking(first) {
                        return Err(IncarnateError::NamespaceCapMissing { ns });
                    }
                    if first.contains("payload") {
                        return Err(IncarnateError::NonExecutablePayload);
                    }
                }
            }
        }

        let env_spec = EnvSpec {
            bus_sock: self.cfg.bus_sock.clone(),
            notify_socket: self.cfg.notify_socket.clone(),
            extra: self.cfg.extra_env.clone(),
        };
        let mut degradations = Vec::new();
        let pid = if namespaced::needs_namespacing(&card.soma.namespaces) {
            namespaced::incarnate_namespaced(card, &env_spec, &stdio, &setup, &mut degradations)?
        } else {
            plain::incarnate_plain(card, &env_spec, &stdio, &setup)?
        };
        Ok(IncarnateOutcome { pid, degradations })
    }
}
/// Returns the namespace name that `msg` starts with, if any.
///
/// Names are tried in the order mount, pid, net, uts, ipc, user, cgroup and
/// the first match wins.
fn which_ns_blocking(msg: &str) -> Option<&'static str> {
    const NS_NAMES: [&str; 7] = ["mount", "pid", "net", "uts", "ipc", "user", "cgroup"];
    NS_NAMES
        .iter()
        .copied()
        .find(|name| msg.starts_with(name))
}
#[cfg(test)]
mod tests {
use super::*;
use brahman_card::{Card, NamespaceSet, Payload};
// Builds a test Card with the given payload and namespace set.
fn make_card(payload: Payload, ns: NamespaceSet) -> Card {
let mut c = Card::new("test");
c.payload = payload;
c.soma.namespaces = ns;
c
}
// A native payload without namespaces must validate.
#[test]
fn dry_run_native_no_ns_works() {
let inc = Incarnator::new(IncarnatorConfig::default());
let card = make_card(
Payload::Native {
exec: "/bin/true".into(),
argv: vec![],
envp: vec![],
},
NamespaceSet::default(),
);
let r = inc.dry_run(&card);
assert!(r.will_work, "{:?}", r);
}
// A Wasm payload is reported as blocking.
#[test]
fn dry_run_wasm_payload_blocks() {
let inc = Incarnator::new(IncarnatorConfig::default());
let card = make_card(
Payload::Wasm {
module_sha256: [0u8; 32],
entry: "main".into(),
},
NamespaceSet::default(),
);
let r = inc.dry_run(&card);
assert!(!r.will_work);
assert!(r.blocking.iter().any(|m| m.contains("payload")));
}
/// Smoke: stdout redirection via ChildStdio on the plain path.
/// Launches /bin/echo with stdout connected to a pipe that we read.
#[test]
fn incarnate_with_stdout_redirection_captures_output() {
use nix::fcntl::OFlag;
use nix::unistd::{pipe2, read};
use std::os::fd::{AsRawFd, IntoRawFd};
let inc = Incarnator::new(IncarnatorConfig::default());
let card = make_card(
Payload::Native {
exec: "/bin/echo".into(),
argv: vec!["shuma-stdio".into()],
envp: vec![],
},
NamespaceSet::default(),
);
let (r, w) = pipe2(OFlag::empty()).expect("pipe");
let w_raw = w.into_raw_fd();
let r_raw = r.as_raw_fd();
let stdio = ChildStdio {
stdin_fd: None,
stdout_fd: Some(w_raw),
stderr_fd: None,
};
let out = inc.incarnate_with(&card, stdio).expect("incarnate");
// Our write end gets closed (the child has its own dup2).
// Plain path: Command takes ownership and closes it on spawn.
// Namespaced path: the parent would still hold a copy... but that does
// not apply on plain. This test uses plain (empty NamespaceSet).
// Reap the child so it does not linger as a zombie.
let _ = nix::sys::wait::waitpid(out.pid, None);
// Read the output.
let mut buf = [0u8; 64];
let n = read(r_raw, &mut buf).expect("read");
assert!(n > 0);
let s = std::str::from_utf8(&buf[..n]).unwrap();
assert!(s.contains("shuma-stdio"), "got: {s:?}");
// r is closed when the OwnedFd is dropped.
}
/// child_pre_exec applies chdir + NoNewPrivs on the plain path.
#[test]
fn child_pre_exec_chdir_changes_pwd() {
use crate::{ChildPreExec, ChildSetup};
use nix::fcntl::OFlag;
use nix::unistd::{pipe2, read};
use std::ffi::CString;
use std::os::fd::{AsRawFd, IntoRawFd};
let inc = Incarnator::new(IncarnatorConfig::default());
// Command: /bin/pwd. If chdir works, the output is /tmp.
let card = make_card(
Payload::Native {
exec: "/bin/pwd".into(),
argv: vec![],
envp: vec![],
},
NamespaceSet::default(),
);
let (r, w) = pipe2(OFlag::empty()).expect("pipe");
let w_raw = w.into_raw_fd();
let r_raw = r.as_raw_fd();
let stdio = ChildStdio {
stdin_fd: None,
stdout_fd: Some(w_raw),
stderr_fd: None,
};
let setup = ChildSetup::new()
.with(ChildPreExec::Chdir(CString::new("/tmp").unwrap()))
.with(ChildPreExec::NoNewPrivs);
let out = inc.incarnate_full(&card, stdio, setup).expect("incarnate");
let _ = nix::sys::wait::waitpid(out.pid, None);
let mut buf = [0u8; 64];
let n = read(r_raw, &mut buf).expect("read");
let s = std::str::from_utf8(&buf[..n]).unwrap();
assert!(s.starts_with("/tmp"), "pwd output was: {s:?}");
}
/// Smoke: incarnate /bin/true without namespaces. Does not require root.
#[test]
fn incarnate_plain_true_succeeds() {
let inc = Incarnator::new(IncarnatorConfig::default());
let card = make_card(
Payload::Native {
exec: "/bin/true".into(),
argv: vec![],
envp: vec![],
},
NamespaceSet::default(),
);
let out = inc.incarnate(&card).expect("plain incarnation");
assert!(out.pid.as_raw() > 0);
// Reap the child so it does not linger as a zombie.
let _ = nix::sys::wait::waitpid(out.pid, None);
}
}
@@ -0,0 +1,313 @@
//! Path namespaced: clone(2) + sync pipe + setup post-clone en padre + finalize en hijo.
//!
//! ## Protocolo padre↔hijo
//!
//! ```text
//! parent child
//! | |
//! |--- clone() ------->| (child empieza dentro de los nuevos NS)
//! | |
//! | |---- read(sync_r, 1) ---- (bloquea)
//! | |
//! | write uid_map |
//! | write gid_map |
//! | cgroup move |
//! | cpu affinity |
//! | |
//! |--- write(sync_w) ->|
//! | |---- setrlimit
//! | |---- mount(/, MS_PRIVATE | MS_REC)
//! | |---- execve()
//! ```
use crate::child::{apply_rlimits, make_root_private};
use crate::cgroup::{ensure_cgroup, move_to_cgroup};
use crate::env::{build_env, EnvSpec};
use crate::error::{Degradation, IncarnateError};
use crate::pre_exec::{apply_unchecked, ChildSetup};
use crate::ChildStdio;
use brahman_card::{Card, NamespaceSet, Payload};
use nix::fcntl::OFlag;
use nix::sched::CloneFlags;
use nix::unistd::{pipe2, Pid};
use std::ffi::CString;
use std::os::fd::{IntoRawFd, RawFd};
use tracing::{info, warn};
/// `true` when the Card requests at least one namespace, i.e. when the
/// namespaced (clone-based) path has to be used.
pub fn needs_namespacing(ns: &NamespaceSet) -> bool {
    [ns.mount, ns.pid, ns.net, ns.uts, ns.ipc, ns.user, ns.cgroup]
        .iter()
        .any(|&requested| requested)
}
/// Translates the requested namespaces into the matching `CLONE_NEW*` flags.
pub fn build_clone_flags(ns: &NamespaceSet) -> CloneFlags {
    let table = [
        (ns.mount, CloneFlags::CLONE_NEWNS),
        (ns.pid, CloneFlags::CLONE_NEWPID),
        (ns.net, CloneFlags::CLONE_NEWNET),
        (ns.uts, CloneFlags::CLONE_NEWUTS),
        (ns.ipc, CloneFlags::CLONE_NEWIPC),
        (ns.user, CloneFlags::CLONE_NEWUSER),
        (ns.cgroup, CloneFlags::CLONE_NEWCGROUP),
    ];
    table
        .iter()
        .filter(|(requested, _)| *requested)
        .fold(CloneFlags::empty(), |acc, (_, flag)| acc | *flag)
}
/// Incarnates `card` through `clone(2)` with the namespaces it requests.
///
/// The child blocks on a sync pipe until the parent has finished the setup
/// that needs parent privileges (`configure_child`), then applies rlimits,
/// makes `/` private (when a mount namespace was requested), installs the
/// stdio redirections, runs the declarative pre-exec ops and finally calls
/// `execve`.
///
/// Child exit codes on setup failure: `101` sync read failed, `102` execve
/// failed, `103`/`104`/`105` dup2 of stdin/stdout/stderr failed; a non-zero
/// result of the pre-exec ops is used as the exit code as-is.
///
/// # Errors
///
/// - [`IncarnateError::NonExecutablePayload`] for payloads other than
///   `Native`/`Legacy`,
/// - [`IncarnateError::InvalidArgv`] when the executable path or any
///   argument contains a NUL byte,
/// - [`IncarnateError::Pipe`] / [`IncarnateError::Clone`] when the
///   respective syscall fails.
///
/// Errors from `configure_child` are only logged; `degradations` collects
/// what it had to skip.
///
/// NOTE(review): on the error returns above the descriptors in `stdio` are
/// not closed here — confirm who owns them in that case.
pub fn incarnate_namespaced(
    card: &Card,
    env_spec: &EnvSpec,
    stdio: &ChildStdio,
    setup: &ChildSetup,
    degradations: &mut Vec<Degradation>,
) -> Result<Pid, IncarnateError> {
    let flags = build_clone_flags(&card.soma.namespaces);
    info!(label = %card.label, ?flags, "namespaced incarnation");

    let (exec, argv, base_envp) = match &card.payload {
        Payload::Native { exec, argv, envp } => (exec.clone(), argv.clone(), envp.clone()),
        Payload::Legacy { exec, argv, .. } => (exec.clone(), argv.clone(), Vec::new()),
        _ => return Err(IncarnateError::NonExecutablePayload),
    };

    // Build every C string BEFORE opening the sync pipe, so that an invalid
    // argv cannot leak the pipe's descriptors on the early return.
    let exec_c = CString::new(exec).map_err(|_| IncarnateError::InvalidArgv)?;
    // An argument with an interior NUL is an error: dropping it silently
    // would shift the positions of the remaining arguments.
    let mut argv_c: Vec<CString> = Vec::with_capacity(argv.len() + 1);
    argv_c.push(exec_c.clone());
    for arg in &argv {
        argv_c.push(CString::new(arg.as_str()).map_err(|_| IncarnateError::InvalidArgv)?);
    }
    let argv_ptrs: Vec<*const libc::c_char> = argv_c
        .iter()
        .map(|c| c.as_ptr())
        .chain(std::iter::once(std::ptr::null()))
        .collect();

    let env_pairs = build_env(card, &base_envp, env_spec);
    // Environment entries that cannot be represented as C strings are skipped.
    let envp_c: Vec<CString> = env_pairs
        .iter()
        .filter_map(|(k, v)| CString::new(format!("{k}={v}")).ok())
        .collect();
    let envp_ptrs: Vec<*const libc::c_char> = envp_c
        .iter()
        .map(|c| c.as_ptr())
        .chain(std::iter::once(std::ptr::null()))
        .collect();

    // O_CLOEXEC pipe: the read on the child side is what makes the setup
    // race-free. O_CLOEXEC guarantees automatic close on execve.
    let (sync_r, sync_w) = pipe2(OFlag::O_CLOEXEC).map_err(IncarnateError::Pipe)?;
    let sync_r_raw: RawFd = sync_r.into_raw_fd();
    let sync_w_raw: RawFd = sync_w.into_raw_fd();

    let rlimits = card.soma.rlimits.clone();
    let mount_ns_enabled = card.soma.namespaces.mount;
    let stdin_fd = stdio.stdin_fd;
    let stdout_fd = stdio.stdout_fd;
    let stderr_fd = stdio.stderr_fd;
    let setup_ops = setup.ops.clone();

    // SAFETY: the closure runs on a new stack inside a freshly cloned
    // process, COW of the parent. Only async-signal-safe syscalls; no
    // allocator, no Drop with side effects.
    let cb = Box::new(move || -> isize {
        unsafe { libc::close(sync_w_raw); }
        let mut byte = [0u8; 1];
        let n = unsafe { libc::read(sync_r_raw, byte.as_mut_ptr() as *mut _, 1) };
        if n != 1 {
            unsafe { libc::_exit(101); }
        }
        unsafe { libc::close(sync_r_raw); }
        unsafe { apply_rlimits(&rlimits); }
        if mount_ns_enabled {
            unsafe { make_root_private(); }
        }
        // Declarative dup2: the caller passed fds wanted as stdin/out/err.
        // dup2 is async-signal-safe (POSIX) and closes the target fd if it
        // was open. The source fd is NOT closed automatically — the parent
        // has its own copy.
        if let Some(fd) = stdin_fd {
            unsafe {
                if libc::dup2(fd, 0) < 0 {
                    libc::_exit(103);
                }
            }
        }
        if let Some(fd) = stdout_fd {
            unsafe {
                if libc::dup2(fd, 1) < 0 {
                    libc::_exit(104);
                }
            }
        }
        if let Some(fd) = stderr_fd {
            unsafe {
                if libc::dup2(fd, 2) < 0 {
                    libc::_exit(105);
                }
            }
        }
        // Apply the declarative pre-execve ops (NoNewPrivs, chdir, etc.).
        if !setup_ops.is_empty() {
            let r = unsafe { apply_unchecked(&setup_ops) };
            if r != 0 {
                unsafe { libc::_exit(r) };
            }
        }
        unsafe {
            libc::execve(exec_c.as_ptr(), argv_ptrs.as_ptr(), envp_ptrs.as_ptr());
            libc::_exit(102);
        }
    });

    let mut stack = vec![0u8; 1024 * 1024];
    #[allow(deprecated)]
    let pid = unsafe { nix::sched::clone(cb, &mut stack, flags, Some(libc::SIGCHLD)) }
        .map_err(|e| {
            unsafe {
                libc::close(sync_r_raw);
                libc::close(sync_w_raw);
            }
            IncarnateError::Clone(e)
        })?;

    // Parent: close the read end.
    unsafe { libc::close(sync_r_raw); }

    // Post-clone setup. Errors here are recorded as degradations and we
    // carry on (the strict_caps decision is taken by the wrapper).
    if let Err(e) = configure_child(pid, card, degradations) {
        warn!(?e, ?pid, "configure_child errores");
    }

    // Wake the child up.
    let signal_byte = [b'x'];
    let written = unsafe { libc::write(sync_w_raw, signal_byte.as_ptr() as *const _, 1) };
    unsafe { libc::close(sync_w_raw); }
    if written != 1 {
        warn!(?pid, "write sync pipe devolvió {}", written);
    }

    // The child already holds its own copies of the ChildStdio fds. The
    // parent's copies are of no further use; closing them lets the other
    // end of each pipe see EOF when appropriate.
    if let Some(fd) = stdio.stdin_fd {
        unsafe { libc::close(fd); }
    }
    if let Some(fd) = stdio.stdout_fd {
        unsafe { libc::close(fd); }
    }
    if let Some(fd) = stdio.stderr_fd {
        unsafe { libc::close(fd); }
    }
    Ok(pid)
}
/// Parent-side setup for a freshly cloned child: user-namespace id maps,
/// cgroup placement and CPU affinity.
///
/// These steps touch `/proc/<pid>/*` files whose ownership rules only the
/// parent can satisfy, and they must happen while the child is still parked
/// on the sync pipe.
///
/// Every failure is recorded in `degradations` rather than aborting; as
/// written, this function always returns `Ok(())`.
fn configure_child(
    pid: Pid,
    card: &Card,
    degradations: &mut Vec<Degradation>,
) -> Result<(), IncarnateError> {
    let soma = &card.soma;

    if soma.namespaces.user {
        let proc_dir = format!("/proc/{}", pid.as_raw());
        // Since kernel 3.19, "deny" must be written to setgroups before
        // gid_map can be written without CAP_SETGID. Errors are ignored on
        // purpose: on older kernels the file does not exist.
        let _ = std::fs::write(format!("{proc_dir}/setgroups"), "deny");
        // Map id 0 inside the namespace to the parent's own uid/gid.
        let id_maps = [
            ("uid_map", nix::unistd::getuid().as_raw()),
            ("gid_map", nix::unistd::getgid().as_raw()),
        ];
        for (file, host_id) in id_maps {
            let written = std::fs::write(
                format!("{proc_dir}/{file}"),
                format!("0 {host_id} 1"),
            );
            if let Err(e) = written {
                // Both map failures are reported through the same variant;
                // the reason prefix tells them apart.
                degradations.push(Degradation::UidMapFailed {
                    reason: format!("{file}: {e}"),
                });
            }
        }
    }

    if !soma.cgroup.path.is_empty() {
        // `Some((path, reason))` when the child could not be placed in the
        // cgroup, either because creating it or moving into it failed.
        let skipped = match ensure_cgroup(&soma.cgroup) {
            Err(e) => Some((
                std::path::PathBuf::from(&soma.cgroup.path),
                format!("{e}"),
            )),
            Ok(abs) => match move_to_cgroup(&abs, pid) {
                Err(e) => Some((abs, format!("{e}"))),
                Ok(_) => None,
            },
        };
        if let Some((path, reason)) = skipped {
            degradations.push(Degradation::CgroupSkipped { path, reason });
        }
    }

    let affinity_error = soma
        .cpu_affinity
        .as_ref()
        .and_then(|cpus| set_cpu_affinity(pid, cpus).err());
    if let Some(e) = affinity_error {
        degradations.push(Degradation::CpuAffinitySkipped {
            reason: format!("{e}"),
        });
    }

    Ok(())
}
/// Restricts `pid` to the CPUs listed in `cpus` via `sched_setaffinity(2)`.
///
/// # Errors
///
/// - `InvalidInput` if any index in `cpus` does not fit in a `cpu_set_t`.
///   `libc::CPU_SET` indexes a fixed-size bitmask, so an out-of-range index
///   would otherwise go out of bounds in the parent instead of being reported.
/// - The OS error from `sched_setaffinity` otherwise (for example when the
///   resulting mask is rejected by the kernel).
fn set_cpu_affinity(pid: Pid, cpus: &[u32]) -> Result<(), std::io::Error> {
    // Number of CPU bits a `cpu_set_t` can represent.
    let capacity = 8 * std::mem::size_of::<libc::cpu_set_t>();
    if let Some(&bad) = cpus.iter().find(|&&c| c as usize >= capacity) {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!("cpu index {bad} exceeds cpu_set_t capacity ({capacity})"),
        ));
    }

    // SAFETY: `cpu_set_t` is a plain bitmask; the all-zero pattern is a valid
    // (empty) set.
    let mut set: libc::cpu_set_t = unsafe { std::mem::zeroed() };
    // SAFETY: `set` is a valid, exclusively borrowed `cpu_set_t`.
    unsafe { libc::CPU_ZERO(&mut set); }
    for &c in cpus {
        // SAFETY: `c` was checked against `capacity` above, so the bit index
        // is within the mask.
        unsafe { libc::CPU_SET(c as usize, &mut set); }
    }

    // SAFETY: `set` outlives the call and the size passed matches its type.
    let r = unsafe {
        libc::sched_setaffinity(pid.as_raw(), std::mem::size_of::<libc::cpu_set_t>(), &set)
    };
    if r != 0 {
        Err(std::io::Error::last_os_error())
    } else {
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use brahman_card::NamespaceSet;

    /// Starts from `NamespaceSet::default()` and lets `tweak` switch on the
    /// namespaces a test cares about.
    fn namespaces(tweak: impl FnOnce(&mut NamespaceSet)) -> NamespaceSet {
        let mut set = NamespaceSet::default();
        tweak(&mut set);
        set
    }

    /// A default namespace set must not request the namespaced path.
    #[test]
    fn empty_ns_does_not_need_namespacing() {
        let nothing_requested = namespaces(|_| {});
        assert!(!needs_namespacing(&nothing_requested));
    }

    /// Enabling a single namespace is enough to require namespacing.
    #[test]
    fn any_ns_triggers_namespacing() {
        let only_user = namespaces(|set| set.user = true);
        assert!(needs_namespacing(&only_user));
    }

    /// Clone flags mirror exactly the namespaces that were switched on.
    #[test]
    fn flags_match_namespace_bools() {
        let requested = namespaces(|set| {
            set.user = true;
            set.pid = true;
        });
        let flags = build_clone_flags(&requested);
        assert!(flags.contains(CloneFlags::CLONE_NEWUSER));
        assert!(flags.contains(CloneFlags::CLONE_NEWPID));
        assert!(!flags.contains(CloneFlags::CLONE_NEWNET));
    }
}
+60
View File
@@ -0,0 +1,60 @@
//! Path simple: spawn directo, sin namespacing.
use crate::env::{build_env, EnvSpec};
use crate::error::IncarnateError;
use crate::pre_exec::{apply_unchecked, ChildSetup};
use crate::ChildStdio;
use brahman_card::{Card, Payload};
use nix::unistd::Pid;
use std::os::fd::FromRawFd;
use std::os::unix::process::CommandExt;
use std::process::{Command, Stdio};
/// Spawns the card's payload directly through `std::process::Command`,
/// without any namespace setup.
///
/// * `card` — supplies the payload (`Native` or `Legacy`); any other payload
///   kind yields `IncarnateError::NonExecutablePayload`.
/// * `env_spec` — combined with the payload's base environment by
///   `build_env`; the child's environment is cleared first, so it sees only
///   those variables.
/// * `stdio` — optional raw fds to install as the child's stdin/stdout/stderr.
///   Ownership of each fd passes to the `Command`, which closes it when it is
///   dropped on return from this function.
/// * `setup` — declarative pre-`execve` operations run in the child.
///
/// Returns the child's pid. Spawn failures propagate as `IncarnateError`
/// through `?`; when a pre-exec op fails, the error carries the errno of the
/// syscall that failed.
pub fn incarnate_plain(
    card: &Card,
    env_spec: &EnvSpec,
    stdio: &ChildStdio,
    setup: &ChildSetup,
) -> Result<Pid, IncarnateError> {
    let (exec, argv, base_envp) = match &card.payload {
        Payload::Native { exec, argv, envp } => (exec.clone(), argv.clone(), envp.clone()),
        // Legacy payloads contribute no base environment here.
        Payload::Legacy { exec, argv, .. } => (exec.clone(), argv.clone(), Vec::new()),
        _ => return Err(IncarnateError::NonExecutablePayload),
    };
    let env = build_env(card, &base_envp, env_spec);
    let mut cmd = Command::new(&exec);
    cmd.args(&argv);
    // Start from an empty environment so nothing leaks from the supervisor.
    cmd.env_clear();
    for (k, v) in &env {
        cmd.env(k, v);
    }
    // NOTE(review): each `from_raw_fd` below takes ownership of its fd, so
    // the three fds are presumably expected to be distinct; passing the same
    // fd twice would close it twice. Confirm against `ChildStdio`'s contract.
    if let Some(fd) = stdio.stdin_fd {
        // SAFETY: the caller guarantees `fd` is open and hands its ownership
        // over; the resulting `Stdio` closes it when `cmd` is dropped.
        cmd.stdin(unsafe { Stdio::from_raw_fd(fd) });
    }
    if let Some(fd) = stdio.stdout_fd {
        // SAFETY: same ownership contract as for stdin.
        cmd.stdout(unsafe { Stdio::from_raw_fd(fd) });
    }
    if let Some(fd) = stdio.stderr_fd {
        // SAFETY: same ownership contract as for stdin.
        cmd.stderr(unsafe { Stdio::from_raw_fd(fd) });
    }
    if !setup.is_empty() {
        // Clone so the closure is 'static, as `Command::pre_exec` requires.
        let ops = setup.ops.clone();
        // SAFETY: the closure runs post-fork, pre-exec. `apply_unchecked`
        // only performs async-signal-safe syscalls, and reading errno does
        // not allocate.
        unsafe {
            cmd.pre_exec(move || {
                if apply_unchecked(&ops) == 0 {
                    Ok(())
                } else {
                    // `apply_unchecked` returns right after the failing
                    // syscall, so errno still describes the real cause.
                    // Report it instead of a blanket EINVAL.
                    Err(std::io::Error::last_os_error())
                }
            });
        }
    }
    let child = cmd.spawn()?;
    // `Child::id` is a u32; `Pid::from_raw` takes the platform's signed pid.
    Ok(Pid::from_raw(child.id() as i32))
}
+103
View File
@@ -0,0 +1,103 @@
//! Hook declarativo pre-execve para el hijo.
//!
//! Las ops corren EN EL HIJO, post-fork/clone, pre-execve. Reglas:
//! - sólo syscalls async-signal-safe.
//! - sin allocator (los CStrings ya están construidos por el padre).
//! - sin Drop con efectos.
use std::ffi::CString;
/// Declarative operations that can be applied in the child before `execve`.
#[derive(Debug, Clone)]
pub enum ChildPreExec {
/// `PR_SET_NO_NEW_PRIVS = 1` — blocks future privilege escalation
/// (suid bits, file caps, AT_SECURE). Recommended in sandboxes.
NoNewPrivs,
/// `PR_SET_PDEATHSIG = sig` — the child receives this signal when its
/// parent (PID 1 of the namespace, or whichever it is) dies. Useful for
/// automatic cleanup of orphaned processes.
ParentDeathSig(i32),
/// `PR_SET_DUMPABLE` — controls whether the process allows core dumps.
Dumpable(bool),
/// `setsid()` — new session/group leader (detaches from the controlling tty).
NewSession,
/// `chdir(path)` — change the working directory. The path is pre-allocated
/// as a `CString` so the child does not need to allocate.
Chdir(CString),
/// `umask(mode)` — set the umask (octal, e.g. 0o022).
Umask(libc::mode_t),
}
/// Complete setup for the child: the list of pre-`execve` operations.
/// The default value has no operations.
#[derive(Debug, Clone, Default)]
pub struct ChildSetup {
/// Operations to apply in the child, in the order they were added.
pub ops: Vec<ChildPreExec>,
}
impl ChildSetup {
    /// Creates a setup with no operations.
    pub fn new() -> Self {
        Self { ops: Vec::new() }
    }

    /// Appends `op` in place and returns `self` so calls can be chained.
    pub fn push(&mut self, op: ChildPreExec) -> &mut Self {
        self.ops.push(op);
        self
    }

    /// Builder-style counterpart of [`ChildSetup::push`]: takes the setup by
    /// value, appends `op`, and hands the setup back.
    pub fn with(mut self, op: ChildPreExec) -> Self {
        self.push(op);
        self
    }

    /// Returns `true` when no operations have been added.
    pub fn is_empty(&self) -> bool {
        self.ops.is_empty()
    }
}
/// Applies `ops` in order inside the child, after fork/clone and before
/// `execve`, stopping at the first failure.
///
/// Returns `0` when every op succeeded. On failure it returns the exit code
/// the caller should use to abort the child (same convention as the rest of
/// the clone closure):
///
/// | code | failed op        |
/// |------|------------------|
/// | 110  | `NoNewPrivs`     |
/// | 111  | `ParentDeathSig` |
/// | 112  | `Dumpable`       |
/// | 113  | `NewSession`     |
/// | 114  | `Chdir`          |
///
/// `Umask` cannot fail and never produces a code.
///
/// # Safety
///
/// Must only be called in the child process, post-fork/clone and
/// pre-`execve`. It performs only libc calls: no allocation and no `Drop`
/// with side effects.
pub unsafe fn apply_unchecked(ops: &[ChildPreExec]) -> i32 {
    // `prctl` is variadic and its C prototype takes `unsigned long`
    // arguments. Passing `c_ulong` (rather than a fixed `u64`) keeps the
    // argument width correct on 32-bit targets as well; on 64-bit Linux the
    // two types are the same.
    const UNUSED: libc::c_ulong = 0;
    for op in ops {
        match op {
            ChildPreExec::NoNewPrivs => {
                let enable: libc::c_ulong = 1;
                let r = unsafe {
                    libc::prctl(libc::PR_SET_NO_NEW_PRIVS, enable, UNUSED, UNUSED, UNUSED)
                };
                if r != 0 {
                    return 110;
                }
            }
            ChildPreExec::ParentDeathSig(sig) => {
                let signal = *sig as libc::c_ulong;
                let r = unsafe {
                    libc::prctl(libc::PR_SET_PDEATHSIG, signal, UNUSED, UNUSED, UNUSED)
                };
                if r != 0 {
                    return 111;
                }
            }
            ChildPreExec::Dumpable(yes) => {
                let flag: libc::c_ulong = if *yes { 1 } else { 0 };
                let r = unsafe {
                    libc::prctl(libc::PR_SET_DUMPABLE, flag, UNUSED, UNUSED, UNUSED)
                };
                if r != 0 {
                    return 112;
                }
            }
            ChildPreExec::NewSession => {
                let r = unsafe { libc::setsid() };
                if r < 0 {
                    return 113;
                }
            }
            ChildPreExec::Chdir(path) => {
                // `path` was built by the parent, so no allocation happens here.
                let r = unsafe { libc::chdir(path.as_ptr()) };
                if r != 0 {
                    return 114;
                }
            }
            ChildPreExec::Umask(mode) => {
                // `umask` returns the previous mask, which is not needed.
                unsafe { libc::umask(*mode) };
            }
        }
    }
    0
}