This commit is contained in:
sergio
2026-05-10 21:58:16 +00:00
parent 3d55f189c0
commit c22d2480b9
36 changed files with 5158 additions and 363 deletions
@@ -0,0 +1,27 @@
[package]
name = "shipote-core"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
publish.workspace = true
description = "Runtime de shipote: WorkspaceManager sobre ente-incarnate. Estado in-memory, lifecycle, reaping."
[dependencies]
shipote-card = { path = "../shipote-card" }
shipote-discern = { path = "../shipote-discern" }
brahman-card = { path = "../../../core/brahman-card" }
ente-incarnate = { path = "../../../shared/ente-incarnate" }
nix = { workspace = true }
libc = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
ulid = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
@@ -0,0 +1,584 @@
//! `shipote-core` — runtime in-memory de Workspaces y comandos.
//!
//! Mantiene un estado tokio-friendly (Mutex sobre HashMap) con:
//! - Workspaces vivos (id → state).
//! - PIDs de comandos lanzados, indexados por workspace.
//! - Reaping cooperativo: `reap_dead()` cosecha hijos terminados.
// `pipeline` and the log drainer in this file need targeted `unsafe` for
// raw `libc` calls (e.g. `libc::close`, `fcntl`, `read`) and for building
// `OwnedFd` from fds created with `pipe2(2)`. Everything else in the crate
// is meant to stay safe — the `unsafe_code` lint is intended to be allowed
// only where it is actually required.
#![deny(unsafe_op_in_unsafe_fn)]
pub mod logbuf;
pub mod persist;
pub mod pipeline;
use brahman_card::{Card, Payload, Supervision};
use ente_incarnate::{Incarnator, IncarnatorConfig};
use nix::sys::signal::{kill, Signal};
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
use nix::unistd::Pid;
use shipote_card::{CommandRef, PipelineSpec, WorkspaceId, WorkspaceSpec};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
use thiserror::Error;
use tokio::sync::Mutex;
use tracing::{info, warn};
use ulid::Ulid;
/// Errors returned by the workspace manager and the pipeline runtime.
#[derive(Debug, Error)]
pub enum CoreError {
/// No workspace is registered under the given id.
#[error("workspace {0} not found")]
WorkspaceNotFound(WorkspaceId),
/// A spec could not be compiled; converted automatically via `From`.
#[error("compile: {0}")]
Compile(#[from] shipote_card::CompileError),
/// The incarnator reported a failure; converted automatically via `From`.
#[error("incarnate: {0}")]
Incarnate(#[from] ente_incarnate::IncarnateError),
}
/// In-memory record of one live workspace.
#[derive(Debug)]
pub struct WorkspaceState {
/// Id the workspace is registered under.
pub id: WorkspaceId,
/// Spec the workspace was created from.
pub spec: WorkspaceSpec,
/// Card compiled from `spec` when the workspace was created.
pub root_card: Card,
/// Commands launched in this workspace, keyed by command id.
pub commands: HashMap<Ulid, CommandState>,
/// Creation instant; uptime is derived from it.
pub started: Instant,
}
/// Tracking record for one command launched inside a workspace.
#[derive(Debug, Clone)]
pub struct CommandState {
/// Command id (also the key in `WorkspaceState::commands`).
pub id: Ulid,
/// Human-readable label of the command.
pub label: String,
/// PID returned by the incarnator.
pub pid: Pid,
/// `true` until the command has been observed to terminate.
pub alive: bool,
/// Exit code, or `128 + signal` when killed by a signal; `None` while unknown.
pub exit_status: Option<i32>,
/// Ring buffer shared with the tokio task that drains the command's
/// captured output. `None` for commands that do not capture output
/// (future: commands with stdout=inherit).
pub logs: Option<logbuf::LogBuf>,
}
/// Owns the table of live workspaces and the incarnator used to spawn commands.
pub struct WorkspaceManager {
/// Mutable state, guarded by an async mutex.
inner: Arc<Mutex<Inner>>,
/// Incarnator used for every spawn; shareable through `incarnator_handle`.
incarnator: Arc<Incarnator>,
}
/// State protected by the manager's mutex.
struct Inner {
/// Live workspaces keyed by id.
workspaces: HashMap<WorkspaceId, WorkspaceState>,
/// Persisted named pipeline definitions. This is NOT the set of
/// "running pipelines" — these are specs stored for reuse with
/// `run-saved`. They survive a restart through the snapshot.
saved_pipelines: HashMap<String, PipelineSpec>,
}
/// Minimal description of a freshly launched command, as returned by `run`.
#[derive(Debug, Clone)]
pub struct CommandSummary {
/// Command id.
pub id: Ulid,
/// Generated label of the command.
pub label: String,
/// Raw PID of the spawned process.
pub pid: i32,
}
/// Listing entry for a command, as returned by `list_commands`.
#[derive(Debug, Clone)]
pub struct CommandInfo {
/// Command id.
pub id: Ulid,
/// Label of the command.
pub label: String,
/// Raw PID of the process.
pub pid: i32,
/// Whether the command is still considered running.
pub alive: bool,
/// Recorded exit status, if the command has been reaped.
pub exit_status: Option<i32>,
/// Total bytes ever written to the command's log buffer (0 when there is none).
pub log_bytes: u64,
}
/// Spawns a tokio task that copies everything readable from `read_fd` into
/// `logs` until EOF or an unrecoverable read error.
///
/// Ownership of `read_fd` moves to the task: it is wrapped in an `OwnedFd`
/// and therefore closed when the task finishes. The fd is switched to
/// non-blocking mode first so reads never stall the reactor. Must be called
/// from inside a tokio runtime (it uses `tokio::spawn`).
fn spawn_log_drainer(read_fd: std::os::fd::RawFd, logs: logbuf::LogBuf) {
    // Mark non-blocking before wrapping in AsyncFd; same pattern as the tap.
    // SAFETY: fcntl(F_GETFL/F_SETFL) only reads/updates the status flags of
    // `read_fd`; no memory is handed to the kernel.
    unsafe {
        let flags = libc::fcntl(read_fd, libc::F_GETFL, 0);
        if flags >= 0 {
            libc::fcntl(read_fd, libc::F_SETFL, flags | libc::O_NONBLOCK);
        }
    }
    tokio::spawn(async move {
        use std::io::ErrorKind;
        use std::os::fd::AsRawFd;

        // SAFETY: ownership of the fd was transferred to this drainer task.
        let owned = unsafe { std::os::fd::OwnedFd::from_raw_fd_compat(read_fd) };
        let afd = match tokio::io::unix::AsyncFd::with_interest(owned, tokio::io::Interest::READABLE) {
            Ok(a) => a,
            Err(e) => {
                tracing::warn!(?e, "log drainer AsyncFd failed");
                return;
            }
        };
        let mut buf = [0u8; 4096];
        loop {
            let mut guard = match afd.readable().await {
                Ok(g) => g,
                Err(_) => break,
            };
            let fd = afd.as_raw_fd();
            // SAFETY: `fd` is owned by `afd` (still alive) and `buf` is a
            // valid writable region of `buf.len()` bytes.
            let r = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut _, buf.len()) };
            if r > 0 {
                logs.append(&buf[..r as usize]);
                continue;
            }
            if r == 0 {
                break; // EOF
            }
            let err = std::io::Error::last_os_error();
            match err.kind() {
                // Nothing left to read right now: wait for the next readiness event.
                ErrorKind::WouldBlock => guard.clear_ready(),
                // A signal interrupted the read; nothing was consumed, so
                // retry instead of abandoning the remaining output.
                ErrorKind::Interrupted => {}
                _ => {
                    tracing::warn!(?err, "log drainer read err");
                    break;
                }
            }
        }
    });
}
/// Crate-private shim that exposes `FromRawFd::from_raw_fd` for `OwnedFd`
/// under a distinct method name.
trait OwnedFdFromRawCompat: Sized {
/// Wraps `fd`, taking ownership of it.
///
/// # Safety
/// The caller must own `fd` and must not use or close it afterwards.
unsafe fn from_raw_fd_compat(fd: std::os::fd::RawFd) -> Self;
}
impl OwnedFdFromRawCompat for std::os::fd::OwnedFd {
unsafe fn from_raw_fd_compat(fd: std::os::fd::RawFd) -> Self {
use std::os::fd::FromRawFd;
// SAFETY: the caller transfers ownership of `fd` to the new OwnedFd.
unsafe { std::os::fd::OwnedFd::from_raw_fd(fd) }
}
}
impl WorkspaceManager {
/// Creates a manager with no workspaces and no saved pipelines, backed by
/// an incarnator built from `cfg`.
pub fn new(cfg: IncarnatorConfig) -> Self {
    let empty = Inner {
        workspaces: HashMap::new(),
        saved_pipelines: HashMap::new(),
    };
    let inner = Arc::new(Mutex::new(empty));
    let incarnator = Arc::new(Incarnator::new(cfg));
    Self { inner, incarnator }
}
/// Borrows the incarnator used by this manager.
pub fn incarnator(&self) -> &Incarnator {
    &*self.incarnator
}
/// Returns a shared (`Arc`) handle to the incarnator so the pipeline
/// runtime can use it outside of the manager.
pub fn incarnator_handle(&self) -> Arc<Incarnator> {
    Arc::clone(&self.incarnator)
}
// -----------------------------------------------------------------
// Saved pipelines (definiciones nombradas, no runs)
// -----------------------------------------------------------------
/// Stores `spec` under `name`, replacing any definition already saved
/// with that name.
pub async fn save_pipeline(&self, name: String, spec: PipelineSpec) {
    let mut state = self.inner.lock().await;
    state.saved_pipelines.insert(name, spec);
}
/// Returns the names of all saved pipelines, sorted ascending.
pub async fn list_saved_pipelines(&self) -> Vec<String> {
    let state = self.inner.lock().await;
    let mut names = Vec::with_capacity(state.saved_pipelines.len());
    for name in state.saved_pipelines.keys() {
        names.push(name.clone());
    }
    names.sort();
    names
}
/// Returns a clone of the pipeline saved under `name`, or `None` when no
/// such definition exists.
pub async fn get_saved_pipeline(&self, name: &str) -> Option<PipelineSpec> {
    let state = self.inner.lock().await;
    let found = state.saved_pipelines.get(name)?;
    Some(found.clone())
}
/// Deletes the pipeline saved under `name`. Returns `true` when a
/// definition was actually removed.
pub async fn drop_saved_pipeline(&self, name: &str) -> bool {
    let mut state = self.inner.lock().await;
    let removed = state.saved_pipelines.remove(name);
    removed.is_some()
}
/// Returns the label of workspace `id`, or `None` if it is not registered.
pub async fn workspace_label(&self, id: WorkspaceId) -> Option<String> {
    let state = self.inner.lock().await;
    let ws = state.workspaces.get(&id)?;
    Some(ws.spec.label.clone())
}
/// Registers a new workspace under a freshly generated id.
///
/// Returns the id together with the warnings produced by the incarnator's
/// dry run. See `create_with_id` for the details.
pub async fn create(
    self: &Arc<Self>,
    spec: WorkspaceSpec,
) -> Result<(WorkspaceId, Vec<String>), CoreError> {
    let fresh_id = WorkspaceId::new();
    self.create_with_id(fresh_id, spec).await
}
/// Variant of `create` that takes the id from the caller. Used by
/// `restore_snapshot` so ULIDs are preserved across restarts and clients
/// tracking a workspace id keep working.
///
/// The spec is compiled to a card, the incarnator's dry run supplies the
/// returned warnings, and the workspace is inserted into the table
/// (replacing any entry already stored under `id`). When the spec has a
/// TTL, a background task stops the workspace once it elapses.
///
/// # Errors
/// Returns `CoreError::Compile` when the spec cannot be compiled.
pub async fn create_with_id(
    self: &Arc<Self>,
    id: WorkspaceId,
    spec: WorkspaceSpec,
) -> Result<(WorkspaceId, Vec<String>), CoreError> {
    let root_card = spec.to_card(id)?;
    let warnings = self.incarnator.dry_run(&root_card).warnings;
    let ttl = spec.ttl;
    let state = WorkspaceState {
        id,
        spec,
        root_card,
        commands: HashMap::new(),
        started: Instant::now(),
    };
    {
        let mut table = self.inner.lock().await;
        table.workspaces.insert(id, state);
    }
    info!(%id, ?ttl, "workspace created");

    // No TTL: nothing to schedule.
    let Some(duration) = ttl else {
        return Ok((id, warnings));
    };

    // The task keeps only a weak reference so it never prevents the
    // manager from being dropped when the daemon shuts down.
    let mgr_weak = Arc::downgrade(self);
    tokio::spawn(async move {
        tokio::time::sleep(duration).await;
        let Some(mgr) = mgr_weak.upgrade() else {
            return;
        };
        let exists = mgr.inner.lock().await.workspaces.contains_key(&id);
        if exists {
            info!(%id, "workspace TTL expired — auto-stop");
            let _ = mgr.stop(id).await;
        }
    });
    Ok((id, warnings))
}
/// Returns one summary per live workspace (id, label, number of tracked
/// commands, uptime in milliseconds). Order follows the map's iteration
/// order and is therefore unspecified.
pub async fn list(&self) -> Vec<WorkspaceSnapshot> {
    let state = self.inner.lock().await;
    let mut summaries = Vec::with_capacity(state.workspaces.len());
    for ws in state.workspaces.values() {
        summaries.push(WorkspaceSnapshot {
            id: ws.id,
            label: ws.spec.label.clone(),
            commands: ws.commands.len() as u32,
            uptime_ms: ws.started.elapsed().as_millis() as u64,
        });
    }
    summaries
}
pub async fn stop(&self, id: WorkspaceId) -> Result<u32, CoreError> {
let mut g = self.inner.lock().await;
let ws = g.workspaces.remove(&id).ok_or(CoreError::WorkspaceNotFound(id))?;
let mut reaped = 0u32;
for (_cid, cmd) in ws.commands {
if cmd.alive {
let _ = kill(cmd.pid, Signal::SIGTERM);
// Cosecha sin bloquear infinito: WNOHANG en loop con un par de intentos.
for _ in 0..50 {
match waitpid(cmd.pid, Some(WaitPidFlag::WNOHANG)) {
Ok(WaitStatus::StillAlive) => {
std::thread::sleep(std::time::Duration::from_millis(20));
}
Ok(_) => {
reaped += 1;
break;
}
Err(_) => break,
}
}
// Último recurso: SIGKILL.
let _ = kill(cmd.pid, Signal::SIGKILL);
let _ = waitpid(cmd.pid, None);
}
}
info!(%id, reaped, "workspace stopped");
Ok(reaped)
}
/// Runs a one-shot command inside an existing workspace.
///
/// The command's stdout is captured into a ring buffer readable through
/// [`get_command_logs`](Self::get_command_logs). stderr is not redirected
/// (v1 keeps it simple; a separate stderr capture may come later).
///
/// # Errors
/// - `CoreError::WorkspaceNotFound` when `id` is not registered.
/// - `CoreError::Compile` when the command cannot be compiled to a card.
/// - `CoreError::Incarnate` when the capture pipe cannot be created or the
///   incarnator fails to start the process.
pub async fn run(
    &self,
    id: WorkspaceId,
    exec: String,
    argv: Vec<String>,
    envp: Vec<(String, String)>,
) -> Result<CommandSummary, CoreError> {
    use std::os::fd::IntoRawFd;

    let workspace_label = {
        let g = self.inner.lock().await;
        let ws = g.workspaces.get(&id).ok_or(CoreError::WorkspaceNotFound(id))?;
        ws.spec.label.clone()
    };
    let cmd_ref = CommandRef {
        label: format!("run-{}", short_ulid(&Ulid::new())),
        payload: Payload::Native { exec, argv, envp },
        soma: Default::default(),
        flows: Default::default(),
        supervision: Supervision::OneShot,
    };
    let card = cmd_ref.to_card(0, &workspace_label)?;

    // Pipe used to capture stdout. O_CLOEXEC so grandchildren do not
    // inherit our copies.
    let (capture_r, capture_w) =
        nix::unistd::pipe2(nix::fcntl::OFlag::O_CLOEXEC).map_err(|e| {
            CoreError::Incarnate(ente_incarnate::IncarnateError::Pipe(e))
        })?;
    let logs = logbuf::LogBuf::new();
    let stdio = ente_incarnate::ChildStdio {
        stdin_fd: None,
        // The write end is handed to the incarnator as a raw fd.
        stdout_fd: Some(capture_w.into_raw_fd()),
        stderr_fd: None,
    };
    // `capture_r` stays an owned fd until the spawn succeeded, so the
    // early return below closes it instead of leaking it.
    let out = self.incarnator.incarnate_with(&card, stdio)?;
    let cmd_id = card.id;
    let cmd_label = cmd_ref.label.clone();
    let pid = out.pid;

    // Drainer: tokio task that reads the pipe and appends to the ring.
    // It takes ownership of the read end.
    spawn_log_drainer(capture_r.into_raw_fd(), logs.clone());

    {
        let mut g = self.inner.lock().await;
        match g.workspaces.get_mut(&id) {
            Some(ws) => {
                ws.commands.insert(
                    cmd_id,
                    CommandState {
                        id: cmd_id,
                        label: cmd_label.clone(),
                        pid,
                        alive: true,
                        exit_status: None,
                        logs: Some(logs),
                    },
                );
            }
            // The workspace was stopped while the command was starting:
            // the process runs but nothing tracks it. Make that visible.
            None => warn!(
                %id,
                pid = pid.as_raw(),
                "workspace removed during launch; command is untracked"
            ),
        }
    }
    for d in &out.degradations {
        warn!(?d, %id, "command incarnation degradation");
    }
    Ok(CommandSummary {
        id: cmd_id,
        label: cmd_label,
        pid: pid.as_raw(),
    })
}
/// Returns the tail of the captured log for `(workspace, command)`.
///
/// `tail_bytes` is forwarded to `LogBuf::tail` (0 means the whole
/// buffer). `None` is returned when the workspace or the command is
/// unknown, or when the command has no log buffer.
pub async fn get_command_logs(
    &self,
    workspace: WorkspaceId,
    command: Ulid,
    tail_bytes: usize,
) -> Option<Vec<u8>> {
    let state = self.inner.lock().await;
    let cmd = state.workspaces.get(&workspace)?.commands.get(&command)?;
    let ring = cmd.logs.as_ref()?;
    Some(ring.tail(tail_bytes))
}
/// Lists the commands tracked for `workspace`, ordered by id (ULIDs sort
/// chronologically). An unknown workspace yields an empty list.
pub async fn list_commands(&self, workspace: WorkspaceId) -> Vec<CommandInfo> {
    let state = self.inner.lock().await;
    let mut infos: Vec<CommandInfo> = match state.workspaces.get(&workspace) {
        None => return Vec::new(),
        Some(ws) => ws
            .commands
            .values()
            .map(|cmd| CommandInfo {
                id: cmd.id,
                label: cmd.label.clone(),
                pid: cmd.pid.as_raw(),
                alive: cmd.alive,
                exit_status: cmd.exit_status,
                log_bytes: cmd.logs.as_ref().map_or(0, |ring| ring.written_total()),
            })
            .collect(),
    };
    infos.sort_by_key(|info| info.id);
    infos
}
/// Launches every node of a pipeline and returns `(label, pid)` per node,
/// in node order. Wiring the nodes through flows is left to the broker
/// (once the sidecar integration is complete; v1 only launches).
///
/// Nodes launched here do not capture logs.
///
/// # Errors
/// Fails when the spec does not validate, the workspace is unknown, or a
/// node cannot be compiled or incarnated. NOTE(review): on a mid-way
/// failure the nodes started earlier are left running and tracked —
/// confirm that this is the intended behavior.
pub async fn run_pipeline(
    &self,
    spec: &PipelineSpec,
) -> Result<Vec<(String, Pid)>, CoreError> {
    spec.validate()?;
    let workspace_label = {
        let state = self.inner.lock().await;
        match state.workspaces.get(&spec.workspace) {
            Some(ws) => ws.spec.label.clone(),
            None => return Err(CoreError::WorkspaceNotFound(spec.workspace)),
        }
    };

    let mut launched = Vec::new();
    for (index, node) in spec.nodes.iter().enumerate() {
        let card = node.to_card(index, &workspace_label)?;
        let outcome = self.incarnator.incarnate(&card)?;
        let pid = outcome.pid;

        // Re-acquire the lock per node; the workspace may be gone by now.
        let mut state = self.inner.lock().await;
        if let Some(ws) = state.workspaces.get_mut(&spec.workspace) {
            let tracked = CommandState {
                id: card.id,
                label: node.label.clone(),
                pid,
                alive: true,
                exit_status: None,
                // run_pipeline does NOT capture logs (it connects via pipes).
                logs: None,
            };
            ws.commands.insert(card.id, tracked);
        }
        launched.push((node.label.clone(), pid));
    }
    Ok(launched)
}
/// Reaps terminated children without blocking. Call it periodically from
/// the daemon or on SIGCHLD.
///
/// For every command still marked alive, a non-blocking `waitpid` is
/// issued; on normal exit the exit code is stored, on death by signal
/// `128 + signal` is stored, and `alive` becomes `false`. Any other
/// outcome (still running, `waitpid` error) leaves the entry untouched.
///
/// NOTE(review): a `waitpid` error such as ECHILD keeps the command
/// flagged alive indefinitely — confirm whether that is intended.
pub async fn reap_dead(&self) {
    let mut state = self.inner.lock().await;
    let live_commands = state
        .workspaces
        .values_mut()
        .flat_map(|ws| ws.commands.values_mut())
        .filter(|cmd| cmd.alive);
    for cmd in live_commands {
        let status = match waitpid(cmd.pid, Some(WaitPidFlag::WNOHANG)) {
            Ok(WaitStatus::Exited(_, code)) => code,
            Ok(WaitStatus::Signaled(_, sig, _)) => 128 + (sig as i32),
            _ => continue,
        };
        cmd.alive = false;
        cmd.exit_status = Some(status);
    }
}
}
/// Summary of one workspace, as returned by `WorkspaceManager::list`.
#[derive(Debug, Clone)]
pub struct WorkspaceSnapshot {
/// Workspace id.
pub id: WorkspaceId,
/// Label taken from the workspace spec.
pub label: String,
/// Number of commands tracked for the workspace.
pub commands: u32,
/// Milliseconds elapsed since the workspace was created.
pub uptime_ms: u64,
}
/// Returns the last six characters of the ULID's string form; used to
/// build short command labels.
fn short_ulid(u: &Ulid) -> String {
    let full = u.to_string();
    let start = full.len() - 6;
    full[start..].to_owned()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Minimal workspace spec with the given label and TTL.
    fn workspace_spec(label: &str, ttl: Option<Duration>) -> WorkspaceSpec {
        WorkspaceSpec {
            label: label.into(),
            soma: Default::default(),
            permissions: Default::default(),
            ttl,
            flow_dirs: vec![],
            on_exit: shipote_card::ExitPolicy::Reap,
        }
    }

    fn manager() -> Arc<WorkspaceManager> {
        Arc::new(WorkspaceManager::new(IncarnatorConfig::default()))
    }

    /// A workspace with a TTL disappears from the table once it elapses.
    #[tokio::test]
    async fn ttl_auto_stops_workspace() {
        let mgr = manager();
        let spec = workspace_spec("ttl-test", Some(Duration::from_millis(120)));
        let (_id, _) = mgr.create(spec).await.unwrap();
        assert_eq!(mgr.list().await.len(), 1);
        tokio::time::sleep(Duration::from_millis(250)).await;
        assert_eq!(
            mgr.list().await.len(),
            0,
            "TTL expirado: workspace debe haber sido removido"
        );
    }

    /// A created workspace shows up in `list` under the returned id.
    #[tokio::test]
    async fn create_and_list_workspace() {
        let mgr = manager();
        let (id, _w) = mgr.create(workspace_spec("test", None)).await.unwrap();
        let list = mgr.list().await;
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].id, id);
    }

    /// stdout of a command started with `run` ends up in its log buffer.
    #[tokio::test]
    async fn run_captures_stdout_to_log() {
        let mgr = manager();
        let (id, _) = mgr.create(workspace_spec("logs", None)).await.unwrap();
        let summary = mgr
            .run(
                id,
                "/bin/echo".into(),
                vec!["captured-output".into()],
                vec![],
            )
            .await
            .unwrap();
        // Wait for the command to finish and for the drainer to catch up.
        for _ in 0..50 {
            tokio::time::sleep(Duration::from_millis(20)).await;
            mgr.reap_dead().await;
            let logs = mgr.get_command_logs(id, summary.id, 0).await.unwrap_or_default();
            if !logs.is_empty() {
                let s = String::from_utf8_lossy(&logs);
                assert!(s.contains("captured-output"), "got: {s:?}");
                return;
            }
        }
        panic!("logs never captured");
    }

    /// `/bin/true` can be launched in a workspace and reaped afterwards.
    #[tokio::test]
    async fn run_true_in_workspace() {
        let mgr = manager();
        let (id, _) = mgr.create(workspace_spec("exec", None)).await.unwrap();
        let summary = mgr
            .run(id, "/bin/true".into(), vec![], vec![])
            .await
            .unwrap();
        assert!(summary.pid > 0);
        // Give the child time to exit without blocking the runtime thread,
        // then reap it.
        tokio::time::sleep(Duration::from_millis(100)).await;
        mgr.reap_dead().await;
    }
}
@@ -0,0 +1,122 @@
//! Ring buffer en memoria para capturar stdout/stderr de comandos.
//!
//! Tamaño fijo por comando (config: `MAX_LOG_BYTES`). Cuando se llena,
//! descarta los bytes más viejos. Pensado para diagnostico rápido, no
//! para retención histórica — eso es trabajo de un journald-like aparte.
use std::sync::{Arc, Mutex};
/// Default number of bytes retained per command. 64 KiB covers typical
/// logs without wasting memory when the daemon tracks hundreds of
/// commands.
pub const MAX_LOG_BYTES: usize = 64 * 1024;

/// Cheaply clonable handle to a shared, bounded log buffer. Clones refer
/// to the same underlying storage.
#[derive(Debug, Clone)]
pub struct LogBuf {
    inner: Arc<Mutex<Inner>>,
}

#[derive(Debug)]
struct Inner {
    /// Raw bytes. Once the length exceeds the cap (plus slack) the head is
    /// discarded so only the tail is kept.
    buf: Vec<u8>,
    /// Number of bytes kept after a trim.
    cap: usize,
    /// Total bytes ever appended (never decreased by trimming).
    written_total: u64,
}

impl LogBuf {
    /// Creates a buffer retaining [`MAX_LOG_BYTES`].
    pub fn new() -> Self {
        Self::with_cap(MAX_LOG_BYTES)
    }

    /// Creates a buffer that trims back to `cap` bytes. At most 4096 bytes
    /// are pre-allocated.
    pub fn with_cap(cap: usize) -> Self {
        let initial = cap.min(4096);
        let state = Inner {
            buf: Vec::with_capacity(initial),
            cap,
            written_total: 0,
        };
        Self {
            inner: Arc::new(Mutex::new(state)),
        }
    }

    /// Appends `data`. When the buffer grows past `cap` plus a 1024-byte
    /// slack, the oldest bytes are dropped so exactly `cap` remain. Does
    /// nothing if the lock is poisoned.
    pub fn append(&self, data: &[u8]) {
        // Slack avoids shifting the buffer on every single append.
        const SLACK: usize = 1024;
        let mut state = match self.inner.lock() {
            Ok(state) => state,
            Err(_) => return,
        };
        state.written_total += data.len() as u64;
        state.buf.extend_from_slice(data);
        let len = state.buf.len();
        let cap = state.cap;
        if len > cap + SLACK {
            state.buf.drain(..len - cap);
        }
    }

    /// Returns the last `n` bytes, or the whole buffer when `n` is 0 or
    /// not smaller than the current length. Empty if the lock is poisoned.
    pub fn tail(&self, n: usize) -> Vec<u8> {
        let Ok(state) = self.inner.lock() else {
            return Vec::new();
        };
        let len = state.buf.len();
        let start = if n == 0 || n >= len { 0 } else { len - n };
        state.buf[start..].to_vec()
    }

    /// Number of bytes currently held (0 if the lock is poisoned).
    pub fn len(&self) -> usize {
        match self.inner.lock() {
            Ok(state) => state.buf.len(),
            Err(_) => 0,
        }
    }

    /// `true` when no bytes are currently held.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Total bytes ever appended (0 if the lock is poisoned).
    pub fn written_total(&self) -> u64 {
        match self.inner.lock() {
            Ok(state) => state.written_total,
            Err(_) => 0,
        }
    }
}

impl Default for LogBuf {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Two appends below the cap are returned verbatim by `tail(0)`.
#[test]
fn append_and_tail_basic() {
let lb = LogBuf::with_cap(100);
lb.append(b"hello ");
lb.append(b"world\n");
let t = lb.tail(0);
assert_eq!(t, b"hello world\n");
}
// Exceeding cap + slack discards the oldest bytes.
#[test]
fn cap_drops_oldest() {
let lb = LogBuf::with_cap(10);
lb.append(&[b'a'; 8]);
lb.append(&[b'b'; 8]);
// The first two appends total 16 bytes, which is below the trim
// threshold (10 + 1024 slack), so nothing has been dropped yet.
// A large append pushes the buffer past the threshold.
lb.append(&[b'c'; 2048]);
assert!(lb.len() <= 10 + 1024);
let t = lb.tail(0);
// The tail must contain 'c's (the most recent bytes).
assert!(t.iter().filter(|&&b| b == b'c').count() > 0);
}
// `written_total` counts every appended byte, even past the cap.
#[test]
fn written_total_tracks_all() {
let lb = LogBuf::with_cap(10);
lb.append(b"abcdef");
lb.append(b"ghijkl");
assert_eq!(lb.written_total(), 12);
}
}
@@ -0,0 +1,228 @@
//! Persistence of the WorkspaceManager state.
//!
//! The snapshot stores the `WorkspaceSpec`s of live workspaces and, since
//! v2, the named saved pipelines. Commands (PIDs) are NOT persisted — they
//! are not expected to outlive the daemon. Only the *declared intent*
//! (workspaces created with their spec, plus saved pipeline definitions)
//! survives a daemon restart.
use crate::WorkspaceManager;
use serde::{Deserialize, Serialize};
use shipote_card::{PipelineSpec, WorkspaceId, WorkspaceSpec};
use std::path::{Path, PathBuf};
use tracing::{info, warn};
/// Current snapshot format version. v2 added `saved_pipelines`; a v1 file,
/// which lacks that field, is read with it empty.
pub const SNAPSHOT_VERSION: u16 = 2;
/// Serializable image of the manager state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShipoteSnapshot {
/// Format version the snapshot was written with.
pub version: u16,
/// Wall-clock time of the snapshot, in milliseconds since the Unix epoch.
pub timestamp_ms: u64,
/// Live workspaces (id + spec).
pub workspaces: Vec<WorkspaceEntry>,
/// Named pipeline definitions; defaults to empty when the field is absent.
#[serde(default)]
pub saved_pipelines: Vec<PipelineEntry>,
}
/// One persisted workspace.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkspaceEntry {
/// Id the workspace had when the snapshot was taken.
pub id: WorkspaceId,
/// Spec the workspace was created from.
pub spec: WorkspaceSpec,
}
/// One persisted saved pipeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineEntry {
/// Name the pipeline was saved under.
pub name: String,
/// The saved pipeline definition.
pub spec: PipelineSpec,
}
impl ShipoteSnapshot {
pub fn write(&self, path: &Path) -> anyhow::Result<()> {
let bytes = serde_json::to_vec_pretty(self)?;
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent).ok();
}
let tmp = path.with_extension("tmp");
std::fs::write(&tmp, &bytes)?;
std::fs::rename(&tmp, path)?;
Ok(())
}
pub fn read(path: &Path) -> anyhow::Result<Self> {
let bytes = std::fs::read(path)?;
let snap: ShipoteSnapshot = serde_json::from_slice(&bytes)?;
// v1 y v2 son compatibles forward (v1 sin saved_pipelines lee como vec vacío).
if snap.version > SNAPSHOT_VERSION {
anyhow::bail!(
"snapshot version {} no soportada (esperada ≤ {})",
snap.version,
SNAPSHOT_VERSION
);
}
Ok(snap)
}
}
/// Canonical snapshot path: `$XDG_STATE_HOME/shipote/state.json`,
/// falling back to `$HOME/.local/state/shipote/state.json`,
/// falling back to `/tmp/shipote-state-$UID.json`.
///
/// An environment variable is honored only when it holds an absolute
/// path; unset, empty or relative values are skipped so the result never
/// depends on the current working directory.
pub fn default_snapshot_path() -> PathBuf {
    // `var_os` also accepts values that are not valid UTF-8.
    let absolute_dir = |var: &str| {
        std::env::var_os(var)
            .map(PathBuf::from)
            .filter(|dir| dir.is_absolute())
    };
    if let Some(state) = absolute_dir("XDG_STATE_HOME") {
        return state.join("shipote/state.json");
    }
    if let Some(home) = absolute_dir("HOME") {
        return home.join(".local/state/shipote/state.json");
    }
    let uid = nix::unistd::getuid().as_raw();
    PathBuf::from(format!("/tmp/shipote-state-{uid}.json"))
}
/// Milliseconds since the Unix epoch according to the system clock, or 0
/// when the clock reports a time before the epoch.
fn now_ms() -> u64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
impl WorkspaceManager {
/// Captures the current state: the id and spec of every live workspace
/// plus every saved pipeline, stamped with the current version and time.
pub async fn snapshot(&self) -> ShipoteSnapshot {
    let state = self.inner.lock().await;

    let mut workspaces = Vec::with_capacity(state.workspaces.len());
    for (id, ws) in &state.workspaces {
        workspaces.push(WorkspaceEntry {
            id: *id,
            spec: ws.spec.clone(),
        });
    }

    let mut saved_pipelines = Vec::with_capacity(state.saved_pipelines.len());
    for (name, spec) in &state.saved_pipelines {
        saved_pipelines.push(PipelineEntry {
            name: name.clone(),
            spec: spec.clone(),
        });
    }

    ShipoteSnapshot {
        version: SNAPSHOT_VERSION,
        timestamp_ms: now_ms(),
        workspaces,
        saved_pipelines,
    }
}
/// Takes a snapshot of the current state and writes it to `path`.
///
/// # Errors
/// Propagates any error from `ShipoteSnapshot::write`.
pub async fn save_snapshot(&self, path: &Path) -> anyhow::Result<()> {
    let snap = self.snapshot().await;
    let workspaces = snap.workspaces.len();
    snap.write(path)?;
    info!(path = %path.display(), workspaces, "snapshot saved");
    Ok(())
}
/// Carga snapshot desde disco y restaura los Workspaces.
/// Errores no-fatales (workspaces inválidos) se loguean y se saltan.
pub async fn restore_snapshot(self: &std::sync::Arc<Self>, path: &Path) -> anyhow::Result<usize> {
let snap = match ShipoteSnapshot::read(path) {
Ok(s) => s,
Err(e) => {
warn!(?e, path = %path.display(), "no snapshot — start fresh");
return Ok(0);
}
};
let mut restored = 0usize;
for entry in snap.workspaces {
// v2+: reusamos el id original así clients que tracking
// workspace_id no se rompen al restart.
let label = entry.spec.label.clone();
match self.create_with_id(entry.id, entry.spec).await {
Ok(_) => restored += 1,
Err(e) => warn!(?e, %label, "skipped workspace en restore"),
}
}
for entry in snap.saved_pipelines {
self.save_pipeline(entry.name, entry.spec).await;
}
info!(restored, "snapshot restored");
Ok(restored)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::WorkspaceManager;
use ente_incarnate::IncarnatorConfig;
use shipote_card::{ExitPolicy, WorkspaceSpec};
use std::sync::Arc;
// Minimal workspace spec with no TTL, used by the tests below.
fn sample_ws(label: &str) -> WorkspaceSpec {
WorkspaceSpec {
label: label.into(),
soma: Default::default(),
permissions: Default::default(),
ttl: None,
flow_dirs: vec![],
on_exit: ExitPolicy::Reap,
}
}
// Saving and restoring into a fresh manager keeps the workspace ids.
#[tokio::test]
async fn roundtrip_snapshot_preserves_ulids() {
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("state.json");
let mgr1 = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
let (id1, _) = mgr1.create(sample_ws("a")).await.unwrap();
let (id2, _) = mgr1.create(sample_ws("b")).await.unwrap();
mgr1.save_snapshot(&path).await.unwrap();
let mgr2 = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
let n = mgr2.restore_snapshot(&path).await.unwrap();
assert_eq!(n, 2);
let listed = mgr2.list().await;
let restored_ids: std::collections::HashSet<_> = listed.iter().map(|s| s.id).collect();
assert!(restored_ids.contains(&id1));
assert!(restored_ids.contains(&id2));
}
// Saved pipelines are written to the snapshot and come back on restore.
#[tokio::test]
async fn snapshot_includes_saved_pipelines() {
use shipote_card::{CommandRef, DiscernPolicy, PipelineSpec};
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("state.json");
let mgr1 = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
let (ws_id, _) = mgr1.create(sample_ws("ws")).await.unwrap();
let spec = PipelineSpec {
label: "echo-cat".into(),
workspace: ws_id,
nodes: vec![CommandRef {
label: "n1".into(),
payload: brahman_card::Payload::Native {
exec: "/bin/echo".into(),
argv: vec!["hi".into()],
envp: vec![],
},
soma: Default::default(),
flows: Default::default(),
supervision: brahman_card::Supervision::OneShot,
}],
edges: vec![],
discern: DiscernPolicy::default(),
};
mgr1.save_pipeline("daily".into(), spec).await;
mgr1.save_snapshot(&path).await.unwrap();
let mgr2 = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
mgr2.restore_snapshot(&path).await.unwrap();
let saved = mgr2.list_saved_pipelines().await;
assert_eq!(saved, vec!["daily".to_string()]);
let got = mgr2.get_saved_pipeline("daily").await.expect("saved");
assert_eq!(got.label, "echo-cat");
}
// NOTE(review): the `/tmp/shipote-state-$UID.json` fallback does not end
// with "state.json", so this presumably relies on XDG_STATE_HOME or HOME
// being set in the test environment — confirm.
#[test]
fn default_path_ends_with_state_json() {
let p = default_snapshot_path();
assert!(p.to_string_lossy().ends_with("state.json"));
}
}
@@ -0,0 +1,420 @@
//! Pipeline runtime: encadena nodos con pipes y opcionalmente intercepta
//! cada flow para discernir su contenido.
//!
//! Cada nodo se encarna via [`ente_incarnate::Incarnator`] — eso significa
//! que **cada comando puede tener su propio SomaSpec** (namespaces, cgroup,
//! rlimits) heredado del workspace. La conexión stdin↔stdout se hace con
//! `pipe2(2)` + `ChildStdio` declarativo: el callback de clone(2) hace los
//! `dup2` pre-execve sin romper la regla async-signal-safe.
use crate::CoreError;
use brahman_card::Payload;
use ente_incarnate::{ChildStdio, Incarnator};
use nix::fcntl::OFlag;
use nix::unistd::pipe2;
use shipote_card::{FlowEdge, PipelineSpec};
use shipote_discern::{DiscernPipeline, Discernment, Hint};
use std::os::fd::{AsRawFd, IntoRawFd, RawFd};
use std::sync::Arc;
use tokio::io::unix::AsyncFd;
use tokio::io::Interest;
use tracing::{debug, info, warn};
use ulid::Ulid;
/// Result of launching a pipeline.
#[derive(Debug, Clone)]
pub struct PipelineLaunch {
/// Id generated for this launch.
pub pipeline: Ulid,
/// `(label, raw pid)` for each node, in launch order.
pub command_pids: Vec<(String, i32)>,
/// One entry per tap task that completed, in the order the taps were
/// spawned; empty when the pipeline ran without taps.
pub edge_discernments: Vec<EdgeDiscernment>,
}
/// What a tap observed on one producer → consumer connection.
#[derive(Debug, Clone)]
pub struct EdgeDiscernment {
/// Label of the producing node.
pub from_label: String,
/// Name of the producer's output, taken from the edge.
pub from_output: String,
/// Label of the consuming node.
pub to_label: String,
/// Name of the consumer's input, taken from the edge.
pub to_input: String,
/// Result of running the discerner on the sampled bytes, if any.
pub discernment: Option<Discernment>,
}
/// Launches a pipeline, connecting nodes through stdin/stdout. Each node
/// is incarnated via `Incarnator` (with or without namespacing, depending
/// on its SomaSpec).
///
/// v1: linear pipeline (one incoming edge per node). Multiple incoming
/// edges produce a warning and only the first one is honored.
///
/// When `tap` is true, each producer's output goes through a tokio task
/// (`spawn_tap`) that forwards the bytes to the next node and samples
/// them for `discerner`. All tap tasks are awaited before returning, so
/// with taps this function returns only after those tasks have finished.
///
/// NOTE(review): on an early error return (pipe creation, `to_card`,
/// `incarnate_with`) nothing here closes raw fds produced in earlier
/// iterations or stops nodes already started — confirm whether the
/// caller or the incarnator is expected to clean those up.
pub async fn run_pipeline(
spec: &PipelineSpec,
workspace_label: &str,
tap: bool,
discerner: Arc<DiscernPipeline>,
incarnator: Arc<Incarnator>,
) -> Result<PipelineLaunch, CoreError> {
spec.validate()?;
let n = spec.nodes.len();
info!(
nodes = n,
edges = spec.edges.len(),
tap,
"launching pipeline (incarnated)"
);
// Predecessor: for each node, its incoming edge (if it has one).
// NOTE(review): indexing by `e.to` assumes `validate()` guarantees the
// index is in range — confirm.
let mut predecessor: Vec<Option<&FlowEdge>> = vec![None; n];
for e in &spec.edges {
if predecessor[e.to].is_some() {
warn!(node = e.to, "v1 pipeline: nodo con múltiples predecessors — sólo se honra el primero");
continue;
}
predecessor[e.to] = Some(e);
}
let mut pids = Vec::with_capacity(n);
let mut taps: Vec<TapHandle> = Vec::new();
// For each producing node we keep the read FD of the producer's pipe,
// to be consumed when the consumer is set up.
// Because there may be a tap in between, the scheme is:
// - Without tap: read FD of the producer pipe → consumer's stdin.
// - With tap: read FD of the producer pipe → tokio proxy → write FD
//   of the consumer pipe → consumer's stdin.
// To keep the linear case simple, `pending_stdin_for_next` holds the FD
// that the next node in iteration order must use as stdin.
let mut pending_stdin_for_next: Option<RawFd> = None;
for (i, node) in spec.nodes.iter().enumerate() {
// Validate that the payload is executable.
match &node.payload {
Payload::Native { .. } | Payload::Legacy { .. } => {}
_ => {
return Err(CoreError::Incarnate(
ente_incarnate::IncarnateError::NonExecutablePayload,
))
}
}
// Compile the node to a Card.
let card = node.to_card(i, workspace_label)?;
// Is this node a producer? Then it needs stdout_fd pointing at a new pipe.
let i_is_producer = spec.edges.iter().any(|e| e.from == i);
let stdin_fd: Option<RawFd> = pending_stdin_for_next.take();
let mut stdout_fd: Option<RawFd> = None;
let mut next_pending: Option<RawFd> = None;
// FDs the PARENT would have to close after spawn (our copy of the
// end handed to the child). Collected but no longer acted upon —
// see the note before `incarnate_with` below.
let mut parent_closes: Vec<RawFd> = Vec::new();
if i_is_producer {
let (r, w) = pipe2(OFlag::O_CLOEXEC).map_err(|e| {
CoreError::Incarnate(ente_incarnate::IncarnateError::Pipe(e))
})?;
let r_raw = r.into_raw_fd();
let w_raw = w.into_raw_fd();
stdout_fd = Some(w_raw);
parent_closes.push(w_raw);
if tap {
// A second pipe is needed between the tap and the consumer.
let (r2, w2) = pipe2(OFlag::O_CLOEXEC).map_err(|e| {
CoreError::Incarnate(ente_incarnate::IncarnateError::Pipe(e))
})?;
let r2_raw = r2.into_raw_fd();
let w2_raw = w2.into_raw_fd();
next_pending = Some(r2_raw);
// The tap reads from r_raw and writes to w2_raw.
// Edge used for labelling the tap: the first recorded
// predecessor edge if it starts at `i`, otherwise the first
// edge in `spec.edges` that starts at `i`.
let edge = predecessor
.iter()
.find_map(|p| *p)
.and_then(|e| if e.from == i { Some(e) } else { None })
// Edge where `i` is the `from` node:
.or_else(|| spec.edges.iter().find(|e| e.from == i));
let from_label = node.label.clone();
let to_label = edge
.map(|e| spec.nodes[e.to].label.clone())
.unwrap_or_default();
let from_output = edge.map(|e| e.from_output.clone()).unwrap_or_default();
let to_input = edge.map(|e| e.to_input.clone()).unwrap_or_default();
let sample_bytes = spec.discern.sample_bytes;
let disc = discerner.clone();
let h = spawn_tap(
r_raw, w2_raw, sample_bytes, disc, from_label, from_output, to_label, to_input,
);
taps.push(h);
// r_raw and w2_raw now belong to the tokio task. They are not
// closed in the parent.
} else {
// Without a tap, the producer's read end goes straight to the
// stdin of the next consumer.
next_pending = Some(r_raw);
}
}
let stdio = ChildStdio {
stdin_fd,
stdout_fd,
stderr_fd: None,
};
// The Incarnator takes over the fds in `stdio` — they are not closed
// here. `parent_closes` is therefore obsolete.
let _ = parent_closes;
let outcome = incarnator
.incarnate_with(&card, stdio)
.map_err(CoreError::Incarnate)?;
let pid = outcome.pid;
pids.push((node.label.clone(), pid.as_raw()));
debug!(label = %node.label, pid = pid.as_raw(), "node incarnated");
pending_stdin_for_next = next_pending;
}
let pipeline_id = Ulid::new();
let mut edge_discernments = Vec::with_capacity(taps.len());
// Wait for every tap task; a task that failed to join is logged and skipped.
for t in taps {
match t.handle.await {
Ok(d) => edge_discernments.push(d),
Err(e) => warn!(?e, "tap handle joined with error"),
}
}
Ok(PipelineLaunch {
pipeline: pipeline_id,
command_pids: pids,
edge_discernments,
})
}
/// Handle to a running tap task.
struct TapHandle {
/// Join handle; resolves to what the tap observed once it finishes.
handle: tokio::task::JoinHandle<EdgeDiscernment>,
}
/// Spawns a tokio task that forwards bytes from `producer_r_fd` to
/// `consumer_w_fd`, keeping up to `sample_bytes` of the stream's start as
/// a sample for `discerner`.
///
/// Both fds are switched to non-blocking mode and their ownership moves to
/// the task (they are wrapped in `OwnedFd`). The task resolves to an
/// `EdgeDiscernment` carrying the given labels and the discerner's result.
///
/// NOTE(review): the `expect` calls panic inside the task if `AsyncFd`
/// registration fails; the caller then sees a join error.
// Eight plain parameters are passed straight through to the task, hence
// the lint exemption.
#[allow(clippy::too_many_arguments)]
fn spawn_tap(
producer_r_fd: RawFd,
consumer_w_fd: RawFd,
sample_bytes: usize,
discerner: Arc<DiscernPipeline>,
from_label: String,
from_output: String,
to_label: String,
to_input: String,
) -> TapHandle {
// Mark non-blocking BEFORE wrapping them in AsyncFd. Otherwise tokio
// would block the reactor on slow operations.
set_nonblocking(producer_r_fd);
set_nonblocking(consumer_w_fd);
let handle = tokio::spawn(async move {
// SAFETY: the caller transfers ownership of the fds to this task.
let r_std = unsafe { std::os::fd::OwnedFd::from_raw_fd_compat(producer_r_fd) };
let w_std = unsafe { std::os::fd::OwnedFd::from_raw_fd_compat(consumer_w_fd) };
let r = AsyncFd::with_interest(r_std, Interest::READABLE).expect("AsyncFd r");
let w = AsyncFd::with_interest(w_std, Interest::WRITABLE).expect("AsyncFd w");
let mut sample: Vec<u8> = Vec::with_capacity(sample_bytes);
let mut buf = [0u8; 4096];
let mut total: u64 = 0;
// Phase 1: sampling + pump. Runs until the sample is full or EOF.
let mut eof = false;
while !eof && sample.len() < sample_bytes {
let n = match async_read(&r, &mut buf).await {
Ok(0) => { eof = true; 0 }
Ok(n) => n,
Err(e) => { warn!(?e, "tap producer read failed"); break; }
};
if n == 0 { break; }
// Only as many bytes as still fit go into the sample; the full
// chunk is forwarded regardless.
let take = n.min(sample_bytes - sample.len());
sample.extend_from_slice(&buf[..take]);
if let Err(e) = async_write_all(&w, &buf[..n]).await {
warn!(?e, "tap consumer write failed");
break;
}
total += n as u64;
}
let d = discerner.discern(&sample, &Hint { path: None, size_total: None });
// Phase 2: pump-only until EOF.
// NOTE(review): `eof` stays false after a phase-1 read/write error, so
// this loop still runs once more in that case — confirm intent.
while !eof {
let n = match async_read(&r, &mut buf).await {
Ok(0) => { eof = true; 0 }
Ok(n) => n,
Err(_) => break,
};
if n == 0 { break; }
if async_write_all(&w, &buf[..n]).await.is_err() { break; }
total += n as u64;
}
debug!(bytes = total, "tap finished");
EdgeDiscernment {
from_label,
from_output,
to_label,
to_input,
discernment: d,
}
});
TapHandle { handle }
}
/// Performs one `read(2)` on the non-blocking fd wrapped by `afd`, waiting
/// for read readiness as often as needed.
///
/// Returns the number of bytes stored at the start of `buf`. `Ok(0)` means
/// `read(2)` returned zero, which `spawn_tap` treats as end of stream.
///
/// # Errors
/// Propagates a failure of the readiness wait and any `read(2)` error other
/// than `WouldBlock` (readiness is cleared and awaited again) and
/// `Interrupted` (the call is simply retried).
async fn async_read(
    afd: &AsyncFd<std::os::fd::OwnedFd>,
    buf: &mut [u8],
) -> std::io::Result<usize> {
    loop {
        let mut guard = afd.readable().await?;
        // SAFETY: the fd is open for as long as `afd` is borrowed, and the
        // pointer/length pair describes the exclusively borrowed `buf`.
        let r = unsafe { libc::read(afd.as_raw_fd(), buf.as_mut_ptr().cast(), buf.len()) };
        // A negative return fails the conversion, so the success path needs
        // no truncating `as` cast.
        if let Ok(n) = usize::try_from(r) {
            return Ok(n);
        }
        let err = std::io::Error::last_os_error();
        match err.kind() {
            // Stale readiness: drop it so the next `readable()` really waits.
            std::io::ErrorKind::WouldBlock => guard.clear_ready(),
            // EINTR: nothing was transferred; readiness still holds, retry.
            std::io::ErrorKind::Interrupted => {}
            _ => return Err(err),
        }
    }
}
/// Writes all of `buf` to the non-blocking fd wrapped by `afd`, waiting for
/// write readiness between partial writes.
///
/// An empty `buf` returns `Ok(())` without touching the fd.
///
/// # Errors
/// - `WriteZero` if `write(2)` reports zero bytes written for a non-empty
///   buffer.
/// - A failure of the readiness wait, or any `write(2)` error other than
///   `WouldBlock` (readiness is cleared and awaited again) and `Interrupted`
///   (the call is simply retried).
///
/// On error, a prefix of `buf` may already have been written.
async fn async_write_all(
    afd: &AsyncFd<std::os::fd::OwnedFd>,
    mut buf: &[u8],
) -> std::io::Result<()> {
    while !buf.is_empty() {
        let mut guard = afd.writable().await?;
        // SAFETY: the fd is open for as long as `afd` is borrowed, and the
        // pointer/length pair describes the borrowed `buf`.
        let r = unsafe { libc::write(afd.as_raw_fd(), buf.as_ptr().cast(), buf.len()) };
        // A negative return fails the conversion and lands in `Err(_)`.
        match usize::try_from(r) {
            Ok(0) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::WriteZero,
                    "write 0",
                ));
            }
            // Partial (or full) write: continue with whatever is left.
            Ok(n) => buf = &buf[n..],
            Err(_) => {
                let err = std::io::Error::last_os_error();
                match err.kind() {
                    // Stale readiness: drop it so `writable()` really waits.
                    std::io::ErrorKind::WouldBlock => guard.clear_ready(),
                    // EINTR: nothing was written; retry the same slice.
                    std::io::ErrorKind::Interrupted => {}
                    _ => return Err(err),
                }
            }
        }
    }
    Ok(())
}
fn set_nonblocking(fd: RawFd) {
// SAFETY: fcntl con F_SETFL es seguro para fds válidos.
unsafe {
let flags = libc::fcntl(fd, libc::F_GETFL, 0);
if flags >= 0 {
libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK);
}
}
}
/// Extension trait that hides how an owned descriptor is built from a raw
/// one, so call sites do not depend on the exact `OwnedFd` API (compat shim).
trait OwnedFdFromRawCompat: Sized {
    /// Wraps `fd` in `Self`, taking ownership of it.
    ///
    /// # Safety
    /// The caller must hand over ownership of `fd`: it has to be an open
    /// descriptor that nothing else will close.
    unsafe fn from_raw_fd_compat(fd: RawFd) -> Self;
}

impl OwnedFdFromRawCompat for std::os::fd::OwnedFd {
    unsafe fn from_raw_fd_compat(fd: RawFd) -> Self {
        // SAFETY: the caller transfers ownership of `fd` to the new `OwnedFd`,
        // which is exactly the contract of `FromRawFd::from_raw_fd`.
        unsafe { <Self as std::os::fd::FromRawFd>::from_raw_fd(fd) }
    }
}
// Anchor that mentions `AsRawFd` in a signature, meant to silence the
// unused-import warning for that trait in case nothing else uses it.
#[allow(dead_code)]
fn _keep_raw(_probe: &dyn AsRawFd) {
    // Intentionally empty: only the signature matters.
}
#[cfg(test)]
mod tests {
    use super::*;
    use brahman_card::Payload;
    use ente_incarnate::IncarnatorConfig;
    use shipote_card::{CommandRef, DiscernPolicy, FlowEdge, PipelineSpec, WorkspaceId};

    /// Builds a one-shot native command node with an empty environment and
    /// default `soma` / `flows`.
    fn cmd(label: &str, exec: &str, argv: &[&str]) -> CommandRef {
        let argv = argv.iter().map(|arg| arg.to_string()).collect();
        let payload = Payload::Native {
            exec: exec.into(),
            argv,
            envp: vec![],
        };
        CommandRef {
            label: label.into(),
            payload,
            soma: Default::default(),
            flows: Default::default(),
            supervision: brahman_card::Supervision::OneShot,
        }
    }

    /// Two-node pipeline `/bin/echo <text>` -> `/bin/cat`, connected by a
    /// single stdout -> stdin edge, in a fresh workspace.
    fn echo_into_cat(label: &str, text: &str, discern: DiscernPolicy) -> PipelineSpec {
        let stdout_to_stdin = FlowEdge {
            from: 0,
            from_output: "stdout".into(),
            to: 1,
            to_input: "stdin".into(),
        };
        PipelineSpec {
            label: label.into(),
            workspace: WorkspaceId::new(),
            nodes: vec![cmd("p1", "/bin/echo", &[text]), cmd("p2", "/bin/cat", &[])],
            edges: vec![stdout_to_stdin],
            discern,
        }
    }

    /// Waits on every launched child so no zombie outlives the test; wait
    /// errors are deliberately ignored.
    fn reap<L>(children: &[(L, i32)]) {
        for (_, pid) in children {
            let _ = nix::sys::wait::waitpid(nix::unistd::Pid::from_raw(*pid), None);
        }
    }

    #[tokio::test]
    async fn pipeline_isolated_echo_to_cat_runs() {
        let spec = echo_into_cat("echo-cat", "hola pipeline aislado", DiscernPolicy::default());
        let disc = Arc::new(DiscernPipeline::default_pipeline());
        let inc = Arc::new(Incarnator::new(IncarnatorConfig::default()));

        let launch = run_pipeline(&spec, "ws", false, disc, inc).await.unwrap();

        assert_eq!(launch.command_pids.len(), 2);
        reap(&launch.command_pids);
    }

    #[tokio::test]
    async fn pipeline_isolated_with_tap_captures_discernment() {
        let policy = DiscernPolicy {
            sample_bytes: 4096,
            enrich_producer: true,
        };
        let spec = echo_into_cat("json-cat", "{\"hello\": 1}", policy);
        let disc = Arc::new(DiscernPipeline::default_pipeline());
        let inc = Arc::new(Incarnator::new(IncarnatorConfig::default()));

        let launch = run_pipeline(&spec, "ws", true, disc, inc).await.unwrap();

        // With the tap enabled the single edge must yield one discernment,
        // and the echoed payload must be recognised as JSON.
        assert_eq!(launch.edge_discernments.len(), 1);
        let d = &launch.edge_discernments[0];
        let dis = d.discernment.as_ref().expect("discernment present");
        assert_eq!(dis.mime.as_deref(), Some("application/json"));
        reap(&launch.command_pids);
    }
}