This commit is contained in:
sergio
2026-05-10 21:58:16 +00:00
parent 3d55f189c0
commit c22d2480b9
36 changed files with 5158 additions and 363 deletions
@@ -0,0 +1,584 @@
//! `shipote-core` — runtime in-memory de Workspaces y comandos.
//!
//! Mantiene un estado tokio-friendly (Mutex sobre HashMap) con:
//! - Workspaces vivos (id → state).
//! - PIDs de comandos lanzados, indexados por workspace.
//! - Reaping cooperativo: `reap_dead()` cosecha hijos terminados.
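// `pipeline` needs narrowly scoped `unsafe` for `libc::close` and for building
// `OwnedFd` from fds created with `pipe2(2)`. The rest of the crate is meant
// to stay safe — the cargo lint `unsafe_code` is allowed only in the specific
// module.
// NOTE(review): the log drainer in this file also uses `unsafe` (raw
// `fcntl`/`read` and `OwnedFd` construction) — confirm the lint configuration
// accounts for it.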
#![deny(unsafe_op_in_unsafe_fn)]
pub mod logbuf;
pub mod persist;
pub mod pipeline;
use brahman_card::{Card, Payload, Supervision};
use ente_incarnate::{Incarnator, IncarnatorConfig};
use nix::sys::signal::{kill, Signal};
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
use nix::unistd::Pid;
use shipote_card::{CommandRef, PipelineSpec, WorkspaceId, WorkspaceSpec};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
use thiserror::Error;
use tokio::sync::Mutex;
use tracing::{info, warn};
use ulid::Ulid;
/// Errors returned by [`WorkspaceManager`] operations.
#[derive(Debug, Error)]
pub enum CoreError {
/// No workspace with the given id is currently registered in the manager.
#[error("workspace {0} not found")]
WorkspaceNotFound(WorkspaceId),
/// Wraps a `shipote_card::CompileError`, converted automatically by `?`.
#[error("compile: {0}")]
Compile(#[from] shipote_card::CompileError),
/// Wraps an `ente_incarnate::IncarnateError`, converted automatically by `?`.
#[error("incarnate: {0}")]
Incarnate(#[from] ente_incarnate::IncarnateError),
}
/// In-memory state of one live workspace.
#[derive(Debug)]
pub struct WorkspaceState {
/// Identifier under which the workspace is registered in the manager.
pub id: WorkspaceId,
/// Spec the workspace was created from.
pub spec: WorkspaceSpec,
/// Card produced from `spec` via `to_card(id)` at creation time.
pub root_card: Card,
/// Commands launched in this workspace, keyed by the id of their card.
pub commands: HashMap<Ulid, CommandState>,
/// Moment the state was built; used to report uptime.
pub started: Instant,
}
/// Tracking record for one command launched inside a workspace.
#[derive(Debug, Clone)]
pub struct CommandState {
/// Id of the card the command was incarnated from.
pub id: Ulid,
/// Human-readable label of the command.
pub label: String,
/// PID returned by the incarnator for the launched process.
pub pid: Pid,
/// `true` until `reap_dead` observes the process exiting.
pub alive: bool,
/// Set by `reap_dead`: the exit code, or `128 + signal` when the process
/// was terminated by a signal. `None` while no exit has been recorded.
pub exit_status: Option<i32>,
/// Ring buffer shared with the tokio task that drains the command's
/// captured output. `None` for commands that do not capture output
/// (future: commands with stdout=inherit).
/// NOTE(review): `run` currently redirects only stdout into this buffer
/// (`stderr_fd` is `None`).
pub logs: Option<logbuf::LogBuf>,
}
/// Owner of all live workspaces, their commands and the saved pipeline
/// definitions. Shared state lives behind an async mutex.
pub struct WorkspaceManager {
// Mutable state (workspaces + saved pipelines) behind a tokio mutex.
inner: Arc<Mutex<Inner>>,
// Incarnator used to launch every command; also handed out via
// `incarnator_handle`.
incarnator: Arc<Incarnator>,
}
/// Mutable state guarded by the manager's mutex.
struct Inner {
/// Live workspaces, keyed by id.
workspaces: HashMap<WorkspaceId, WorkspaceState>,
/// Persisted, named pipeline definitions. This is NOT the same as
/// "live pipelines" — these are specs stored for reuse with
/// `run-saved`. They survive restarts via snapshot.
saved_pipelines: HashMap<String, PipelineSpec>,
}
/// Minimal description of a freshly launched command, as returned by
/// `WorkspaceManager::run`.
#[derive(Debug, Clone)]
pub struct CommandSummary {
/// Id of the command (the id of its card).
pub id: Ulid,
/// Label generated for the command.
pub label: String,
/// Raw PID of the launched process.
pub pid: i32,
}
/// Read-only view of a tracked command, as returned by
/// `WorkspaceManager::list_commands`.
#[derive(Debug, Clone)]
pub struct CommandInfo {
/// Id of the command.
pub id: Ulid,
/// Label of the command.
pub label: String,
/// Raw PID of the process.
pub pid: i32,
/// Whether the command is still considered running.
pub alive: bool,
/// Recorded exit status, if the command has been reaped.
pub exit_status: Option<i32>,
/// `written_total()` of the command's log buffer, or 0 when the command
/// has no log buffer.
pub log_bytes: u64,
}
/// Spawns a tokio task that drains `read_fd` into `logs` until EOF or an
/// unrecoverable read error.
///
/// Ownership of `read_fd` is transferred to this function: the fd is wrapped
/// in an `OwnedFd` *before* the task is spawned, so it is closed on every
/// path — including early returns here and the task being dropped before it
/// is ever polled.
///
/// Must be called from within a tokio runtime (`tokio::spawn` / `AsyncFd`).
fn spawn_log_drainer(read_fd: std::os::fd::RawFd, logs: logbuf::LogBuf) {
    use std::os::fd::AsRawFd;

    // SAFETY: the caller hands over ownership of `read_fd`; nothing else
    // closes it after this point.
    let owned = unsafe { std::os::fd::OwnedFd::from_raw_fd_compat(read_fd) };

    // The fd must be non-blocking before it is registered with AsyncFd,
    // otherwise a `read` on an empty pipe would park a runtime worker.
    // SAFETY: fcntl on an fd kept open by `owned` for the whole block.
    let made_nonblocking = unsafe {
        let flags = libc::fcntl(owned.as_raw_fd(), libc::F_GETFL, 0);
        flags >= 0
            && libc::fcntl(owned.as_raw_fd(), libc::F_SETFL, flags | libc::O_NONBLOCK) != -1
    };
    if !made_nonblocking {
        let err = std::io::Error::last_os_error();
        tracing::warn!(?err, "log drainer could not set O_NONBLOCK");
        return; // `owned` is dropped here, closing the fd.
    }

    tokio::spawn(async move {
        let afd = match tokio::io::unix::AsyncFd::with_interest(
            owned,
            tokio::io::Interest::READABLE,
        ) {
            Ok(a) => a,
            Err(e) => {
                tracing::warn!(?e, "log drainer AsyncFd failed");
                return;
            }
        };
        let fd = afd.as_raw_fd();
        let mut buf = [0u8; 4096];
        loop {
            let mut guard = match afd.readable().await {
                Ok(g) => g,
                Err(_) => break,
            };
            // SAFETY: `fd` stays open for as long as `afd` lives, and `buf`
            // is a valid writable region of `buf.len()` bytes.
            let r = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut _, buf.len()) };
            if r > 0 {
                logs.append(&buf[..r as usize]);
                // Keep reading without clearing readiness: more data may be
                // pending and readiness is only reset on WouldBlock.
                continue;
            }
            if r == 0 {
                break; // EOF
            }
            let err = std::io::Error::last_os_error();
            match err.kind() {
                std::io::ErrorKind::WouldBlock => guard.clear_ready(),
                // A signal interrupted the read; simply retry.
                std::io::ErrorKind::Interrupted => {}
                _ => {
                    tracing::warn!(?err, "log drainer read err");
                    break;
                }
            }
        }
    });
}
/// Crate-private shim that exposes `FromRawFd::from_raw_fd` under a distinct
/// method name, so call sites do not need to import `FromRawFd`.
trait OwnedFdFromRawCompat: Sized {
    /// Wraps `fd`, taking ownership of it.
    ///
    /// # Safety
    ///
    /// `fd` must be an open file descriptor whose ownership the caller gives
    /// up; it is closed when the returned value is dropped.
    unsafe fn from_raw_fd_compat(fd: std::os::fd::RawFd) -> Self;
}

impl OwnedFdFromRawCompat for std::os::fd::OwnedFd {
    unsafe fn from_raw_fd_compat(fd: std::os::fd::RawFd) -> Self {
        // SAFETY: forwarded contract — the caller transfers ownership of
        // `fd` to the resulting `OwnedFd`.
        unsafe { <Self as std::os::fd::FromRawFd>::from_raw_fd(fd) }
    }
}
impl WorkspaceManager {
/// Builds a manager with no workspaces and no saved pipelines, backed by a
/// fresh `Incarnator` created from `cfg`.
pub fn new(cfg: IncarnatorConfig) -> Self {
    let empty_state = Inner {
        workspaces: HashMap::new(),
        saved_pipelines: HashMap::new(),
    };
    let inner = Arc::new(Mutex::new(empty_state));
    let incarnator = Arc::new(Incarnator::new(cfg));
    Self { inner, incarnator }
}
/// Borrows the incarnator used by this manager.
pub fn incarnator(&self) -> &Incarnator {
    self.incarnator.as_ref()
}
/// Returns a shared, clonable handle to the incarnator so that the pipeline
/// code can use it outside of the manager.
pub fn incarnator_handle(&self) -> Arc<Incarnator> {
    Arc::clone(&self.incarnator)
}
// -----------------------------------------------------------------
// Saved pipelines (definiciones nombradas, no runs)
// -----------------------------------------------------------------
/// Stores `spec` under `name`, replacing any definition already saved with
/// that name.
pub async fn save_pipeline(&self, name: String, spec: PipelineSpec) {
    let mut state = self.inner.lock().await;
    state.saved_pipelines.insert(name, spec);
}
/// Returns the names of all saved pipelines in ascending order.
pub async fn list_saved_pipelines(&self) -> Vec<String> {
    let state = self.inner.lock().await;
    let mut names = Vec::with_capacity(state.saved_pipelines.len());
    names.extend(state.saved_pipelines.keys().cloned());
    // Map keys are unique, so an unstable sort yields the same ordering.
    names.sort_unstable();
    names
}
/// Returns a copy of the pipeline definition saved under `name`, or `None`
/// when no such definition exists.
pub async fn get_saved_pipeline(&self, name: &str) -> Option<PipelineSpec> {
    let state = self.inner.lock().await;
    let found = state.saved_pipelines.get(name);
    found.cloned()
}
/// Deletes the pipeline definition saved under `name`. Returns `true` when a
/// definition was actually removed.
pub async fn drop_saved_pipeline(&self, name: &str) -> bool {
    let mut state = self.inner.lock().await;
    let removed = state.saved_pipelines.remove(name);
    removed.is_some()
}
/// Returns the label of workspace `id`, or `None` if it is not registered.
pub async fn workspace_label(&self, id: WorkspaceId) -> Option<String> {
    let state = self.inner.lock().await;
    let ws = state.workspaces.get(&id)?;
    Some(ws.spec.label.clone())
}
/// Creates a workspace from `spec` under a newly generated id.
///
/// Returns the id together with the warnings reported by the incarnator's
/// dry run; see `create_with_id`.
pub async fn create(
    self: &Arc<Self>,
    spec: WorkspaceSpec,
) -> Result<(WorkspaceId, Vec<String>), CoreError> {
    let fresh_id = WorkspaceId::new();
    self.create_with_id(fresh_id, spec).await
}
/// Variant of `create` that takes the workspace id from the caller. Useful
/// when restoring a snapshot: ids are preserved across restarts, so clients
/// tracking a `workspace_id` keep working.
///
/// Returns the id and the warnings produced by the incarnator's dry run of
/// the workspace card. When the spec carries a TTL, a background task stops
/// the workspace once that time has elapsed.
pub async fn create_with_id(
    self: &Arc<Self>,
    id: WorkspaceId,
    spec: WorkspaceSpec,
) -> Result<(WorkspaceId, Vec<String>), CoreError> {
    let root_card = spec.to_card(id)?;
    let warnings = self.incarnator.dry_run(&root_card).warnings;
    let ttl = spec.ttl;

    let state = WorkspaceState {
        id,
        spec,
        root_card,
        commands: HashMap::new(),
        started: Instant::now(),
    };
    {
        let mut registry = self.inner.lock().await;
        registry.workspaces.insert(id, state);
    }
    info!(%id, ?ttl, "workspace created");

    let Some(lifetime) = ttl else {
        return Ok((id, warnings));
    };

    // The timer task only keeps a weak reference, so it never prevents the
    // manager from being dropped when the daemon shuts down.
    let weak_mgr = Arc::downgrade(self);
    tokio::spawn(async move {
        tokio::time::sleep(lifetime).await;
        let Some(mgr) = weak_mgr.upgrade() else {
            return;
        };
        let still_registered = mgr.inner.lock().await.workspaces.contains_key(&id);
        if !still_registered {
            return;
        }
        info!(%id, "workspace TTL expired — auto-stop");
        let _ = mgr.stop(id).await;
    });

    Ok((id, warnings))
}
/// Returns one snapshot (id, label, command count, uptime) per registered
/// workspace. The order of the result is unspecified.
pub async fn list(&self) -> Vec<WorkspaceSnapshot> {
    let state = self.inner.lock().await;
    let mut snapshots = Vec::with_capacity(state.workspaces.len());
    for ws in state.workspaces.values() {
        snapshots.push(WorkspaceSnapshot {
            id: ws.id,
            label: ws.spec.label.clone(),
            commands: ws.commands.len() as u32,
            uptime_ms: ws.started.elapsed().as_millis() as u64,
        });
    }
    snapshots
}
pub async fn stop(&self, id: WorkspaceId) -> Result<u32, CoreError> {
let mut g = self.inner.lock().await;
let ws = g.workspaces.remove(&id).ok_or(CoreError::WorkspaceNotFound(id))?;
let mut reaped = 0u32;
for (_cid, cmd) in ws.commands {
if cmd.alive {
let _ = kill(cmd.pid, Signal::SIGTERM);
// Cosecha sin bloquear infinito: WNOHANG en loop con un par de intentos.
for _ in 0..50 {
match waitpid(cmd.pid, Some(WaitPidFlag::WNOHANG)) {
Ok(WaitStatus::StillAlive) => {
std::thread::sleep(std::time::Duration::from_millis(20));
}
Ok(_) => {
reaped += 1;
break;
}
Err(_) => break,
}
}
// Último recurso: SIGKILL.
let _ = kill(cmd.pid, Signal::SIGKILL);
let _ = waitpid(cmd.pid, None);
}
}
info!(%id, reaped, "workspace stopped");
Ok(reaped)
}
/// Ejecuta un comando one-shot dentro de un workspace existente.
/// Captura stdout+stderr en un ring buffer accesible vía
/// [`get_command_logs`](Self::get_command_logs).
pub async fn run(
&self,
id: WorkspaceId,
exec: String,
argv: Vec<String>,
envp: Vec<(String, String)>,
) -> Result<CommandSummary, CoreError> {
let workspace_label = {
let g = self.inner.lock().await;
let ws = g.workspaces.get(&id).ok_or(CoreError::WorkspaceNotFound(id))?;
ws.spec.label.clone()
};
let cmd_ref = CommandRef {
label: format!("run-{}", short_ulid(&Ulid::new())),
payload: Payload::Native { exec, argv, envp },
soma: Default::default(),
flows: Default::default(),
supervision: Supervision::OneShot,
};
let card = cmd_ref.to_card(0, &workspace_label)?;
// Pipe para capturar stdout. O_CLOEXEC para que hijos del hijo
// no hereden la copia. v1: stderr=inherit (simplicidad; tail útil
// para stdout solo). Futuro: stderr separado en el ring.
let (capture_r, capture_w) =
nix::unistd::pipe2(nix::fcntl::OFlag::O_CLOEXEC).map_err(|e| {
CoreError::Incarnate(ente_incarnate::IncarnateError::Pipe(e))
})?;
use std::os::fd::IntoRawFd;
let capture_r_fd = capture_r.into_raw_fd();
let capture_w_fd = capture_w.into_raw_fd();
let logs = logbuf::LogBuf::new();
let stdio = ente_incarnate::ChildStdio {
stdin_fd: None,
stdout_fd: Some(capture_w_fd),
stderr_fd: None,
};
let out = self.incarnator.incarnate_with(&card, stdio)?;
let cmd_id = card.id;
let cmd_label = cmd_ref.label.clone();
let pid = out.pid;
// Drainer: tokio task que lee capture_r_fd y appendea al ring.
spawn_log_drainer(capture_r_fd, logs.clone());
let mut g = self.inner.lock().await;
if let Some(ws) = g.workspaces.get_mut(&id) {
ws.commands.insert(
cmd_id,
CommandState {
id: cmd_id,
label: cmd_label.clone(),
pid,
alive: true,
exit_status: None,
logs: Some(logs),
},
);
}
for d in &out.degradations {
warn!(?d, %id, "command incarnation degradation");
}
Ok(CommandSummary {
id: cmd_id,
label: cmd_label,
pid: pid.as_raw(),
})
}
/// Returns the tail of the captured log for `(workspace, command)`.
///
/// Yields `None` when the workspace or the command is unknown, or when the
/// command has no log buffer. `tail_bytes` is forwarded to `LogBuf::tail`.
pub async fn get_command_logs(
    &self,
    workspace: WorkspaceId,
    command: Ulid,
    tail_bytes: usize,
) -> Option<Vec<u8>> {
    let state = self.inner.lock().await;
    let buffer = state
        .workspaces
        .get(&workspace)?
        .commands
        .get(&command)?
        .logs
        .as_ref()?;
    Some(buffer.tail(tail_bytes))
}
/// Lists the commands tracked for `workspace`, ordered by command id.
/// Returns an empty list when the workspace is not registered.
pub async fn list_commands(&self, workspace: WorkspaceId) -> Vec<CommandInfo> {
    let state = self.inner.lock().await;
    let mut infos: Vec<CommandInfo> = match state.workspaces.get(&workspace) {
        None => return Vec::new(),
        Some(ws) => ws
            .commands
            .values()
            .map(|cmd| CommandInfo {
                id: cmd.id,
                label: cmd.label.clone(),
                pid: cmd.pid.as_raw(),
                alive: cmd.alive,
                exit_status: cmd.exit_status,
                log_bytes: cmd.logs.as_ref().map_or(0, |buf| buf.written_total()),
            })
            .collect(),
    };
    // ULIDs sort chronologically, which gives a stable ordering.
    infos.sort_by_key(|info| info.id);
    infos
}
/// Launches every card of a pipeline and returns `(label, pid)` per node.
/// Wiring the nodes together through flows is left to the broker (once the
/// sidecar integration is complete; v1 only launches).
///
/// Nodes are launched in order; the first failure aborts the loop and is
/// returned, while nodes launched before it stay registered.
pub async fn run_pipeline(
    &self,
    spec: &PipelineSpec,
) -> Result<Vec<(String, Pid)>, CoreError> {
    spec.validate()?;
    let ws_id = spec.workspace;

    let workspace_label = match self.inner.lock().await.workspaces.get(&ws_id) {
        Some(ws) => ws.spec.label.clone(),
        None => return Err(CoreError::WorkspaceNotFound(ws_id)),
    };

    let mut launched = Vec::new();
    for (index, node) in spec.nodes.iter().enumerate() {
        let card = node.to_card(index, &workspace_label)?;
        let out = self.incarnator.incarnate(&card)?;
        {
            let mut state = self.inner.lock().await;
            if let Some(ws) = state.workspaces.get_mut(&ws_id) {
                let record = CommandState {
                    id: card.id,
                    label: node.label.clone(),
                    pid: out.pid,
                    alive: true,
                    exit_status: None,
                    // run_pipeline does NOT capture logs (nodes are wired
                    // through pipes).
                    logs: None,
                };
                ws.commands.insert(card.id, record);
            }
        }
        launched.push((node.label.clone(), out.pid));
    }
    Ok(launched)
}
/// Cosecha hijos terminados (no-bloqueante). Llamar periódicamente desde
/// el daemon o ante SIGCHLD. Marca `alive=false` y guarda exit_status.
pub async fn reap_dead(&self) {
let mut g = self.inner.lock().await;
for ws in g.workspaces.values_mut() {
for cmd in ws.commands.values_mut() {
if !cmd.alive {
continue;
}
match waitpid(cmd.pid, Some(WaitPidFlag::WNOHANG)) {
Ok(WaitStatus::Exited(_, code)) => {
cmd.alive = false;
cmd.exit_status = Some(code);
}
Ok(WaitStatus::Signaled(_, sig, _)) => {
cmd.alive = false;
cmd.exit_status = Some(128 + (sig as i32));
}
_ => {}
}
}
}
}
}
/// Point-in-time summary of a workspace, as returned by
/// `WorkspaceManager::list`.
#[derive(Debug, Clone)]
pub struct WorkspaceSnapshot {
/// Id of the workspace.
pub id: WorkspaceId,
/// Label taken from the workspace spec.
pub label: String,
/// Number of commands tracked for the workspace (alive or not).
pub commands: u32,
/// Milliseconds elapsed since the workspace state was created.
pub uptime_ms: u64,
}
/// Returns the last six characters of the textual form of `u`, used as a
/// short suffix for generated command labels.
fn short_ulid(u: &Ulid) -> String {
    let mut full = u.to_string();
    let cut = full.len() - 6;
    full.split_off(cut)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Manager with the default incarnator configuration.
    fn manager() -> Arc<WorkspaceManager> {
        Arc::new(WorkspaceManager::new(IncarnatorConfig::default()))
    }

    /// Minimal workspace spec; only the label and the TTL vary across tests.
    fn test_spec(label: &str, ttl: Option<Duration>) -> WorkspaceSpec {
        WorkspaceSpec {
            label: label.into(),
            soma: Default::default(),
            permissions: Default::default(),
            ttl,
            flow_dirs: vec![],
            on_exit: shipote_card::ExitPolicy::Reap,
        }
    }

    /// A workspace with a TTL disappears on its own once the TTL elapses.
    #[tokio::test]
    async fn ttl_auto_stops_workspace() {
        let mgr = manager();
        let spec = test_spec("ttl-test", Some(Duration::from_millis(120)));
        mgr.create(spec).await.unwrap();
        assert_eq!(mgr.list().await.len(), 1);
        tokio::time::sleep(Duration::from_millis(250)).await;
        assert_eq!(
            mgr.list().await.len(),
            0,
            "TTL expirado: workspace debe haber sido removido"
        );
    }

    /// A created workspace shows up in `list` under the returned id.
    #[tokio::test]
    async fn create_and_list_workspace() {
        let mgr = manager();
        let (id, _w) = mgr.create(test_spec("test", None)).await.unwrap();
        let list = mgr.list().await;
        assert_eq!(list.len(), 1);
        assert_eq!(list[0].id, id);
    }

    /// stdout of a command launched with `run` ends up in its log buffer.
    #[tokio::test]
    async fn run_captures_stdout_to_log() {
        let mgr = manager();
        let (id, _) = mgr.create(test_spec("logs", None)).await.unwrap();
        let summary = mgr
            .run(
                id,
                "/bin/echo".into(),
                vec!["captured-output".into()],
                vec![],
            )
            .await
            .unwrap();
        // Wait for the command to finish and for the drainer to catch up.
        for _ in 0..50 {
            tokio::time::sleep(Duration::from_millis(20)).await;
            mgr.reap_dead().await;
            let logs = mgr.get_command_logs(id, summary.id, 0).await.unwrap_or_default();
            if !logs.is_empty() {
                let s = String::from_utf8_lossy(&logs);
                assert!(s.contains("captured-output"), "got: {s:?}");
                return;
            }
        }
        panic!("logs never captured");
    }

    /// `/bin/true` can be launched in a workspace and reaped afterwards.
    #[tokio::test]
    async fn run_true_in_workspace() {
        let mgr = manager();
        let (id, _) = mgr.create(test_spec("exec", None)).await.unwrap();
        let summary = mgr
            .run(id, "/bin/true".into(), vec![], vec![])
            .await
            .unwrap();
        assert!(summary.pid > 0);
        // Give the child time to exit without blocking the runtime thread,
        // then reap it.
        tokio::time::sleep(Duration::from_millis(100)).await;
        mgr.reap_dead().await;
    }
}