This commit is contained in:
sergio
2026-05-10 21:58:16 +00:00
parent 3d55f189c0
commit c22d2480b9
36 changed files with 5158 additions and 363 deletions
+1
View File
@@ -11,6 +11,7 @@ description = "Nouser — explorador de Mónadas: scanner, clustering determinis
[dependencies]
nouser-card = { path = "../card" }
nouser-nous = { path = "../nous" }
shipote-discern = { path = "../../shipote/shipote-discern" }
brahman-card = { path = "../../../core/brahman-card" }
brahman-handshake = { path = "../../../core/brahman-handshake" }
brahman-sidecar = { path = "../../../shared/brahman-sidecar" }
+40 -2
View File
@@ -152,10 +152,12 @@ fn top_extensions(files: &[&FileEntry], n: usize) -> Vec<String> {
sorted.into_iter().take(n).map(|(k, _)| k).collect()
}
/// Elige el lente dominante según la extensión más frecuente.
/// Elige el lente dominante según la extensión más frecuente, con
/// fallback a `shipote-discern` sobre el head del archivo más
/// representativo cuando la extensión no da hint claro (Lens::Grid).
fn pick_lens(files: &[&FileEntry]) -> Lens {
let dominant = top_extensions(files, 1).into_iter().next();
match dominant.as_deref() {
let by_ext = match dominant.as_deref() {
Some("rs" | "py" | "ts" | "tsx" | "js" | "jsx" | "go" | "java" | "kt" | "c" | "cpp"
| "cc" | "h" | "hpp" | "rb" | "swift" | "zig") => Lens::Code,
Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "svg" | "bmp" | "tiff" | "heic") => {
@@ -164,6 +166,42 @@ fn pick_lens(files: &[&FileEntry]) -> Lens {
Some("md" | "markdown" | "rst" | "txt" | "org" | "tex") => Lens::Markdown,
Some("db" | "sqlite" | "sqlite3" | "csv" | "tsv" | "parquet") => Lens::Database,
_ => Lens::Grid,
};
if by_ext != Lens::Grid {
return by_ext;
}
// Fallback: samplear el primer archivo del grupo con shipote-discern.
// Sólo si tiene path real (FileEntry con path absoluto/relativo).
if let Some(first) = files.first() {
if let Some(lens) = discern_lens(&first.path) {
return lens;
}
}
Lens::Grid
}
/// Sniffs the head of the file at `path` with `shipote-discern` and maps the
/// lens name it reports to a [`Lens`].
///
/// Returns `None` when the file cannot be opened or read, when the pipeline
/// yields no verdict, when the verdict carries no lens, or when the lens name
/// is not one of the names mapped below.
fn discern_lens(path: &std::path::Path) -> Option<Lens> {
    use std::io::Read;

    /// Upper bound on the number of leading bytes handed to the pipeline.
    const SAMPLE_BYTES: u64 = 4096;

    let file = std::fs::File::open(path).ok()?;
    // A single `read` call may return fewer bytes than are available, which
    // would hand the pipeline an arbitrarily short sample. `take` +
    // `read_to_end` keeps reading until the cap or EOF is reached.
    let mut head = Vec::with_capacity(SAMPLE_BYTES as usize);
    file.take(SAMPLE_BYTES).read_to_end(&mut head).ok()?;

    let pipeline = shipote_discern::DiscernPipeline::default_pipeline();
    let verdict = pipeline.discern(
        &head,
        &shipote_discern::Hint {
            // `None` when the path is not valid UTF-8.
            path: path.to_str(),
            size_total: None,
        },
    )?;
    match verdict.lens.as_deref()? {
        "code" => Some(Lens::Code),
        "gallery" => Some(Lens::Gallery),
        "markdown" => Some(Lens::Markdown),
        "database" => Some(Lens::Database),
        "tree" => Some(Lens::Tree),
        _ => None,
    }
}
@@ -0,0 +1,17 @@
[package]
name = "shipote-card"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
publish.workspace = true
description = "Tipos de shipote: WorkspaceSpec, PipelineSpec, CommandRef, FlowEdge. Compilan a Cards de brahman-card."
[dependencies]
brahman-card = { path = "../../../core/brahman-card" }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
thiserror = { workspace = true }
ulid = { workspace = true }
@@ -0,0 +1,449 @@
//! `shipote-card` — tipos del runtime shipote.
//!
//! Tres entidades nuevas encima del `brahman-card::Card`:
//!
//! - [`WorkspaceSpec`] — espacio aislado raíz con su propio `SomaSpec`.
//! - [`CommandRef`] — un comando dentro de un workspace.
//! - [`PipelineSpec`] — DAG de `CommandRef` conectados por `FlowEdge`.
//!
//! Cada `WorkspaceSpec`/`CommandRef` se **compila** a una o varias
//! [`brahman_card::Card`] que el daemon entrega al [`Incarnator`] de
//! `ente-incarnate`. Esto preserva el contrato canónico del fractal.
#![forbid(unsafe_code)]
use brahman_card::{Card, Payload, Permissions, SomaSpec, Supervision};
use serde::{Deserialize, Serialize};
use std::time::Duration;
use thiserror::Error;
use ulid::Ulid;
// =====================================================================
// Identidades
// =====================================================================
/// Identifier of a workspace: a newtype over [`Ulid`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct WorkspaceId(pub Ulid);

impl WorkspaceId {
    /// Creates an identifier backed by a freshly generated `Ulid`.
    pub fn new() -> Self {
        WorkspaceId(Ulid::new())
    }
}

impl Default for WorkspaceId {
    /// Same as [`WorkspaceId::new`]: every default value is a new identifier.
    fn default() -> Self {
        WorkspaceId::new()
    }
}

impl std::fmt::Display for WorkspaceId {
    /// Formats the identifier exactly like the wrapped `Ulid`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let WorkspaceId(inner) = self;
        std::fmt::Display::fmt(inner, f)
    }
}
/// Identifier of a pipeline: a newtype over [`Ulid`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct PipelineId(pub Ulid);

impl PipelineId {
    /// Creates an identifier backed by a freshly generated `Ulid`.
    pub fn new() -> Self {
        PipelineId(Ulid::new())
    }
}

impl Default for PipelineId {
    /// Same as [`PipelineId::new`]: every default value is a new identifier.
    fn default() -> Self {
        PipelineId::new()
    }
}

impl std::fmt::Display for PipelineId {
    /// Formats the identifier exactly like the wrapped `Ulid`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let PipelineId(inner) = self;
        std::fmt::Display::fmt(inner, f)
    }
}
// =====================================================================
// Workspace
// =====================================================================
/// Isolated shipote space. It is the isolation root — any command that runs
/// inside inherits its restrictions and cannot loosen them.
///
/// Every field except `label` is optional when deserializing and falls back
/// to its type's `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkspaceSpec {
/// Workspace name. `to_card` rejects a blank label and embeds it in the
/// Card label as `shipote.workspace.<label>`.
pub label: String,
/// Isolation of the workspace itself (when materialized as a root Card).
#[serde(default)]
pub soma: SomaSpec,
/// Maximum permissions for children. Children may lower them but not raise them.
#[serde(default)]
pub permissions: Permissions,
/// `None` = lives until `stop`. `Some(d)` = the daemon terminates it after `d`.
/// Serialized as whole milliseconds through `opt_duration_millis`.
#[serde(default, with = "opt_duration_millis")]
pub ttl: Option<Duration>,
/// Pre-declared flow slots. They limit which consumers outside the
/// workspace may be matched against the internal producers.
#[serde(default)]
pub flow_dirs: Vec<FlowSlot>,
/// Policy applied when the workspace terminates.
#[serde(default)]
pub on_exit: ExitPolicy,
}
/// A named flow slot declared by a workspace (see `WorkspaceSpec::flow_dirs`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlowSlot {
/// Slot name.
pub name: String,
/// Whether the slot is an input or an output.
pub direction: FlowDirection,
/// If `Workspace`, only other nodes of the same workspace may be matched.
/// If `Public`, the global broker may pair it.
/// Defaults to `FlowScope::Workspace` when omitted.
#[serde(default)]
pub scope: FlowScope,
}
/// Direction of a [`FlowSlot`]. Serialized in lowercase (`"input"` / `"output"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FlowDirection {
Input,
Output,
}
/// Visibility of a [`FlowSlot`]. Serialized in lowercase
/// (`"workspace"` / `"public"`); the default is `Workspace`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum FlowScope {
#[default]
Workspace,
Public,
}
/// What to do with a workspace when it terminates. Serialized in kebab-case;
/// the default is `Reap`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum ExitPolicy {
/// Reap child processes and discard state.
#[default]
Reap,
/// Keep the workspace in `stopped` for inspection.
Keep,
/// Take a snapshot of the state (for a later restart).
Snapshot,
}
/// Serde adapter that stores an `Option<Duration>` as an optional count of
/// whole milliseconds (`u64`). Used through
/// `#[serde(with = "opt_duration_millis")]`.
mod opt_duration_millis {
    use serde::{Deserialize, Deserializer, Serialize, Serializer};
    use std::time::Duration;

    /// Serializes `None` as an absent value and `Some(d)` as `d` expressed in
    /// whole milliseconds.
    ///
    /// `Duration::as_millis` returns a `u128`; a plain `as u64` cast would
    /// silently wrap for out-of-range values, so the count saturates at
    /// `u64::MAX` instead.
    pub fn serialize<S: Serializer>(d: &Option<Duration>, s: S) -> Result<S::Ok, S::Error> {
        d.map(|x| x.as_millis().min(u128::from(u64::MAX)) as u64)
            .serialize(s)
    }

    /// Reads an optional `u64` and interprets it as milliseconds.
    pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Option<Duration>, D::Error> {
        let millis: Option<u64> = Option::deserialize(d)?;
        Ok(millis.map(Duration::from_millis))
    }
}
// =====================================================================
// CommandRef
// =====================================================================
/// A command that lives inside a workspace. It is compiled to a `Card` with
/// `pin_to` pointing at the parent workspace (by label) and its `SomaSpec`
/// intersected with the workspace's.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommandRef {
/// Command name. `to_card` rejects a blank label.
pub label: String,
/// What the command runs; copied verbatim into the compiled Card.
pub payload: Payload,
/// SomaSpec of the command. The compiler intersects it with the workspace's.
#[serde(default)]
pub soma: SomaSpec,
/// Typed inputs/outputs (the same `Flow` as brahman-card).
#[serde(default)]
pub flows: brahman_card::Flows,
/// Supervision policy. Defaults to `OneShot` (a command runs and dies).
#[serde(default = "default_oneshot")]
pub supervision: Supervision,
}
/// Serde default for `CommandRef::supervision`: always `Supervision::OneShot`.
fn default_oneshot() -> Supervision {
Supervision::OneShot
}
// =====================================================================
// Pipeline
// =====================================================================
/// A pipeline: a list of commands plus the edges that connect their flows.
/// `validate` checks labels and edge indices.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineSpec {
/// Pipeline name.
pub label: String,
/// Identifier of the workspace the pipeline belongs to.
pub workspace: WorkspaceId,
/// Commands of the pipeline; `FlowEdge` refers to them by index.
pub nodes: Vec<CommandRef>,
/// Connections between nodes. Defaults to empty when omitted.
#[serde(default)]
pub edges: Vec<FlowEdge>,
/// Sampling policy for the discerner. Defaults to `DiscernPolicy::default()`
/// when omitted.
#[serde(default)]
pub discern: DiscernPolicy,
}
/// A directed connection from one node's output to another node's input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlowEdge {
/// Index of the producer in `PipelineSpec.nodes`.
pub from: usize,
/// Name of the producer's output Flow.
pub from_output: String,
/// Index of the consumer in `PipelineSpec.nodes`.
pub to: usize,
/// Name of the consumer's input Flow.
pub to_input: String,
}
/// Sampling policy for the discerner.
///
/// `Default` is implemented by hand so that `DiscernPolicy::default()` (which
/// is also what `#[serde(default)]` on `PipelineSpec::discern` uses when the
/// whole section is omitted) agrees with the per-field serde defaults. A
/// derived `Default` would produce `sample_bytes = 0` and
/// `enrich_producer = false`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscernPolicy {
    /// Bytes to sample per flow for the discerner. Default 4 KiB.
    #[serde(default = "default_sample_bytes")]
    pub sample_bytes: usize,
    /// If `true`, enriches the producer's Card with the detected TypeRef.
    /// Default `true`.
    #[serde(default = "default_true")]
    pub enrich_producer: bool,
}

impl Default for DiscernPolicy {
    fn default() -> Self {
        Self {
            sample_bytes: default_sample_bytes(),
            enrich_producer: default_true(),
        }
    }
}

/// Default for `DiscernPolicy::sample_bytes`: 4 KiB.
fn default_sample_bytes() -> usize {
    4096
}

/// Default for `DiscernPolicy::enrich_producer`.
fn default_true() -> bool {
    true
}
// =====================================================================
// Compilación a Card
// =====================================================================
/// Errors produced while compiling specs to Cards or validating a pipeline.
#[derive(Debug, Error)]
pub enum CompileError {
/// The workspace label is empty or whitespace-only.
#[error("workspace label vacío")]
EmptyWorkspaceLabel,
/// The command at the given position has an empty or whitespace-only label.
#[error("comando con label vacío en posición {0}")]
EmptyCommandLabel(usize),
/// An edge references a node index `>= nodes`.
#[error("edge fuera de rango: from={from}, to={to}, nodes={nodes}")]
EdgeOutOfBounds { from: usize, to: usize, nodes: usize },
}
impl WorkspaceSpec {
/// Compila el WorkspaceSpec a una Card raíz que el Incarnator puede
/// encarnar. Usa `Payload::Virtual` (el workspace no es un proceso por
/// sí solo; sólo aloja hijos).
pub fn to_card(&self, id: WorkspaceId) -> Result<Card, CompileError> {
if self.label.trim().is_empty() {
return Err(CompileError::EmptyWorkspaceLabel);
}
let mut c = Card::new(format!("shipote.workspace.{}", self.label));
c.id = id.0;
c.soma = self.soma.clone();
c.permissions = self.permissions.clone();
c.payload = Payload::Virtual;
c.supervision = Supervision::OneShot;
Ok(c)
}
}
impl CommandRef {
    /// Builds the child Card for this command inside the workspace named
    /// `workspace_label`.
    ///
    /// The Card is labelled `shipote.cmd.<workspace_label>.<label>` and every
    /// input/output Flow that has no `pin_to` yet is pinned to
    /// `shipote.workspace.<workspace_label>`, so the broker prefers resolving
    /// inside the same workspace when several candidates exist.
    ///
    /// `idx` is only used to report the position in
    /// `CompileError::EmptyCommandLabel`.
    ///
    /// NOTE(review): the soma is intersected with `SomaSpec::default()`, not
    /// with the real workspace soma — this signature has no access to it.
    /// Confirm whether callers are expected to apply workspace restrictions
    /// elsewhere.
    ///
    /// # Errors
    /// `CompileError::EmptyCommandLabel(idx)` when the label is blank.
    pub fn to_card(&self, idx: usize, workspace_label: &str) -> Result<Card, CompileError> {
        if self.label.trim().is_empty() {
            return Err(CompileError::EmptyCommandLabel(idx));
        }

        let workspace_pin = format!("shipote.workspace.{}", workspace_label);
        let mut card = Card::new(format!("shipote.cmd.{}.{}", workspace_label, self.label));
        card.payload = self.payload.clone();
        card.soma = intersect_soma(&self.soma, /*workspace*/ &SomaSpec::default());
        card.supervision = self.supervision.clone();
        card.flow = self.flows.clone();

        // Explicit pins set by the author are left untouched.
        card.flow
            .input
            .iter_mut()
            .chain(card.flow.output.iter_mut())
            .filter(|flow| flow.pin_to.is_none())
            .for_each(|flow| flow.pin_to = Some(workspace_pin.clone()));

        Ok(card)
    }
}
/// Conservative merge of a child's `SomaSpec` with its workspace's.
///
/// Starts from a clone of `child`; a namespace flag ends up set when either
/// side sets it (the child cannot loosen what the workspace asked for, and
/// extra isolation requested by the child is kept). For each rlimit the
/// smaller present value wins.
fn intersect_soma(child: &SomaSpec, ws: &SomaSpec) -> SomaSpec {
    let mut merged = child.clone();

    {
        let ns = &mut merged.namespaces;
        let parent = &ws.namespaces;
        ns.mount |= parent.mount;
        ns.pid |= parent.pid;
        ns.net |= parent.net;
        ns.uts |= parent.uts;
        ns.ipc |= parent.ipc;
        ns.user |= parent.user;
        ns.cgroup |= parent.cgroup;
    }

    // rlimits: the lower (more restrictive) value wins.
    {
        let limits = &mut merged.rlimits;
        limits.mem_bytes = min_opt(limits.mem_bytes, ws.rlimits.mem_bytes);
        limits.nproc = min_opt(limits.nproc, ws.rlimits.nproc);
        limits.nofile = min_opt(limits.nofile, ws.rlimits.nofile);
    }

    merged
}
/// Minimum of two optional values, treating `None` as "no limit": when both
/// are present the smaller one is returned, when only one is present it is
/// returned as-is, and `None` is returned only when both are absent.
fn min_opt<T: Ord + Copy>(a: Option<T>, b: Option<T>) -> Option<T> {
    match a.zip(b) {
        Some((lhs, rhs)) => Some(lhs.min(rhs)),
        // At most one side is present here; `or` picks whichever it is.
        None => a.or(b),
    }
}
impl PipelineSpec {
    /// Checks the structural invariants of the pipeline.
    ///
    /// # Errors
    /// - `CompileError::EmptyCommandLabel(i)` for the first node whose label
    ///   is blank.
    /// - `CompileError::EdgeOutOfBounds` for the first edge whose `from` or
    ///   `to` is not a valid index into `nodes`.
    ///
    /// Labels are checked before edges.
    pub fn validate(&self) -> Result<(), CompileError> {
        let node_count = self.nodes.len();

        let blank = self
            .nodes
            .iter()
            .position(|node| node.label.trim().is_empty());
        if let Some(index) = blank {
            return Err(CompileError::EmptyCommandLabel(index));
        }

        let dangling = self
            .edges
            .iter()
            .find(|edge| edge.from >= node_count || edge.to >= node_count);
        match dangling {
            Some(edge) => Err(CompileError::EdgeOutOfBounds {
                from: edge.from,
                to: edge.to,
                nodes: node_count,
            }),
            None => Ok(()),
        }
    }
}
// =====================================================================
// I/O conveniencia (TOML + JSON)
// =====================================================================
/// Errors produced by `load_workspace_spec` / `load_pipeline_spec`.
#[derive(Debug, Error)]
pub enum LoadError {
/// The file could not be read.
#[error("io: {0}")]
Io(#[from] std::io::Error),
/// The file has a `.toml` extension but failed to parse.
#[error("toml: {0}")]
Toml(#[from] toml::de::Error),
/// The file has a `.json` extension but failed to parse.
#[error("json: {0}")]
Json(#[from] serde_json::Error),
/// The file extension is neither `toml` nor `json`.
#[error("formato desconocido (esperado .toml o .json)")]
UnknownFormat,
}
pub fn load_workspace_spec(path: &std::path::Path) -> Result<WorkspaceSpec, LoadError> {
let raw = std::fs::read_to_string(path)?;
match path.extension().and_then(|s| s.to_str()) {
Some("toml") => Ok(toml::from_str(&raw)?),
Some("json") => Ok(serde_json::from_str(&raw)?),
_ => Err(LoadError::UnknownFormat),
}
}
pub fn load_pipeline_spec(path: &std::path::Path) -> Result<PipelineSpec, LoadError> {
let raw = std::fs::read_to_string(path)?;
match path.extension().and_then(|s| s.to_str()) {
Some("toml") => Ok(toml::from_str(&raw)?),
Some("json") => Ok(serde_json::from_str(&raw)?),
_ => Err(LoadError::UnknownFormat),
}
}
// Unit tests for the spec types: serde round-trips, Card compilation,
// pipeline validation and soma intersection.
#[cfg(test)]
mod tests {
use super::*;
// Workspace fixture with a 60 s TTL and one public output slot.
fn sample_workspace() -> WorkspaceSpec {
WorkspaceSpec {
label: "demo".into(),
soma: SomaSpec::default(),
permissions: Permissions::default(),
ttl: Some(Duration::from_secs(60)),
flow_dirs: vec![FlowSlot {
name: "out".into(),
direction: FlowDirection::Output,
scope: FlowScope::Public,
}],
on_exit: ExitPolicy::Reap,
}
}
// TOML round-trip keeps the label, the TTL and the flow slots.
#[test]
fn workspace_toml_roundtrip() {
let ws = sample_workspace();
let s = toml::to_string(&ws).unwrap();
let back: WorkspaceSpec = toml::from_str(&s).unwrap();
assert_eq!(back.label, ws.label);
assert_eq!(back.ttl, ws.ttl);
assert_eq!(back.flow_dirs.len(), 1);
}
// JSON round-trip keeps the label.
#[test]
fn workspace_json_roundtrip() {
let ws = sample_workspace();
let s = serde_json::to_string(&ws).unwrap();
let back: WorkspaceSpec = serde_json::from_str(&s).unwrap();
assert_eq!(back.label, ws.label);
}
// The compiled Card carries the given id, the workspace label prefix and a
// virtual payload.
#[test]
fn workspace_compiles_to_card() {
let ws = sample_workspace();
let id = WorkspaceId::new();
let c = ws.to_card(id).unwrap();
assert_eq!(c.id, id.0);
assert!(c.label.starts_with("shipote.workspace."));
assert!(matches!(c.payload, Payload::Virtual));
}
// A blank workspace label must not compile.
#[test]
fn empty_label_rejected() {
let mut ws = sample_workspace();
ws.label = String::new();
assert!(ws.to_card(WorkspaceId::new()).is_err());
}
// An edge pointing at node index 5 in a single-node pipeline is rejected.
#[test]
fn pipeline_validates_edges() {
let p = PipelineSpec {
label: "p".into(),
workspace: WorkspaceId::new(),
nodes: vec![CommandRef {
label: "a".into(),
payload: Payload::Virtual,
soma: SomaSpec::default(),
flows: brahman_card::Flows::default(),
supervision: Supervision::OneShot,
}],
edges: vec![FlowEdge {
from: 0,
from_output: "x".into(),
to: 5,
to_input: "y".into(),
}],
discern: DiscernPolicy::default(),
};
assert!(p.validate().is_err());
}
// The smaller memory limit wins and a namespace requested by the workspace
// is propagated to the child.
#[test]
fn intersect_soma_takes_more_restrictive() {
let mut child = SomaSpec::default();
child.rlimits.mem_bytes = Some(1_000_000);
let mut ws = SomaSpec::default();
ws.rlimits.mem_bytes = Some(500_000);
ws.namespaces.user = true;
let r = intersect_soma(&child, &ws);
assert_eq!(r.rlimits.mem_bytes, Some(500_000));
assert!(r.namespaces.user);
}
}
@@ -0,0 +1,27 @@
[package]
name = "shipote-core"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
publish.workspace = true
description = "Runtime de shipote: WorkspaceManager sobre ente-incarnate. Estado in-memory, lifecycle, reaping."
[dependencies]
shipote-card = { path = "../shipote-card" }
shipote-discern = { path = "../shipote-discern" }
brahman-card = { path = "../../../core/brahman-card" }
ente-incarnate = { path = "../../../shared/ente-incarnate" }
nix = { workspace = true }
libc = { workspace = true }
anyhow = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
ulid = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
@@ -0,0 +1,584 @@
//! `shipote-core` — runtime in-memory de Workspaces y comandos.
//!
//! Mantiene un estado tokio-friendly (Mutex sobre HashMap) con:
//! - Workspaces vivos (id → state).
//! - PIDs de comandos lanzados, indexados por workspace.
//! - Reaping cooperativo: `reap_dead()` cosecha hijos terminados.
// `pipeline` necesita `unsafe` puntual para `libc::close` y construir
// `OwnedFd` desde fds que armamos con `pipe2(2)`. El resto del crate
// permanece safe — el cargo lint `unsafe_code` queda permitido sólo en
// el módulo concreto.
#![deny(unsafe_op_in_unsafe_fn)]
pub mod logbuf;
pub mod persist;
pub mod pipeline;
use brahman_card::{Card, Payload, Supervision};
use ente_incarnate::{Incarnator, IncarnatorConfig};
use nix::sys::signal::{kill, Signal};
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
use nix::unistd::Pid;
use shipote_card::{CommandRef, PipelineSpec, WorkspaceId, WorkspaceSpec};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
use thiserror::Error;
use tokio::sync::Mutex;
use tracing::{info, warn};
use ulid::Ulid;
/// Errors returned by the `WorkspaceManager` API.
#[derive(Debug, Error)]
pub enum CoreError {
/// No workspace with the given id is registered in the manager.
#[error("workspace {0} not found")]
WorkspaceNotFound(WorkspaceId),
/// A spec failed to compile to a Card or a pipeline failed validation.
#[error("compile: {0}")]
Compile(#[from] shipote_card::CompileError),
/// The incarnator (or the pipe setup around it) failed.
#[error("incarnate: {0}")]
Incarnate(#[from] ente_incarnate::IncarnateError),
}
/// In-memory state of one registered workspace.
#[derive(Debug)]
pub struct WorkspaceState {
/// Identifier the workspace is registered under.
pub id: WorkspaceId,
/// Spec the workspace was created from.
pub spec: WorkspaceSpec,
/// Root Card compiled from `spec` at creation time.
pub root_card: Card,
/// Commands launched in this workspace, keyed by the id of their Card.
pub commands: HashMap<Ulid, CommandState>,
/// Creation instant; used to report uptime.
pub started: Instant,
}
/// In-memory state of one launched command.
#[derive(Debug, Clone)]
pub struct CommandState {
/// Id of the Card the command was compiled to.
pub id: Ulid,
/// Command label.
pub label: String,
/// PID returned by the incarnator.
pub pid: Pid,
/// `true` until `reap_dead` observes the process exiting.
pub alive: bool,
/// Exit code, or `128 + signal` when killed by a signal; `None` while alive.
pub exit_status: Option<i32>,
/// Ring buffer shared with the tokio task that drains the command's
/// captured output. `None` for commands that do not capture output
/// (future: commands with stdout=inherit).
pub logs: Option<logbuf::LogBuf>,
}
/// Owner of all workspace state plus the incarnator used to launch commands.
pub struct WorkspaceManager {
// Mutable state behind a tokio mutex.
inner: Arc<Mutex<Inner>>,
// Shared so it can be handed out via `incarnator_handle`.
incarnator: Arc<Incarnator>,
}
/// Mutable state guarded by the manager's mutex.
struct Inner {
/// Registered workspaces by id.
workspaces: HashMap<WorkspaceId, WorkspaceState>,
/// Named pipeline definitions that have been saved. NOT the same as
/// "live pipelines" — these are specs stored for reuse with
/// `run-saved`. They survive restarts via snapshot.
saved_pipelines: HashMap<String, PipelineSpec>,
}
/// What `WorkspaceManager::run` returns about the command it just launched.
#[derive(Debug, Clone)]
pub struct CommandSummary {
/// Id of the command's Card.
pub id: Ulid,
/// Generated command label.
pub label: String,
/// Raw PID of the launched process.
pub pid: i32,
}
/// Per-command entry returned by `WorkspaceManager::list_commands`.
#[derive(Debug, Clone)]
pub struct CommandInfo {
/// Id of the command's Card.
pub id: Ulid,
/// Command label.
pub label: String,
/// Raw PID of the process.
pub pid: i32,
/// Whether the process has not yet been observed exiting.
pub alive: bool,
/// Recorded exit status, if any.
pub exit_status: Option<i32>,
/// Total bytes ever written to the command's log buffer (0 when the
/// command has no log buffer).
pub log_bytes: u64,
}
/// Spawns a tokio task that reads `read_fd` until EOF and appends everything
/// it reads to `logs`.
///
/// Ownership of `read_fd` is transferred to the task: it is wrapped in an
/// `OwnedFd` and closed when the task ends. Must be called from within a
/// tokio runtime (it uses `tokio::spawn`).
fn spawn_log_drainer(read_fd: std::os::fd::RawFd, logs: logbuf::LogBuf) {
    // Mark the fd non-blocking before wrapping it in AsyncFd; same pattern
    // as the tap.
    // SAFETY: F_GETFL/F_SETFL on an fd the caller guarantees to be valid.
    let nonblocking = unsafe {
        let flags = libc::fcntl(read_fd, libc::F_GETFL, 0);
        flags >= 0 && libc::fcntl(read_fd, libc::F_SETFL, flags | libc::O_NONBLOCK) >= 0
    };
    if !nonblocking {
        // Draining still works, but a read on a blocking fd can stall the
        // runtime worker, so make the condition visible.
        let err = std::io::Error::last_os_error();
        tracing::warn!(?err, "log drainer could not set O_NONBLOCK");
    }
    tokio::spawn(async move {
        // SAFETY: ownership of the fd is transferred to this drainer task.
        let owned = unsafe { std::os::fd::OwnedFd::from_raw_fd_compat(read_fd) };
        let afd = match tokio::io::unix::AsyncFd::with_interest(owned, tokio::io::Interest::READABLE) {
            Ok(a) => a,
            Err(e) => {
                tracing::warn!(?e, "log drainer AsyncFd failed");
                return;
            }
        };
        let mut buf = [0u8; 4096];
        loop {
            let mut guard = match afd.readable().await {
                Ok(g) => g,
                Err(_) => break,
            };
            use std::os::fd::AsRawFd;
            let fd = afd.as_raw_fd();
            // SAFETY: `fd` is owned by `afd` and `buf` is a valid writable
            // buffer of `buf.len()` bytes.
            let r = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut _, buf.len()) };
            if r > 0 {
                logs.append(&buf[..r as usize]);
                continue;
            }
            if r == 0 {
                break; // EOF
            }
            let err = std::io::Error::last_os_error();
            match err.kind() {
                // Nothing to read right now: wait for the next readiness event.
                std::io::ErrorKind::WouldBlock => guard.clear_ready(),
                // Interrupted by a signal: not a failure, just retry.
                std::io::ErrorKind::Interrupted => {}
                _ => {
                    tracing::warn!(?err, "log drainer read err");
                    break;
                }
            }
        }
    });
}
/// Private helper trait that exposes `OwnedFd::from_raw_fd` as an associated
/// function without callers having to import `FromRawFd`.
trait OwnedFdFromRawCompat: Sized {
/// Wraps `fd` in an owning handle.
///
/// # Safety
/// The caller must own `fd`, it must be open, and nothing else may close it
/// afterwards: the returned value closes it on drop.
unsafe fn from_raw_fd_compat(fd: std::os::fd::RawFd) -> Self;
}
impl OwnedFdFromRawCompat for std::os::fd::OwnedFd {
unsafe fn from_raw_fd_compat(fd: std::os::fd::RawFd) -> Self {
use std::os::fd::FromRawFd;
// SAFETY: the caller transfers ownership of fd to OwnedFd.
unsafe { std::os::fd::OwnedFd::from_raw_fd(fd) }
}
}
impl WorkspaceManager {
/// Creates a manager with no workspaces and no saved pipelines, backed by
/// an `Incarnator` built from `cfg`.
pub fn new(cfg: IncarnatorConfig) -> Self {
    let empty = Inner {
        workspaces: HashMap::new(),
        saved_pipelines: HashMap::new(),
    };
    let inner = Arc::new(Mutex::new(empty));
    let incarnator = Arc::new(Incarnator::new(cfg));
    Self { inner, incarnator }
}
/// Borrows the incarnator this manager launches commands with.
pub fn incarnator(&self) -> &Incarnator {
    self.incarnator.as_ref()
}
/// Returns a shared (`Arc`) handle to the incarnator, so the pipeline can
/// use it outside the manager.
pub fn incarnator_handle(&self) -> Arc<Incarnator> {
    Arc::clone(&self.incarnator)
}
// -----------------------------------------------------------------
// Saved pipelines (definiciones nombradas, no runs)
// -----------------------------------------------------------------
/// Stores `spec` under `name`, replacing any definition already saved
/// under that name.
pub async fn save_pipeline(&self, name: String, spec: PipelineSpec) {
    let mut state = self.inner.lock().await;
    state.saved_pipelines.insert(name, spec);
}
/// Returns the names of all saved pipelines, sorted ascending.
pub async fn list_saved_pipelines(&self) -> Vec<String> {
    let state = self.inner.lock().await;
    let mut names = Vec::with_capacity(state.saved_pipelines.len());
    names.extend(state.saved_pipelines.keys().cloned());
    names.sort();
    names
}
/// Returns a clone of the pipeline saved under `name`, or `None` when no
/// such pipeline exists.
pub async fn get_saved_pipeline(&self, name: &str) -> Option<PipelineSpec> {
    let state = self.inner.lock().await;
    let saved = state.saved_pipelines.get(name)?;
    Some(saved.clone())
}
/// Removes the pipeline saved under `name`. Returns `true` when something
/// was actually removed.
pub async fn drop_saved_pipeline(&self, name: &str) -> bool {
    let mut state = self.inner.lock().await;
    let removed = state.saved_pipelines.remove(name);
    removed.is_some()
}
/// Returns the label of workspace `id`, or `None` when it is not registered.
pub async fn workspace_label(&self, id: WorkspaceId) -> Option<String> {
    let state = self.inner.lock().await;
    let ws = state.workspaces.get(&id)?;
    Some(ws.spec.label.clone())
}
/// Registers a workspace under a freshly generated id.
///
/// Returns the new id together with the warnings reported by the
/// incarnator's dry run. See `create_with_id` for details and errors.
pub async fn create(
    self: &Arc<Self>,
    spec: WorkspaceSpec,
) -> Result<(WorkspaceId, Vec<String>), CoreError> {
    let fresh_id = WorkspaceId::new();
    self.create_with_id(fresh_id, spec).await
}
/// Registers a workspace under a caller-supplied id.
///
/// Useful for snapshot restore: ULIDs are preserved across restarts, so
/// clients that track a `workspace_id` keep working.
///
/// The spec is compiled to its root Card, the incarnator's dry run provides
/// the returned warnings, and the state is inserted into the manager
/// (replacing any entry already stored under `id`). When the spec has a TTL,
/// a background task stops the workspace once it elapses, provided the
/// manager and the workspace still exist.
///
/// # Errors
/// `CoreError::Compile` when the spec does not compile to a Card.
pub async fn create_with_id(
    self: &Arc<Self>,
    id: WorkspaceId,
    spec: WorkspaceSpec,
) -> Result<(WorkspaceId, Vec<String>), CoreError> {
    let root_card = spec.to_card(id)?;
    let warnings = self.incarnator.dry_run(&root_card).warnings;
    let ttl = spec.ttl;

    let state = WorkspaceState {
        id,
        spec,
        root_card,
        commands: HashMap::new(),
        started: Instant::now(),
    };
    {
        let mut guard = self.inner.lock().await;
        guard.workspaces.insert(id, state);
    }
    info!(%id, ?ttl, "workspace created");

    let Some(duration) = ttl else {
        return Ok((id, warnings));
    };

    // The timer task only holds a weak reference, so it never keeps the
    // manager alive after the daemon drops it.
    let mgr_weak = Arc::downgrade(self);
    tokio::spawn(async move {
        tokio::time::sleep(duration).await;
        let Some(mgr) = mgr_weak.upgrade() else {
            return;
        };
        let exists = mgr.inner.lock().await.workspaces.contains_key(&id);
        if exists {
            info!(%id, "workspace TTL expired — auto-stop");
            let _ = mgr.stop(id).await;
        }
    });

    Ok((id, warnings))
}
/// Returns one snapshot per registered workspace (id, label, number of
/// tracked commands and uptime in milliseconds). The order follows the
/// internal hash map and is therefore unspecified.
pub async fn list(&self) -> Vec<WorkspaceSnapshot> {
    let state = self.inner.lock().await;
    let mut snapshots = Vec::with_capacity(state.workspaces.len());
    for ws in state.workspaces.values() {
        snapshots.push(WorkspaceSnapshot {
            id: ws.id,
            label: ws.spec.label.clone(),
            commands: ws.commands.len() as u32,
            uptime_ms: ws.started.elapsed().as_millis() as u64,
        });
    }
    snapshots
}
pub async fn stop(&self, id: WorkspaceId) -> Result<u32, CoreError> {
let mut g = self.inner.lock().await;
let ws = g.workspaces.remove(&id).ok_or(CoreError::WorkspaceNotFound(id))?;
let mut reaped = 0u32;
for (_cid, cmd) in ws.commands {
if cmd.alive {
let _ = kill(cmd.pid, Signal::SIGTERM);
// Cosecha sin bloquear infinito: WNOHANG en loop con un par de intentos.
for _ in 0..50 {
match waitpid(cmd.pid, Some(WaitPidFlag::WNOHANG)) {
Ok(WaitStatus::StillAlive) => {
std::thread::sleep(std::time::Duration::from_millis(20));
}
Ok(_) => {
reaped += 1;
break;
}
Err(_) => break,
}
}
// Último recurso: SIGKILL.
let _ = kill(cmd.pid, Signal::SIGKILL);
let _ = waitpid(cmd.pid, None);
}
}
info!(%id, reaped, "workspace stopped");
Ok(reaped)
}
/// Runs a one-shot command inside an existing workspace.
///
/// The command's stdout is captured into a ring buffer readable through
/// [`get_command_logs`](Self::get_command_logs). In this version stderr is
/// NOT captured: it is passed as `None` in the child's stdio (v1 keeps it
/// simple; a separate stderr capture is future work).
///
/// NOTE(review): the parent's copy of the pipe write end (`capture_w_fd`) is
/// never closed here. Unless `incarnate_with` takes ownership of the fds it
/// is given, the drainer will not observe EOF after the child exits — confirm
/// against `ente_incarnate::ChildStdio` semantics.
///
/// # Errors
/// - `CoreError::WorkspaceNotFound` when `id` is not registered.
/// - `CoreError::Compile` when the generated command fails to compile.
/// - `CoreError::Incarnate` when the pipe cannot be created or the
///   incarnator fails.
pub async fn run(
    &self,
    id: WorkspaceId,
    exec: String,
    argv: Vec<String>,
    envp: Vec<(String, String)>,
) -> Result<CommandSummary, CoreError> {
    let workspace_label = {
        let g = self.inner.lock().await;
        let ws = g.workspaces.get(&id).ok_or(CoreError::WorkspaceNotFound(id))?;
        ws.spec.label.clone()
    };
    let cmd_ref = CommandRef {
        label: format!("run-{}", short_ulid(&Ulid::new())),
        payload: Payload::Native { exec, argv, envp },
        soma: Default::default(),
        flows: Default::default(),
        supervision: Supervision::OneShot,
    };
    let card = cmd_ref.to_card(0, &workspace_label)?;

    // Pipe used to capture stdout. O_CLOEXEC so that grandchildren do not
    // inherit the copy.
    let (capture_r, capture_w) =
        nix::unistd::pipe2(nix::fcntl::OFlag::O_CLOEXEC).map_err(|e| {
            CoreError::Incarnate(ente_incarnate::IncarnateError::Pipe(e))
        })?;
    use std::os::fd::IntoRawFd;
    let capture_r_fd = capture_r.into_raw_fd();
    let capture_w_fd = capture_w.into_raw_fd();
    let logs = logbuf::LogBuf::new();
    let stdio = ente_incarnate::ChildStdio {
        stdin_fd: None,
        stdout_fd: Some(capture_w_fd),
        stderr_fd: None,
    };
    let out = match self.incarnator.incarnate_with(&card, stdio) {
        Ok(out) => out,
        Err(e) => {
            // No drainer will ever own the read end now; close it here so a
            // failed launch does not leak a descriptor.
            let _ = nix::unistd::close(capture_r_fd);
            return Err(e.into());
        }
    };
    let cmd_id = card.id;
    let cmd_label = cmd_ref.label.clone();
    let pid = out.pid;

    // Drainer: tokio task that reads capture_r_fd and appends to the ring.
    // It takes ownership of the read end.
    spawn_log_drainer(capture_r_fd, logs.clone());

    let mut g = self.inner.lock().await;
    // If the workspace was stopped while the command was being launched the
    // command is simply not tracked.
    if let Some(ws) = g.workspaces.get_mut(&id) {
        ws.commands.insert(
            cmd_id,
            CommandState {
                id: cmd_id,
                label: cmd_label.clone(),
                pid,
                alive: true,
                exit_status: None,
                logs: Some(logs),
            },
        );
    }
    for d in &out.degradations {
        warn!(?d, %id, "command incarnation degradation");
    }
    Ok(CommandSummary {
        id: cmd_id,
        label: cmd_label,
        pid: pid.as_raw(),
    })
}
/// Returns the tail of the log captured for `(workspace, command)`.
///
/// `tail_bytes` is forwarded to `LogBuf::tail`. Returns `None` when the
/// workspace or command is unknown, or when the command has no log buffer.
pub async fn get_command_logs(
    &self,
    workspace: WorkspaceId,
    command: Ulid,
    tail_bytes: usize,
) -> Option<Vec<u8>> {
    let state = self.inner.lock().await;
    let log_buf = state
        .workspaces
        .get(&workspace)
        .and_then(|ws| ws.commands.get(&command))
        .and_then(|cmd| cmd.logs.as_ref())?;
    Some(log_buf.tail(tail_bytes))
}
/// Lists the commands tracked for `workspace`, sorted by id (ULIDs sort by
/// creation time). Returns an empty list when the workspace is unknown.
pub async fn list_commands(&self, workspace: WorkspaceId) -> Vec<CommandInfo> {
    let state = self.inner.lock().await;
    let mut infos: Vec<CommandInfo> = match state.workspaces.get(&workspace) {
        None => return Vec::new(),
        Some(ws) => ws
            .commands
            .values()
            .map(|cmd| CommandInfo {
                id: cmd.id,
                label: cmd.label.clone(),
                pid: cmd.pid.as_raw(),
                alive: cmd.alive,
                exit_status: cmd.exit_status,
                log_bytes: cmd.logs.as_ref().map_or(0, |buf| buf.written_total()),
            })
            .collect(),
    };
    // Stable order by ULID (temporal).
    infos.sort_by_key(|info| info.id);
    infos
}
/// Launches every node of a pipeline, in order, and returns `(label, pid)`
/// per node.
///
/// Wiring the nodes together through flows is left to the broker (once the
/// sidecar integration is complete; v1 only launches). Nodes launched here
/// get no log buffer.
///
/// # Errors
/// - `CoreError::Compile` when validation or Card compilation fails.
/// - `CoreError::WorkspaceNotFound` when `spec.workspace` is not registered.
/// - `CoreError::Incarnate` when a node fails to launch; nodes launched
///   before the failing one keep running.
pub async fn run_pipeline(
    &self,
    spec: &PipelineSpec,
) -> Result<Vec<(String, Pid)>, CoreError> {
    spec.validate()?;

    let workspace_label = {
        let state = self.inner.lock().await;
        match state.workspaces.get(&spec.workspace) {
            Some(ws) => ws.spec.label.clone(),
            None => return Err(CoreError::WorkspaceNotFound(spec.workspace)),
        }
    };

    let mut launched = Vec::new();
    for (index, node) in spec.nodes.iter().enumerate() {
        let card = node.to_card(index, &workspace_label)?;
        let outcome = self.incarnator.incarnate(&card)?;
        let pid = outcome.pid;

        let entry = CommandState {
            id: card.id,
            label: node.label.clone(),
            pid,
            alive: true,
            exit_status: None,
            logs: None, // run_pipeline does NOT capture logs.
        };
        // The lock is taken per node and released at the end of each
        // iteration.
        let mut state = self.inner.lock().await;
        if let Some(ws) = state.workspaces.get_mut(&spec.workspace) {
            ws.commands.insert(card.id, entry);
        }
        launched.push((node.label.clone(), pid));
    }
    Ok(launched)
}
/// Harvests terminated children without blocking. Meant to be called
/// periodically by the daemon or on SIGCHLD.
///
/// Every command still marked alive is polled with `waitpid(WNOHANG)`. A
/// normal exit records the exit code; death by signal records
/// `128 + signal`. Any other outcome (still running, wait error) leaves the
/// command untouched.
pub async fn reap_dead(&self) {
    let mut state = self.inner.lock().await;
    let live_commands = state
        .workspaces
        .values_mut()
        .flat_map(|ws| ws.commands.values_mut())
        .filter(|cmd| cmd.alive);

    for cmd in live_commands {
        let status = match waitpid(cmd.pid, Some(WaitPidFlag::WNOHANG)) {
            Ok(WaitStatus::Exited(_, code)) => code,
            Ok(WaitStatus::Signaled(_, sig, _)) => 128 + (sig as i32),
            _ => continue,
        };
        cmd.alive = false;
        cmd.exit_status = Some(status);
    }
}
}
/// Summary of one workspace as returned by `WorkspaceManager::list`.
#[derive(Debug, Clone)]
pub struct WorkspaceSnapshot {
/// Workspace identifier.
pub id: WorkspaceId,
/// Workspace label.
pub label: String,
/// Number of commands tracked for the workspace.
pub commands: u32,
/// Milliseconds elapsed since the workspace was created.
pub uptime_ms: u64,
}
/// Returns the last six characters of the ULID's string form; used to build
/// short, readable command labels.
fn short_ulid(u: &Ulid) -> String {
    let full = u.to_string();
    let start = full.len() - 6;
    full[start..].to_owned()
}
// Integration-style tests for `WorkspaceManager`. They spawn real processes
// (`/bin/echo`, `/bin/true`) and rely on wall-clock sleeps.
#[cfg(test)]
mod tests {
use super::*;
// A workspace with a 120 ms TTL must be gone from `list()` after 250 ms.
#[tokio::test]
async fn ttl_auto_stops_workspace() {
let mgr = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
let spec = WorkspaceSpec {
label: "ttl-test".into(),
soma: Default::default(),
permissions: Default::default(),
ttl: Some(std::time::Duration::from_millis(120)),
flow_dirs: vec![],
on_exit: shipote_card::ExitPolicy::Reap,
};
let (id, _) = mgr.create(spec).await.unwrap();
assert_eq!(mgr.list().await.len(), 1);
tokio::time::sleep(std::time::Duration::from_millis(250)).await;
assert_eq!(
mgr.list().await.len(),
0,
"TTL expirado: workspace debe haber sido removido"
);
let _ = id;
}
// A created workspace shows up in `list()` under the returned id.
#[tokio::test]
async fn create_and_list_workspace() {
let mgr = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
let spec = WorkspaceSpec {
label: "test".into(),
soma: Default::default(),
permissions: Default::default(),
ttl: None,
flow_dirs: vec![],
on_exit: shipote_card::ExitPolicy::Reap,
};
let (id, _w) = mgr.create(spec).await.unwrap();
let list = mgr.list().await;
assert_eq!(list.len(), 1);
assert_eq!(list[0].id, id);
}
// stdout of `/bin/echo` must eventually appear in the command's log buffer.
#[tokio::test]
async fn run_captures_stdout_to_log() {
let mgr = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
let spec = WorkspaceSpec {
label: "logs".into(),
soma: Default::default(),
permissions: Default::default(),
ttl: None,
flow_dirs: vec![],
on_exit: shipote_card::ExitPolicy::Reap,
};
let (id, _) = mgr.create(spec).await.unwrap();
let summary = mgr
.run(
id,
"/bin/echo".into(),
vec!["captured-output".into()],
vec![],
)
.await
.unwrap();
// Wait for the command to finish and for the drainer to drain
// (up to 50 polls, 20 ms apart).
for _ in 0..50 {
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
mgr.reap_dead().await;
let logs = mgr.get_command_logs(id, summary.id, 0).await.unwrap_or_default();
if !logs.is_empty() {
let s = String::from_utf8_lossy(&logs);
assert!(s.contains("captured-output"), "got: {s:?}");
return;
}
}
panic!("logs never captured");
}
// Launching `/bin/true` yields a positive PID and reaping does not panic.
#[tokio::test]
async fn run_true_in_workspace() {
let mgr = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
let spec = WorkspaceSpec {
label: "exec".into(),
soma: Default::default(),
permissions: Default::default(),
ttl: None,
flow_dirs: vec![],
on_exit: shipote_card::ExitPolicy::Reap,
};
let (id, _) = mgr.create(spec).await.unwrap();
let summary = mgr
.run(id, "/bin/true".into(), vec![], vec![])
.await
.unwrap();
assert!(summary.pid > 0);
// Harvest.
std::thread::sleep(std::time::Duration::from_millis(100));
mgr.reap_dead().await;
}
}
@@ -0,0 +1,122 @@
//! Ring buffer en memoria para capturar stdout/stderr de comandos.
//!
//! Tamaño fijo por comando (config: `MAX_LOG_BYTES`). Cuando se llena,
//! descarta los bytes más viejos. Pensado para diagnóstico rápido, no
//! para retención histórica — eso es trabajo de un journald-like aparte.
use std::sync::{Arc, Mutex};
/// Maximum number of bytes retained per command. 64 KiB covers typical logs
/// without wasting memory when the daemon tracks hundreds of live commands.
pub const MAX_LOG_BYTES: usize = 64 * 1024;

/// Bytes the buffer may hold above its cap before it is trimmed back to the
/// cap. The slack avoids shifting the whole buffer on every small append.
const TRIM_SLACK: usize = 1024;

/// Cheaply clonable handle to a shared, bounded log buffer.
///
/// Clones share the same underlying storage (`Arc<Mutex<..>>`).
#[derive(Debug, Clone)]
pub struct LogBuf {
    inner: Arc<Mutex<Inner>>,
}

#[derive(Debug)]
struct Inner {
    /// Raw bytes. When the length exceeds `cap + TRIM_SLACK` the head is
    /// dropped so that only the newest `cap` bytes remain.
    buf: Vec<u8>,
    /// Number of bytes kept after a trim.
    cap: usize,
    /// Total bytes ever appended (never decremented by trimming).
    written_total: u64,
}

impl LogBuf {
    /// Creates a buffer capped at [`MAX_LOG_BYTES`].
    pub fn new() -> Self {
        Self::with_cap(MAX_LOG_BYTES)
    }

    /// Creates a buffer that keeps the newest `cap` bytes after each trim.
    pub fn with_cap(cap: usize) -> Self {
        Self {
            inner: Arc::new(Mutex::new(Inner {
                // Start small; most commands log far less than the cap.
                buf: Vec::with_capacity(cap.min(4096)),
                cap,
                written_total: 0,
            })),
        }
    }

    /// Appends `data`, discarding the oldest bytes once the buffer exceeds
    /// its cap plus the trim slack.
    ///
    /// If the mutex is poisoned the data is dropped (best-effort logging).
    pub fn append(&self, data: &[u8]) {
        let Ok(mut guard) = self.inner.lock() else { return };
        let inner = &mut *guard;
        inner.written_total += data.len() as u64;
        let cap = inner.cap;
        // `saturating_add` keeps a huge `cap` from overflowing the limit.
        let limit = cap.saturating_add(TRIM_SLACK);

        if data.len() > limit {
            // The chunk alone exceeds the limit, so after trimming only its
            // last `cap` bytes would survive. Copy just those instead of
            // growing the allocation to hold the whole chunk first.
            inner.buf.clear();
            inner.buf.extend_from_slice(&data[data.len() - cap..]);
            return;
        }

        inner.buf.extend_from_slice(data);
        if inner.buf.len() > limit {
            let excess = inner.buf.len() - cap;
            inner.buf.drain(..excess);
        }
    }

    /// Returns the last `n` bytes of the buffer, or the whole buffer when
    /// `n == 0` or `n` is at least the current length.
    ///
    /// Returns an empty vector if the mutex is poisoned.
    pub fn tail(&self, n: usize) -> Vec<u8> {
        let Ok(guard) = self.inner.lock() else {
            return Vec::new();
        };
        let len = guard.buf.len();
        let start = if n == 0 { 0 } else { len.saturating_sub(n) };
        guard.buf[start..].to_vec()
    }

    /// Number of bytes currently held in the buffer.
    pub fn len(&self) -> usize {
        self.inner.lock().map(|g| g.buf.len()).unwrap_or(0)
    }

    /// `true` when the buffer currently holds no bytes.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Total number of bytes ever appended, including trimmed ones.
    pub fn written_total(&self) -> u64 {
        self.inner.lock().map(|g| g.written_total).unwrap_or(0)
    }
}

impl Default for LogBuf {
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two small appends are returned concatenated by `tail(0)`.
    #[test]
    fn append_and_tail_basic() {
        let log = LogBuf::with_cap(100);
        log.append(b"hello ");
        log.append(b"world\n");
        assert_eq!(log.tail(0), b"hello world\n");
    }

    /// A large append pushes the buffer past cap + slack and forces a trim.
    #[test]
    fn cap_drops_oldest() {
        let log = LogBuf::with_cap(10);
        // 16 bytes is still under cap + slack (10 + 1024): no trim yet.
        log.append(&[b'a'; 8]);
        log.append(&[b'b'; 8]);
        // This one goes over the slack, so the head must be discarded.
        log.append(&[b'c'; 2048]);
        assert!(log.len() <= 10 + 1024);
        // The newest bytes ('c') must have survived.
        let kept = log.tail(0);
        assert!(kept.contains(&b'c'));
    }

    /// `written_total` counts every appended byte, trimmed or not.
    #[test]
    fn written_total_tracks_all() {
        let log = LogBuf::with_cap(10);
        for chunk in [b"abcdef", b"ghijkl"] {
            log.append(chunk);
        }
        assert_eq!(log.written_total(), 12);
    }
}
@@ -0,0 +1,228 @@
//! Persistence of the WorkspaceManager state.
//!
//! Only the *declared intent* survives a daemon restart: the `WorkspaceSpec`
//! of every live workspace and, since snapshot v2, the saved pipelines.
//! Commands (PIDs) are NOT persisted — the kernel kills them when the daemon
//! shuts down.
use crate::WorkspaceManager;
use serde::{Deserialize, Serialize};
use shipote_card::{PipelineSpec, WorkspaceId, WorkspaceSpec};
use std::path::{Path, PathBuf};
use tracing::{info, warn};
/// Snapshot format version written by this build.
///
/// v2 added `saved_pipelines`. A v1 file, which lacks that field, is read as
/// having an empty list.
pub const SNAPSHOT_VERSION: u16 = 2;
/// On-disk snapshot of the manager state, serialized as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShipoteSnapshot {
/// Format version; `read` rejects values above `SNAPSHOT_VERSION`.
pub version: u16,
/// Wall-clock time the snapshot was taken, in milliseconds since the Unix epoch.
pub timestamp_ms: u64,
/// One entry per workspace known to the manager when the snapshot was taken.
pub workspaces: Vec<WorkspaceEntry>,
/// Named pipelines; defaults to empty when the field is absent (v1 files).
#[serde(default)]
pub saved_pipelines: Vec<PipelineEntry>,
}
/// A persisted workspace: its id plus the spec it was created from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkspaceEntry {
/// Id of the workspace; reused on restore so the id stays stable.
pub id: WorkspaceId,
/// Spec the workspace is re-created from on restore.
pub spec: WorkspaceSpec,
}
/// A persisted saved pipeline: the name it was stored under plus its spec.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineEntry {
/// Name the pipeline was saved under.
pub name: String,
/// The pipeline definition.
pub spec: PipelineSpec,
}
impl ShipoteSnapshot {
    /// Serializes the snapshot as pretty JSON and atomically replaces `path`.
    ///
    /// The bytes are written to a sibling temp file (`path` with its
    /// extension replaced by `tmp`), flushed to disk, and then renamed over
    /// `path`, so a crash never leaves a half-written snapshot in place.
    ///
    /// # Errors
    ///
    /// Returns an error if serialization fails, the parent directory cannot
    /// be created, or the temp file cannot be written, synced or renamed. On
    /// an I/O failure the temp file is removed on a best-effort basis.
    pub fn write(&self, path: &Path) -> anyhow::Result<()> {
        let bytes = serde_json::to_vec_pretty(self)?;
        if let Some(parent) = path.parent() {
            // Surface the real cause here instead of a confusing "no such
            // file" from the write below. An empty parent (bare file name) is
            // accepted by `create_dir_all` as a no-op.
            std::fs::create_dir_all(parent)?;
        }
        let tmp = path.with_extension("tmp");
        let staged = (|| -> std::io::Result<()> {
            let mut file = std::fs::File::create(&tmp)?;
            std::io::Write::write_all(&mut file, &bytes)?;
            // Without this, a crash shortly after the rename could leave
            // `path` pointing at an empty or partial file.
            file.sync_all()?;
            std::fs::rename(&tmp, path)
        })();
        if let Err(e) = staged {
            // Best-effort cleanup; the original error is what matters.
            let _ = std::fs::remove_file(&tmp);
            return Err(e.into());
        }
        Ok(())
    }

    /// Reads and deserializes a snapshot from `path`.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read, is not valid snapshot
    /// JSON, or declares a version newer than [`SNAPSHOT_VERSION`].
    pub fn read(path: &Path) -> anyhow::Result<Self> {
        let bytes = std::fs::read(path)?;
        let snap: ShipoteSnapshot = serde_json::from_slice(&bytes)?;
        // Older versions are accepted as-is: a v1 file simply lacks
        // `saved_pipelines`, which deserializes as an empty vec.
        if snap.version > SNAPSHOT_VERSION {
            anyhow::bail!(
                "snapshot version {} no soportada (esperada ≤ {})",
                snap.version,
                SNAPSHOT_VERSION
            );
        }
        Ok(snap)
    }
}
/// Canonical snapshot path: `$XDG_STATE_HOME/shipote/state.json`,
/// falling back to `$HOME/.local/state/shipote/state.json`,
/// falling back to `/tmp/shipote-state-$UID.json`.
///
/// `XDG_STATE_HOME` is honoured only when it holds an absolute path (the XDG
/// Base Directory spec says empty or relative values must be ignored), and an
/// empty `HOME` is skipped. Otherwise the result would silently become a path
/// relative to the daemon's current directory.
pub fn default_snapshot_path() -> PathBuf {
    // `var_os` rather than `var`: a directory name need not be valid UTF-8.
    let xdg_state = std::env::var_os("XDG_STATE_HOME")
        .map(PathBuf::from)
        .filter(|dir| dir.is_absolute());
    if let Some(state) = xdg_state {
        return state.join("shipote/state.json");
    }
    let home = std::env::var_os("HOME")
        .filter(|value| !value.is_empty())
        .map(PathBuf::from);
    if let Some(home) = home {
        return home.join(".local/state/shipote/state.json");
    }
    let uid = nix::unistd::getuid().as_raw();
    PathBuf::from(format!("/tmp/shipote-state-{uid}.json"))
}
/// Current wall-clock time in milliseconds since the Unix epoch, or 0 if the
/// system clock reports a time before the epoch.
fn now_ms() -> u64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as u64,
        Err(_) => 0,
    }
}
impl WorkspaceManager {
    /// Builds a snapshot of the current state: every workspace's id and spec
    /// plus every saved pipeline, stamped with the current time.
    pub async fn snapshot(&self) -> ShipoteSnapshot {
        let state = self.inner.lock().await;

        let mut workspaces = Vec::new();
        for (id, ws) in state.workspaces.iter() {
            workspaces.push(WorkspaceEntry {
                id: *id,
                spec: ws.spec.clone(),
            });
        }

        let mut saved_pipelines = Vec::new();
        for (name, spec) in state.saved_pipelines.iter() {
            saved_pipelines.push(PipelineEntry {
                name: name.clone(),
                spec: spec.clone(),
            });
        }

        ShipoteSnapshot {
            version: SNAPSHOT_VERSION,
            timestamp_ms: now_ms(),
            workspaces,
            saved_pipelines,
        }
    }

    /// Takes a snapshot and writes it to `path`.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`ShipoteSnapshot::write`].
    pub async fn save_snapshot(&self, path: &Path) -> anyhow::Result<()> {
        let snap = self.snapshot().await;
        let workspaces = snap.workspaces.len();
        snap.write(path)?;
        info!(path = %path.display(), workspaces, "snapshot saved");
        Ok(())
    }

    /// Loads the snapshot at `path` and re-creates its workspaces and saved
    /// pipelines, returning how many workspaces were restored.
    ///
    /// Non-fatal problems are logged and skipped: an unreadable snapshot
    /// yields `Ok(0)`, and a workspace that fails to be created is dropped
    /// from the count.
    pub async fn restore_snapshot(self: &std::sync::Arc<Self>, path: &Path) -> anyhow::Result<usize> {
        let snap = match ShipoteSnapshot::read(path) {
            Err(e) => {
                warn!(?e, path = %path.display(), "no snapshot — start fresh");
                return Ok(0);
            }
            Ok(snap) => snap,
        };
        let ShipoteSnapshot {
            workspaces,
            saved_pipelines,
            ..
        } = snap;

        let mut restored = 0usize;
        for WorkspaceEntry { id, spec } in workspaces {
            // v2+: reuse the original id so clients tracking a workspace_id
            // keep working across a restart.
            let label = spec.label.clone();
            if let Err(e) = self.create_with_id(id, spec).await {
                warn!(?e, %label, "skipped workspace en restore");
                continue;
            }
            restored += 1;
        }

        for PipelineEntry { name, spec } in saved_pipelines {
            self.save_pipeline(name, spec).await;
        }

        info!(restored, "snapshot restored");
        Ok(restored)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::WorkspaceManager;
    use ente_incarnate::IncarnatorConfig;
    use shipote_card::{ExitPolicy, WorkspaceSpec};
    use std::sync::Arc;

    /// Minimal workspace spec that differs only by label.
    fn sample_ws(label: &str) -> WorkspaceSpec {
        WorkspaceSpec {
            label: label.into(),
            soma: Default::default(),
            permissions: Default::default(),
            ttl: None,
            flow_dirs: vec![],
            on_exit: ExitPolicy::Reap,
        }
    }

    /// Fresh manager with the default incarnator configuration.
    fn new_manager() -> Arc<WorkspaceManager> {
        Arc::new(WorkspaceManager::new(IncarnatorConfig::default()))
    }

    /// Workspaces restored from a snapshot keep their original ids.
    #[tokio::test]
    async fn roundtrip_snapshot_preserves_ulids() {
        let dir = tempfile::tempdir().unwrap();
        let state_file = dir.path().join("state.json");

        let original = new_manager();
        let (first, _) = original.create(sample_ws("a")).await.unwrap();
        let (second, _) = original.create(sample_ws("b")).await.unwrap();
        original.save_snapshot(&state_file).await.unwrap();

        let revived = new_manager();
        let count = revived.restore_snapshot(&state_file).await.unwrap();
        assert_eq!(count, 2);

        let summaries = revived.list().await;
        let ids: std::collections::HashSet<_> = summaries.iter().map(|s| s.id).collect();
        assert!(ids.contains(&first));
        assert!(ids.contains(&second));
    }

    /// Saved pipelines travel through the snapshot and come back by name.
    #[tokio::test]
    async fn snapshot_includes_saved_pipelines() {
        use shipote_card::{CommandRef, DiscernPolicy, PipelineSpec};

        let dir = tempfile::tempdir().unwrap();
        let state_file = dir.path().join("state.json");

        let original = new_manager();
        let (ws_id, _) = original.create(sample_ws("ws")).await.unwrap();
        let echo = CommandRef {
            label: "n1".into(),
            payload: brahman_card::Payload::Native {
                exec: "/bin/echo".into(),
                argv: vec!["hi".into()],
                envp: vec![],
            },
            soma: Default::default(),
            flows: Default::default(),
            supervision: brahman_card::Supervision::OneShot,
        };
        let pipeline = PipelineSpec {
            label: "echo-cat".into(),
            workspace: ws_id,
            nodes: vec![echo],
            edges: vec![],
            discern: DiscernPolicy::default(),
        };
        original.save_pipeline("daily".into(), pipeline).await;
        original.save_snapshot(&state_file).await.unwrap();

        let revived = new_manager();
        revived.restore_snapshot(&state_file).await.unwrap();

        let names = revived.list_saved_pipelines().await;
        assert_eq!(names, vec!["daily".to_string()]);
        let restored = revived.get_saved_pipeline("daily").await.expect("saved");
        assert_eq!(restored.label, "echo-cat");
    }

    /// The default path ends with `state.json`.
    #[test]
    fn default_path_ends_with_state_json() {
        let path = default_snapshot_path();
        let rendered = path.to_string_lossy();
        assert!(rendered.ends_with("state.json"));
    }
}
@@ -0,0 +1,420 @@
//! Pipeline runtime: encadena nodos con pipes y opcionalmente intercepta
//! cada flow para discernir su contenido.
//!
//! Cada nodo se encarna via [`ente_incarnate::Incarnator`] — eso significa
//! que **cada comando puede tener su propio SomaSpec** (namespaces, cgroup,
//! rlimits) heredado del workspace. La conexión stdin↔stdout se hace con
//! `pipe2(2)` + `ChildStdio` declarativo: el callback de clone(2) hace los
//! `dup2` pre-execve sin romper la regla async-signal-safe.
use crate::CoreError;
use brahman_card::Payload;
use ente_incarnate::{ChildStdio, Incarnator};
use nix::fcntl::OFlag;
use nix::unistd::pipe2;
use shipote_card::{FlowEdge, PipelineSpec};
use shipote_discern::{DiscernPipeline, Discernment, Hint};
use std::os::fd::{AsRawFd, IntoRawFd, RawFd};
use std::sync::Arc;
use tokio::io::unix::AsyncFd;
use tokio::io::Interest;
use tracing::{debug, info, warn};
use ulid::Ulid;
/// Result of launching a pipeline.
#[derive(Debug, Clone)]
pub struct PipelineLaunch {
/// Id generated for this launch.
pub pipeline: Ulid,
/// `(node label, pid)` for every node that was started, in node order.
pub command_pids: Vec<(String, i32)>,
/// Discernments gathered by the taps, one per tap that completed.
/// NOTE(review): originally documented as "same order as `spec.edges`";
/// `run_pipeline` pushes them in producer-node order and skips taps whose
/// task failed to join — confirm which contract callers rely on.
pub edge_discernments: Vec<EdgeDiscernment>,
}
/// What a tap observed on one edge of the pipeline.
#[derive(Debug, Clone)]
pub struct EdgeDiscernment {
/// Label of the producing node.
pub from_label: String,
/// Name of the producer's output, copied from the edge.
pub from_output: String,
/// Label of the consuming node.
pub to_label: String,
/// Name of the consumer's input, copied from the edge.
pub to_input: String,
/// Result of discerning the sampled bytes; `None` if no discerner matched.
pub discernment: Option<Discernment>,
}
/// Launches a pipeline, connecting nodes through stdin/stdout. Every node is
/// incarnated via `Incarnator` (with or without namespacing, depending on its
/// SomaSpec).
///
/// v1: linear pipeline (one incoming edge per node). Multiple incoming edges
/// produce a warning and only the first one is honoured.
///
/// When `tap` is true a tap task is interposed after every producer; this
/// function awaits all tap tasks before returning.
///
/// # Errors
///
/// Returns an error if `spec.validate()` fails, a node's payload is neither
/// `Native` nor `Legacy`, a node cannot be compiled to a card, a pipe cannot
/// be created, or incarnation fails.
///
/// NOTE(review): wiring is purely positional — the read end produced by node
/// `i` is handed to the next node in iteration order, regardless of `e.to`.
/// That matches the documented v1 linear restriction; confirm `validate()`
/// rejects anything else.
///
/// NOTE(review): raw fds obtained via `into_raw_fd` do not appear to be closed
/// on the early-return error paths below (second `pipe2`, `incarnate_with`),
/// nor a trailing `pending_stdin_for_next` left after the last node — verify
/// against `Incarnator`'s ownership contract.
pub async fn run_pipeline(
spec: &PipelineSpec,
workspace_label: &str,
tap: bool,
discerner: Arc<DiscernPipeline>,
incarnator: Arc<Incarnator>,
) -> Result<PipelineLaunch, CoreError> {
spec.validate()?;
let n = spec.nodes.len();
info!(
nodes = n,
edges = spec.edges.len(),
tap,
"launching pipeline (incarnated)"
);
// Predecessor: for each node, its incoming edge (if any). Only the first
// edge targeting a node is recorded; later ones are logged and ignored.
let mut predecessor: Vec<Option<&FlowEdge>> = vec![None; n];
for e in &spec.edges {
if predecessor[e.to].is_some() {
warn!(node = e.to, "v1 pipeline: nodo con múltiples predecessors — sólo se honra el primero");
continue;
}
predecessor[e.to] = Some(e);
}
let mut pids = Vec::with_capacity(n);
let mut taps: Vec<TapHandle> = Vec::new();
// For every producing node we keep the read FD of its pipe so the
// consumer can pick it up when it is set up. Because a tap may sit in
// between, the scheme is:
// - Without tap: producer pipe read FD → consumer stdin.
// - With tap: producer pipe read FD → tokio proxy → write FD of a second
//   pipe → consumer stdin.
// To keep the linear case simple, `pending_stdin_for_next` holds the FD
// the next consumer must use as stdin.
let mut pending_stdin_for_next: Option<RawFd> = None;
for (i, node) in spec.nodes.iter().enumerate() {
// Validate that the payload is executable.
match &node.payload {
Payload::Native { .. } | Payload::Legacy { .. } => {}
_ => {
return Err(CoreError::Incarnate(
ente_incarnate::IncarnateError::NonExecutablePayload,
))
}
}
// Compile the node to a Card.
let card = node.to_card(i, workspace_label)?;
// Am I a producer? Then I need a stdout_fd pointing at a new pipe.
let i_is_producer = spec.edges.iter().any(|e| e.from == i);
let stdin_fd: Option<RawFd> = pending_stdin_for_next.take();
let mut stdout_fd: Option<RawFd> = None;
let mut next_pending: Option<RawFd> = None;
// FDs the PARENT should close after spawning (our copy of the end that
// was handed to the child). Collected but no longer acted upon — see the
// note before `incarnate_with` below.
let mut parent_closes: Vec<RawFd> = Vec::new();
if i_is_producer {
let (r, w) = pipe2(OFlag::O_CLOEXEC).map_err(|e| {
CoreError::Incarnate(ente_incarnate::IncarnateError::Pipe(e))
})?;
// From here on the fds are raw: nothing closes them automatically.
let r_raw = r.into_raw_fd();
let w_raw = w.into_raw_fd();
stdout_fd = Some(w_raw);
parent_closes.push(w_raw);
if tap {
// A second pipe is needed between the tap and the consumer.
let (r2, w2) = pipe2(OFlag::O_CLOEXEC).map_err(|e| {
CoreError::Incarnate(ente_incarnate::IncarnateError::Pipe(e))
})?;
let r2_raw = r2.into_raw_fd();
let w2_raw = w2.into_raw_fd();
next_pending = Some(r2_raw);
// The tap reads from r_raw and writes to w2_raw.
// Edge used to label the tap: the first recorded predecessor edge of
// any node, kept only if it starts at `i`; otherwise the first edge in
// `spec.edges` whose `from` is `i`.
let edge = predecessor
.iter()
.find_map(|p| *p)
.and_then(|e| if e.from == i { Some(e) } else { None })
// Edge where i is `from`:
.or_else(|| spec.edges.iter().find(|e| e.from == i));
let from_label = node.label.clone();
let to_label = edge
.map(|e| spec.nodes[e.to].label.clone())
.unwrap_or_default();
let from_output = edge.map(|e| e.from_output.clone()).unwrap_or_default();
let to_input = edge.map(|e| e.to_input.clone()).unwrap_or_default();
let sample_bytes = spec.discern.sample_bytes;
let disc = discerner.clone();
let h = spawn_tap(
r_raw, w2_raw, sample_bytes, disc, from_label, from_output, to_label, to_input,
);
taps.push(h);
// r_raw and w2_raw now belong to the tokio task. We do not close them
// in the parent.
} else {
// Without a tap, the producer's read end goes straight to the next
// consumer's stdin.
next_pending = Some(r_raw);
}
}
let stdio = ChildStdio {
stdin_fd,
stdout_fd,
stderr_fd: None,
};
// Incarnator takes over the fds in `stdio` — we do not close them here.
// `parent_closes` is therefore obsolete.
let _ = parent_closes;
let outcome = incarnator
.incarnate_with(&card, stdio)
.map_err(CoreError::Incarnate)?;
let pid = outcome.pid;
pids.push((node.label.clone(), pid.as_raw()));
debug!(label = %node.label, pid = pid.as_raw(), "node incarnated");
pending_stdin_for_next = next_pending;
}
let pipeline_id = Ulid::new();
// Wait for every tap task; a tap that failed to join is logged and its
// edge is simply missing from the result.
let mut edge_discernments = Vec::with_capacity(taps.len());
for t in taps {
match t.handle.await {
Ok(d) => edge_discernments.push(d),
Err(e) => warn!(?e, "tap handle joined with error"),
}
}
Ok(PipelineLaunch {
pipeline: pipeline_id,
command_pids: pids,
edge_discernments,
})
}
/// Handle to a running tap task.
struct TapHandle {
/// Join handle that resolves to what the tap observed on its edge.
handle: tokio::task::JoinHandle<EdgeDiscernment>,
}
/// Spawns a task that copies everything from `producer_r_fd` to
/// `consumer_w_fd`, keeps the first `sample_bytes` bytes as a sample, and
/// runs `discerner` on that sample.
///
/// Ownership of both fds moves into the task, which closes them when it
/// ends. The label and port strings are copied verbatim into the resulting
/// [`EdgeDiscernment`].
#[allow(clippy::too_many_arguments)]
fn spawn_tap(
    producer_r_fd: RawFd,
    consumer_w_fd: RawFd,
    sample_bytes: usize,
    discerner: Arc<DiscernPipeline>,
    from_label: String,
    from_output: String,
    to_label: String,
    to_input: String,
) -> TapHandle {
    // Both ends must be non-blocking BEFORE they are wrapped in AsyncFd,
    // otherwise a slow read/write would block the tokio reactor.
    set_nonblocking(producer_r_fd);
    set_nonblocking(consumer_w_fd);

    let pump = async move {
        // SAFETY: the caller transfers ownership of the fd to this task.
        let r_std = unsafe { std::os::fd::OwnedFd::from_raw_fd_compat(producer_r_fd) };
        // SAFETY: the caller transfers ownership of the fd to this task.
        let w_std = unsafe { std::os::fd::OwnedFd::from_raw_fd_compat(consumer_w_fd) };
        let r = AsyncFd::with_interest(r_std, Interest::READABLE).expect("AsyncFd r");
        let w = AsyncFd::with_interest(w_std, Interest::WRITABLE).expect("AsyncFd w");

        let mut sample: Vec<u8> = Vec::with_capacity(sample_bytes);
        let mut buf = [0u8; 4096];
        let mut pumped: u64 = 0;
        let mut eof = false;

        // Phase 1: forward data while filling the sample.
        while sample.len() < sample_bytes {
            match async_read(&r, &mut buf).await {
                Ok(0) => {
                    eof = true;
                    break;
                }
                Ok(n) => {
                    let room = sample_bytes - sample.len();
                    sample.extend_from_slice(&buf[..n.min(room)]);
                    if let Err(e) = async_write_all(&w, &buf[..n]).await {
                        warn!(?e, "tap consumer write failed");
                        break;
                    }
                    pumped += n as u64;
                }
                Err(e) => {
                    warn!(?e, "tap producer read failed");
                    break;
                }
            }
        }

        let verdict = discerner.discern(&sample, &Hint { path: None, size_total: None });

        // Phase 2: plain forwarding until EOF or the first failure.
        if !eof {
            loop {
                match async_read(&r, &mut buf).await {
                    Ok(0) | Err(_) => break,
                    Ok(n) => {
                        if async_write_all(&w, &buf[..n]).await.is_err() {
                            break;
                        }
                        pumped += n as u64;
                    }
                }
            }
        }

        debug!(bytes = pumped, "tap finished");
        EdgeDiscernment {
            from_label,
            from_output,
            to_label,
            to_input,
            discernment: verdict,
        }
    };

    TapHandle {
        handle: tokio::spawn(pump),
    }
}
/// Reads once from the non-blocking fd wrapped by `afd` into `buf`, waiting
/// for readiness as needed.
///
/// Returns the number of bytes read; `Ok(0)` is what `read(2)` reports at end
/// of file. `EAGAIN` clears the readiness flag and waits again; `EINTR` is
/// retried immediately instead of being reported as a failure.
///
/// # Errors
///
/// Returns any other OS error from `read(2)`, or an error from waiting for
/// readiness.
async fn async_read(
    afd: &AsyncFd<std::os::fd::OwnedFd>,
    buf: &mut [u8],
) -> std::io::Result<usize> {
    loop {
        let mut guard = afd.readable().await?;
        let fd = afd.as_raw_fd();
        // SAFETY: `fd` is a valid descriptor owned by the AsyncFd, and `buf`
        // is a live, writable slice of exactly `buf.len()` bytes.
        let r = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut _, buf.len()) };
        if r >= 0 {
            // Non-negative `ssize_t` always fits in `usize`.
            return Ok(r as usize);
        }
        let err = std::io::Error::last_os_error();
        match err.kind() {
            // Spurious readiness: reset it and wait for the next event.
            std::io::ErrorKind::WouldBlock => guard.clear_ready(),
            // Interrupted by a signal before any data moved: just retry.
            std::io::ErrorKind::Interrupted => {}
            _ => return Err(err),
        }
    }
}
/// Writes all of `buf` to the non-blocking fd wrapped by `afd`, waiting for
/// writability between partial writes.
///
/// `EAGAIN` clears the readiness flag and waits again; `EINTR` is retried
/// instead of aborting the transfer (matching `std::io::Write::write_all`).
///
/// # Errors
///
/// Returns `WriteZero` if `write(2)` reports zero bytes written, any other OS
/// error from `write(2)`, or an error from waiting for readiness.
async fn async_write_all(
    afd: &AsyncFd<std::os::fd::OwnedFd>,
    mut buf: &[u8],
) -> std::io::Result<()> {
    while !buf.is_empty() {
        let mut guard = afd.writable().await?;
        let fd = afd.as_raw_fd();
        // SAFETY: `fd` is a valid descriptor owned by the AsyncFd, and `buf`
        // is a live slice of exactly `buf.len()` readable bytes.
        let r = unsafe { libc::write(fd, buf.as_ptr() as *const _, buf.len()) };
        if r > 0 {
            // Partial write: advance past what was accepted and go again.
            buf = &buf[r as usize..];
            continue;
        }
        if r == 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::WriteZero,
                "write 0",
            ));
        }
        let err = std::io::Error::last_os_error();
        match err.kind() {
            // Pipe is full: reset readiness and wait for the next event.
            std::io::ErrorKind::WouldBlock => guard.clear_ready(),
            // Interrupted by a signal before any data moved: just retry.
            std::io::ErrorKind::Interrupted => {}
            _ => return Err(err),
        }
    }
    Ok(())
}
/// Adds `O_NONBLOCK` to the status flags of `fd`.
///
/// Best-effort: if the flags cannot be read nothing is changed, and the
/// result of setting them is not checked.
fn set_nonblocking(fd: RawFd) {
    // SAFETY: F_GETFL only queries the descriptor's status flags.
    let current = unsafe { libc::fcntl(fd, libc::F_GETFL, 0) };
    if current < 0 {
        return;
    }
    // SAFETY: F_SETFL with flags just read from the same descriptor.
    unsafe {
        libc::fcntl(fd, libc::F_SETFL, current | libc::O_NONBLOCK);
    }
}
// Extension trait that hides how an owned fd is built from a raw one, so
// call sites do not depend on the exact `OwnedFd` API (compat shim).
trait OwnedFdFromRawCompat: Sized {
    /// Takes ownership of `fd`.
    ///
    /// # Safety
    ///
    /// `fd` must be an open descriptor that nothing else owns: the returned
    /// value closes it when dropped.
    unsafe fn from_raw_fd_compat(fd: RawFd) -> Self;
}

impl OwnedFdFromRawCompat for std::os::fd::OwnedFd {
    unsafe fn from_raw_fd_compat(fd: RawFd) -> Self {
        // SAFETY: the caller hands ownership of `fd` over to the `OwnedFd`.
        unsafe { <Self as std::os::fd::FromRawFd>::from_raw_fd(fd) }
    }
}
// Never called: exists only to reference `AsRawFd` so the import does not
// trigger an unused-import warning if nothing else ends up using it.
#[allow(dead_code)]
fn _keep_raw(_: &dyn AsRawFd) {}
#[cfg(test)]
mod tests {
    use super::*;
    use brahman_card::Payload;
    use ente_incarnate::IncarnatorConfig;
    use shipote_card::{CommandRef, DiscernPolicy, FlowEdge, PipelineSpec, WorkspaceId};

    /// Native one-shot command with default soma and flows.
    fn cmd(label: &str, exec: &str, argv: &[&str]) -> CommandRef {
        let argv = argv.iter().map(|arg| arg.to_string()).collect();
        CommandRef {
            label: label.into(),
            payload: Payload::Native {
                exec: exec.into(),
                argv,
                envp: vec![],
            },
            soma: Default::default(),
            flows: Default::default(),
            supervision: brahman_card::Supervision::OneShot,
        }
    }

    /// Two-node pipeline: `/bin/echo <text>` piped into `/bin/cat`.
    fn echo_to_cat(label: &str, text: &str, discern: DiscernPolicy) -> PipelineSpec {
        PipelineSpec {
            label: label.into(),
            workspace: WorkspaceId::new(),
            nodes: vec![cmd("p1", "/bin/echo", &[text]), cmd("p2", "/bin/cat", &[])],
            edges: vec![FlowEdge {
                from: 0,
                from_output: "stdout".into(),
                to: 1,
                to_input: "stdin".into(),
            }],
            discern,
        }
    }

    /// Runs `spec` in workspace "ws" with the default discerner/incarnator.
    async fn launch(spec: &PipelineSpec, tap: bool) -> PipelineLaunch {
        let disc = Arc::new(DiscernPipeline::default_pipeline());
        let inc = Arc::new(Incarnator::new(IncarnatorConfig::default()));
        run_pipeline(spec, "ws", tap, disc, inc).await.unwrap()
    }

    /// Reaps every child started by the launch.
    fn reap(launch: &PipelineLaunch) {
        for (_, pid) in &launch.command_pids {
            let _ = nix::sys::wait::waitpid(nix::unistd::Pid::from_raw(*pid), None);
        }
    }

    #[tokio::test]
    async fn pipeline_isolated_echo_to_cat_runs() {
        let spec = echo_to_cat("echo-cat", "hola pipeline aislado", DiscernPolicy::default());
        let launched = launch(&spec, false).await;
        assert_eq!(launched.command_pids.len(), 2);
        reap(&launched);
    }

    #[tokio::test]
    async fn pipeline_isolated_with_tap_captures_discernment() {
        let policy = DiscernPolicy {
            sample_bytes: 4096,
            enrich_producer: true,
        };
        let spec = echo_to_cat("json-cat", "{\"hello\": 1}", policy);
        let launched = launch(&spec, true).await;
        assert_eq!(launched.edge_discernments.len(), 1);
        let edge = &launched.edge_discernments[0];
        let seen = edge.discernment.as_ref().expect("discernment present");
        assert_eq!(seen.mime.as_deref(), Some("application/json"));
        reap(&launched);
    }
}
@@ -0,0 +1,15 @@
[package]
name = "shipote-discern"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
publish.workspace = true
description = "Discernidor de contenido sobre buffers: MIME, codificación, parser hints. Compartible con file_explorer y nouser."
[dependencies]
brahman-card = { path = "../../../core/brahman-card" }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
@@ -0,0 +1,307 @@
//! `shipote-discern` — detección de tipo de contenido sobre buffers.
//!
//! Trait + pipeline + discerners default. Devuelve un [`Discernment`] con
//! `TypeRef` consistente con el broker, confidence, MIME y un `lens` hint
//! para UIs (reusa el espíritu del `dominant_lens` de nouser).
#![forbid(unsafe_code)]
use brahman_card::TypeRef;
/// Optional context a caller can pass alongside a sample.
#[derive(Debug, Clone)]
pub struct Hint<'a> {
/// Path the sample came from, if known. The default discerners only look
/// at its suffix (e.g. `.toml`, `.md`).
pub path: Option<&'a str>,
/// Total size of the source, if known. Not read by any discerner in this file.
pub size_total: Option<u64>,
}
/// Verdict produced by a discerner for a sample.
#[derive(Debug, Clone)]
pub struct Discernment {
/// Detected type.
pub ty: TypeRef,
/// How sure the discerner is; the discerners in this file use values
/// between 0.5 and 0.99.
pub confidence: f32,
/// MIME type, when the discerner knows one.
pub mime: Option<String>,
/// Suggested UI lens (e.g. "gallery", "tree", "code"), if any.
pub lens: Option<String>,
}
/// A single content detector that can be plugged into a `DiscernPipeline`.
pub trait Discerner: Send + Sync {
/// Short identifier of the discerner.
fn name(&self) -> &str;
/// Inspects `sample` (plus the optional `hint`) and returns a verdict, or
/// `None` when the sample does not look like this discerner's format.
fn discern(&self, sample: &[u8], hint: &Hint<'_>) -> Option<Discernment>;
}
/// Ordered list of discerners tried against a sample.
pub struct DiscernPipeline {
    discerners: Vec<Box<dyn Discerner>>,
}

impl DiscernPipeline {
    /// Confidence at or above which a result is accepted immediately,
    /// without consulting the remaining discerners.
    pub const ACCEPT_THRESHOLD: f32 = 0.9;

    /// Creates an empty pipeline (it never matches until discerners are pushed).
    pub fn new() -> Self {
        Self { discerners: Vec::new() }
    }

    /// Pipeline with the default discerners. Order matters: the first match
    /// with confidence ≥ [`Self::ACCEPT_THRESHOLD`] short-circuits.
    pub fn default_pipeline() -> Self {
        let mut p = Self::new();
        p.push(Box::new(MagicBytes));
        // CardProbe goes before JsonProbe: a Card is JSON, but we want the
        // more specific TypeRef whenever it applies.
        p.push(Box::new(CardProbe));
        p.push(Box::new(JsonProbe));
        p.push(Box::new(TomlProbe));
        p.push(Box::new(Utf8Probe));
        p
    }

    /// Appends a discerner at the end of the pipeline.
    pub fn push(&mut self, d: Box<dyn Discerner>) {
        self.discerners.push(d);
    }

    /// Runs the discerners in order and returns the first result whose
    /// confidence is ≥ [`Self::ACCEPT_THRESHOLD`]. If none reaches it, returns
    /// the most confident result seen (the earliest one on ties), or `None`
    /// when no discerner matched.
    pub fn discern(&self, sample: &[u8], hint: &Hint<'_>) -> Option<Discernment> {
        let mut best: Option<Discernment> = None;
        for d in &self.discerners {
            if let Some(r) = d.discern(sample, hint) {
                if r.confidence >= Self::ACCEPT_THRESHOLD {
                    return Some(r);
                }
                // `>=` keeps the earlier discerner when confidences tie.
                best = match best {
                    Some(prev) if prev.confidence >= r.confidence => Some(prev),
                    _ => Some(r),
                };
            }
        }
        best
    }
}

impl Default for DiscernPipeline {
    /// Same as [`DiscernPipeline::default_pipeline`].
    fn default() -> Self {
        Self::default_pipeline()
    }
}
// =====================================================================
// Discerners
// =====================================================================
/// Magic-byte detection for common formats. A match always reports a high
/// confidence (0.99).
pub struct MagicBytes;

impl Discerner for MagicBytes {
    fn name(&self) -> &str {
        "magic-bytes"
    }

    fn discern(&self, s: &[u8], _h: &Hint<'_>) -> Option<Discernment> {
        // (leading bytes, type name, MIME type, lens hint)
        const SIGNATURES: &[(&[u8], &str, &str, Option<&str>)] = &[
            (&[0x89, b'P', b'N', b'G'], "png", "image/png", Some("gallery")),
            (&[0xFF, 0xD8, 0xFF], "jpeg", "image/jpeg", Some("gallery")),
            (b"%PDF-", "pdf", "application/pdf", Some("reader")),
            (&[0x7F, b'E', b'L', b'F'], "elf", "application/x-executable", None),
            (&[0x00, 0x61, 0x73, 0x6D], "wasm", "application/wasm", None),
            (&[0x1F, 0x8B], "gzip", "application/gzip", None),
            (b"PK\x03\x04", "zip", "application/zip", None),
            (b"PK\x05\x06", "zip", "application/zip", None),
            (b"GIF87a", "gif", "image/gif", Some("gallery")),
            (b"GIF89a", "gif", "image/gif", Some("gallery")),
        ];

        SIGNATURES
            .iter()
            .find(|(magic, ..)| s.starts_with(magic))
            .map(|&(_, ty, mime, lens)| Discernment {
                ty: TypeRef::Primitive { name: ty.into() },
                confidence: 0.99,
                mime: Some(mime.into()),
                lens: lens.map(String::from),
            })
    }
}
/// JSON probe. The sample must start (after whitespace) with `{` or `[`.
///
/// Confidence:
/// - 0.95 when the sample is exactly one complete JSON value;
/// - 0.6 when the first value is cut off by the end of the sample
///   (truncated sample), or is complete but followed by more content;
/// - no match when the first value is malformed. Bracketed text that is not
///   JSON (e.g. `[INFO] …` log lines or `[section]` headers) is therefore no
///   longer reported as JSON.
pub struct JsonProbe;

impl Discerner for JsonProbe {
    fn name(&self) -> &str {
        "json"
    }

    fn discern(&self, s: &[u8], _h: &Hint<'_>) -> Option<Discernment> {
        let trimmed = trim_left(s);
        let first = *trimmed.first()?;
        if first != b'{' && first != b'[' {
            return None;
        }
        let txt = match std::str::from_utf8(trimmed) {
            Ok(t) => t,
            // `error_len() == None` means the sample ended in the middle of
            // a multi-byte character: a truncation artifact, not bad data.
            Err(e) if e.error_len().is_none() => {
                std::str::from_utf8(&trimmed[..e.valid_up_to()]).ok()?
            }
            Err(_) => return None,
        };

        // Parse only the first value so truncation, trailing content and
        // malformed input can be told apart.
        let mut values =
            serde_json::Deserializer::from_str(txt).into_iter::<serde_json::Value>();
        let confidence = match values.next() {
            // One complete value and nothing but whitespace after it.
            Some(Ok(_)) if values.next().is_none() => 0.95,
            // Complete first value followed by something else.
            Some(Ok(_)) => 0.6,
            // Ran out of input mid-value: truncated sample.
            Some(Err(e)) if e.is_eof() => 0.6,
            // Malformed first value: not JSON.
            _ => return None,
        };

        Some(Discernment {
            ty: TypeRef::Primitive { name: "json".into() },
            confidence,
            mime: Some("application/json".into()),
            lens: Some("tree".into()),
        })
    }
}
/// TOML probe: a cheap line heuristic, strengthened by the `.toml` suffix of
/// the hinted path and by an actual parse.
///
/// Confidence is 0.95 with a `.toml` path hint and 0.55 without; a
/// successful parse raises it to at least 0.93 and never lowers it.
pub struct TomlProbe;

impl Discerner for TomlProbe {
    fn name(&self) -> &str {
        "toml"
    }

    fn discern(&self, s: &[u8], h: &Hint<'_>) -> Option<Discernment> {
        let txt = match std::str::from_utf8(s) {
            Ok(t) => t,
            // `error_len() == None` means the sample ended in the middle of
            // a multi-byte character: a truncation artifact, not bad data.
            Err(e) if e.error_len().is_none() => {
                std::str::from_utf8(&s[..e.valid_up_to()]).ok()?
            }
            Err(_) => return None,
        };

        // Heuristic: some line is a `[section]` header, or a non-comment
        // line contains ` = `.
        let looks_like = txt.lines().map(str::trim).any(|l| {
            (l.starts_with('[') && l.ends_with(']'))
                || (!l.starts_with('#') && l.contains(" = "))
        });
        if !looks_like {
            return None;
        }

        let by_hint: f32 = if h.path.map_or(false, |p| p.ends_with(".toml")) {
            0.95
        } else {
            0.55
        };
        // A successful parse may only raise the confidence. Previously it
        // replaced 0.95 (hinted) with 0.93.
        let confidence = if toml::from_str::<toml::Value>(txt).is_ok() {
            by_hint.max(0.93)
        } else {
            by_hint
        };

        Some(Discernment {
            ty: TypeRef::Primitive { name: "toml".into() },
            confidence,
            mime: Some("application/toml".into()),
            lens: Some("tree".into()),
        })
    }
}
/// Reports a JSON object as a Card (`Wit { brahman:card }`) when it parses
/// completely and has the `schema_version`, `id` and `payload` keys.
pub struct CardProbe;

impl Discerner for CardProbe {
    fn name(&self) -> &str {
        "card"
    }

    fn discern(&self, s: &[u8], _h: &Hint<'_>) -> Option<Discernment> {
        // Top-level keys that must all be present.
        const REQUIRED_KEYS: [&str; 3] = ["schema_version", "id", "payload"];

        let body = trim_left(s);
        if *body.first()? != b'{' {
            return None;
        }
        let text = std::str::from_utf8(body).ok()?;
        let value: serde_json::Value = serde_json::from_str(text).ok()?;
        let fields = value.as_object()?;
        if !REQUIRED_KEYS.iter().all(|key| fields.contains_key(*key)) {
            return None;
        }

        Some(Discernment {
            ty: TypeRef::Wit {
                package: "brahman:card".into(),
                interface: None,
                name: "card".into(),
            },
            confidence: 0.97,
            mime: Some("application/json".into()),
            lens: Some("card".into()),
        })
    }
}
/// Plain UTF-8 text. Low-confidence fallback (0.5).
///
/// A sample that is valid UTF-8 except for an incomplete multi-byte
/// character at its very end is still accepted: samples are cut at an
/// arbitrary byte count, so that is a truncation artifact, not binary data.
pub struct Utf8Probe;

impl Discerner for Utf8Probe {
    fn name(&self) -> &str {
        "utf8"
    }

    fn discern(&self, s: &[u8], h: &Hint<'_>) -> Option<Discernment> {
        let text = match std::str::from_utf8(s) {
            Ok(_) => s,
            // `error_len() == None`: input ended inside a multi-byte char.
            Err(e) if e.error_len().is_none() => &s[..e.valid_up_to()],
            Err(_) => return None,
        };
        if text.is_empty() {
            return None;
        }

        // Catch binary in disguise: control bytes other than \t \n \v \f \r.
        let suspicious = text
            .iter()
            .filter(|&&b| b < 0x09 || (b > 0x0D && b < 0x20))
            .count();
        // Reject when more than 5% of the bytes are suspicious.
        if suspicious * 100 / text.len() > 5 {
            return None;
        }

        // Lens hint derived only from the suffix of the hinted path.
        let lens = h
            .path
            .and_then(|p| {
                if p.ends_with(".md") {
                    Some("markdown")
                } else if p.ends_with(".rs")
                    || p.ends_with(".py")
                    || p.ends_with(".go")
                    || p.ends_with(".js")
                    || p.ends_with(".ts")
                {
                    Some("code")
                } else {
                    None
                }
            })
            .map(String::from);

        Some(Discernment {
            ty: TypeRef::Primitive { name: "text".into() },
            confidence: 0.5,
            mime: Some("text/plain; charset=utf-8".into()),
            lens,
        })
    }
}
/// Returns `s` without its leading spaces, tabs, line feeds and carriage
/// returns. No other byte is treated as whitespace.
fn trim_left(s: &[u8]) -> &[u8] {
    let skipped = s
        .iter()
        .take_while(|b| matches!(**b, b' ' | b'\t' | b'\n' | b'\r'))
        .count();
    &s[skipped..]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the default pipeline on `sample` with an empty hint.
    fn discern(sample: &[u8]) -> Option<Discernment> {
        let no_hint = Hint { path: None, size_total: None };
        DiscernPipeline::default_pipeline().discern(sample, &no_hint)
    }

    #[test]
    fn png_detected() {
        let header = [0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A, 0, 0];
        let found = discern(&header).unwrap();
        assert_eq!(found.mime.as_deref(), Some("image/png"));
        assert!(found.confidence > 0.9);
    }

    #[test]
    fn json_detected() {
        let found = discern(b"{\"hello\": 1}").unwrap();
        assert_eq!(found.mime.as_deref(), Some("application/json"));
    }

    #[test]
    fn card_wins_over_plain_json() {
        let payload = br#"{"schema_version":1,"id":"01ARZ3NDEKTSV4RRFFQ69G5FAV","label":"x","payload":{"Virtual":null},"supervision":"OneShot"}"#;
        let found = discern(payload).unwrap();
        if let TypeRef::Wit { ref package, .. } = found.ty {
            assert_eq!(package, "brahman:card");
        } else {
            panic!("expected card");
        }
    }

    #[test]
    fn utf8_text_fallback() {
        let found = discern(b"hello world\nthis is text").unwrap();
        // May come back as toml (via the ` = ` heuristic) or as text; both
        // are acceptable, so only check that something sensible was found.
        assert!(found.mime.is_some());
    }

    #[test]
    fn binary_rejected_by_utf8() {
        let mut bytes = vec![0u8; 100];
        bytes[..3].copy_from_slice(&[0x00, 0x01, 0x02]);
        // Once Utf8Probe rejects the sample there is no match → None.
        // If some other discerner happens to claim it first, that is fine
        // too, as long as it is not reported as plain text.
        if let Some(found) = discern(&bytes) {
            assert_ne!(found.mime.as_deref(), Some("text/plain; charset=utf-8"));
        }
    }
}
@@ -0,0 +1,19 @@
[package]
name = "shipote-protocol"
version.workspace = true
edition.workspace = true
rust-version.workspace = true
license.workspace = true
authors.workspace = true
publish.workspace = true
description = "Wire protocol entre shipote-daemon y clientes (cli/gui). Postcard length-prefixed sobre Unix socket."
[dependencies]
shipote-card = { path = "../shipote-card" }
brahman-card = { path = "../../../core/brahman-card" }
serde = { workspace = true }
postcard = { workspace = true }
thiserror = { workspace = true }
tokio = { workspace = true }
ulid = { workspace = true }
nix = { workspace = true }
@@ -0,0 +1,290 @@
//! `shipote-protocol` — wire daemon ↔ cliente (cli/gui).
//!
//! Framing: u32 BE length-prefix + payload postcard. Mismo patrón que
//! `ente-bus`/`brahman-handshake` para que clientes existentes compartan
//! reader/writer helpers si quieren.
#![forbid(unsafe_code)]
use serde::{Deserialize, Serialize};
use shipote_card::{PipelineSpec, WorkspaceId, WorkspaceSpec};
use std::path::PathBuf;
use thiserror::Error;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::UnixStream;
use ulid::Ulid;
/// Default file name of the daemon's Unix socket.
pub const DEFAULT_SOCK_NAME: &str = "shipote.sock";
/// 1 MiB. Presumably the largest frame payload accepted on the wire —
/// NOTE(review): the reader/writer that enforces it is outside this chunk.
pub const MAX_FRAME: usize = 1 << 20;
// =====================================================================
// Mensajes
// =====================================================================
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Request {
/// Health-check.
Ping,
/// Crear un workspace nuevo.
WorkspaceCreate { spec: WorkspaceSpec },
/// Listar todos los workspaces vivos.
WorkspaceList,
/// Detener un workspace y reapear sus comandos.
WorkspaceStop { id: WorkspaceId },
/// Ejecutar un comando one-shot dentro de un workspace existente.
Run {
workspace: WorkspaceId,
exec: String,
argv: Vec<String>,
envp: Vec<(String, String)>,
},
/// Lanzar un Pipeline completo dentro de un workspace.
PipelineRun {
spec: PipelineSpec,
/// Si `true`, el daemon interpone un tap entre productor y
/// consumidor de cada FlowEdge, sampleando los primeros bytes
/// y discerniendo el TypeRef.
tap: bool,
},
/// Discernir un buffer ad-hoc (sin workspace). Útil para `shipote discern <file>`.
Discern { sample: Vec<u8>, hint_path: Option<PathBuf> },
/// Capacidades runtime del kernel/proceso del daemon.
Capabilities,
/// Listar comandos vivos+pasados de un workspace.
CommandList { workspace: shipote_card::WorkspaceId },
/// Tail del log capturado para un comando.
CommandLogs {
workspace: shipote_card::WorkspaceId,
command: Ulid,
tail_bytes: usize,
},
/// Guardar (o reemplazar) un PipelineSpec bajo un nombre.
PipelineSave { name: String, spec: PipelineSpec },
/// Listar nombres de pipelines guardados.
PipelineSavedList,
/// Eliminar un pipeline guardado.
PipelineDrop { name: String },
/// Ejecutar un pipeline guardado.
PipelineRunSaved { name: String, tap: bool },
}
/// Messages the daemon sends back to a client.
///
/// NOTE(review): variant names mirror `Request`, but the actual
/// request-to-response pairing is decided by the daemon, which is not visible
/// from this crate — confirm before relying on the pairings hinted below.
/// Serialized with postcard, so variant/field order is presumably part of the
/// wire encoding — confirm before reordering.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Response {
/// Presumably the reply to `Request::Ping`.
Pong,
/// A workspace was created. `warnings` presumably carries non-fatal
/// diagnostics from creation — confirm against the daemon.
WorkspaceCreated {
id: WorkspaceId,
warnings: Vec<String>,
},
/// One `WorkspaceSummary` per listed workspace.
WorkspaceList {
items: Vec<WorkspaceSummary>,
},
/// A workspace was stopped. `reaped` is presumably the number of commands
/// reaped — confirm.
WorkspaceStopped {
id: WorkspaceId,
reaped: u32,
},
/// A one-shot command was started; carries its id and process id.
RunStarted {
workspace: WorkspaceId,
command_id: Ulid,
pid: i32,
},
/// A pipeline was started. `command_pids` pairs a string (presumably the
/// command label — confirm) with a pid.
PipelineStarted {
pipeline: Ulid,
command_pids: Vec<(String, i32)>,
/// Per-edge discernments when `tap` is `true`. Empty without tap.
edges: Vec<EdgeDiscernmentInfo>,
},
/// Result of discerning a buffer: detected type name, confidence, and
/// optional MIME and lens names.
Discernment {
ty: String,
confidence: f32,
mime: Option<String>,
lens: Option<String>,
},
/// Runtime capabilities of the daemon's kernel/process.
/// NOTE(review): `kernel_version` is presumably (major, minor, patch), and
/// the `user_ns` / `cgroup_v2` strings presumably describe availability
/// states — confirm against the daemon.
Capabilities {
kernel_version: (u32, u32, u32),
user_ns: String,
cgroup_v2: String,
cgroup_delegated: bool,
has_cap_sys_admin: bool,
},
/// One `CommandInfo` per listed command.
CommandList {
items: Vec<CommandInfo>,
},
/// Raw bytes of a command's captured log.
CommandLogs {
bytes: Vec<u8>,
},
/// A pipeline was saved under `name`.
PipelineSaved {
name: String,
},
/// Names of the saved pipelines.
PipelineSavedList {
names: Vec<String>,
},
/// A saved pipeline was dropped. `existed` presumably tells whether an
/// entry with that name was present beforehand — confirm.
PipelineDropped {
name: String,
existed: bool,
},
/// The request failed; `message` is a human-readable description.
Error {
message: String,
},
}
/// Description of one command, as carried in `Response::CommandList`.
///
/// NOTE(review): field meanings below are inferred from their names and
/// types; the daemon that fills them is not visible here — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CommandInfo {
/// Identifier of the command.
pub id: Ulid,
/// Presumably a human-readable label for the command.
pub label: String,
/// Presumably the OS process id.
pub pid: i32,
/// Presumably whether the process is still running.
pub alive: bool,
/// Presumably the exit status once the process has finished, `None` before.
pub exit_status: Option<i32>,
/// Presumably the number of log bytes captured so far.
pub log_bytes: u64,
}
/// Discernment result for one pipeline edge, as carried in the `edges` field
/// of `Response::PipelineStarted`.
///
/// NOTE(review): `from_*` / `to_*` presumably name the producing command and
/// its output, and the consuming command and its input — confirm against the
/// daemon.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EdgeDiscernmentInfo {
pub from_label: String,
pub from_output: String,
pub to_label: String,
pub to_input: String,
/// `Some(ty)` if the discerner detected something. `None` if there was not
/// enough data or no discerner matched.
pub ty: Option<String>,
/// MIME type, when one is reported.
pub mime: Option<String>,
/// Lens name, when one is reported.
pub lens: Option<String>,
/// NOTE(review): presumably the discerner's confidence score; its range is
/// not visible from this crate.
pub confidence: f32,
}
/// Summary of one workspace, as carried in `Response::WorkspaceList`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkspaceSummary {
/// Identifier of the workspace.
pub id: WorkspaceId,
/// Presumably the label given in the `WorkspaceSpec` — confirm.
pub label: String,
/// Presumably the number of commands tracked for the workspace — confirm.
pub commands: u32,
/// Presumably milliseconds since the workspace was created — confirm.
pub uptime_ms: u64,
}
// =====================================================================
// Errores
// =====================================================================
/// Errors returned by the framing helpers `write_frame` and `read_frame`.
#[derive(Debug, Error)]
pub enum ProtocolError {
/// A frame's payload length exceeds `MAX_FRAME`; carries the offending
/// length in bytes.
#[error("frame oversize: {0} bytes (max {MAX_FRAME})")]
FrameOversize(usize),
/// I/O failure on the underlying stream.
#[error("io: {0}")]
Io(#[from] std::io::Error),
/// postcard serialization or deserialization failure.
#[error("postcard: {0}")]
Postcard(#[from] postcard::Error),
/// The stream hit end-of-file while `read_frame` was reading the 4-byte
/// length prefix.
#[error("connection closed")]
Closed,
}
// =====================================================================
// Framing helpers
// =====================================================================
/// Serializes `msg` with postcard and writes it to `stream` as one frame:
/// a big-endian `u32` payload length followed by the payload, then flushes.
///
/// # Errors
///
/// * [`ProtocolError::Postcard`] if `msg` cannot be serialized.
/// * [`ProtocolError::FrameOversize`] if the payload is longer than
///   [`MAX_FRAME`] or does not fit the `u32` length prefix; nothing is
///   written in that case.
/// * [`ProtocolError::Io`] if writing or flushing fails.
pub async fn write_frame<T: Serialize>(stream: &mut UnixStream, msg: &T) -> Result<(), ProtocolError> {
    let payload = postcard::to_allocvec(msg)?;
    let size = payload.len();
    if size > MAX_FRAME {
        return Err(ProtocolError::FrameOversize(size));
    }
    // Checked conversion instead of `as u32`: the prefix can never be silently
    // truncated, even if MAX_FRAME is one day raised past `u32::MAX`.
    let header = u32::try_from(size)
        .map_err(|_| ProtocolError::FrameOversize(size))?
        .to_be_bytes();
    stream.write_all(&header).await?;
    stream.write_all(&payload).await?;
    stream.flush().await?;
    Ok(())
}
/// Reads one frame from `stream` and deserializes its payload with postcard.
///
/// A frame is a big-endian `u32` payload length followed by that many bytes.
///
/// # Errors
///
/// * [`ProtocolError::Closed`] if the stream reaches end-of-file while the
///   length prefix is being read.
/// * [`ProtocolError::FrameOversize`] if the announced length exceeds
///   [`MAX_FRAME`]; the payload is not read.
/// * [`ProtocolError::Io`] for any other I/O failure, including end-of-file
///   in the middle of the payload.
/// * [`ProtocolError::Postcard`] if the payload does not decode as `T`.
pub async fn read_frame<T: for<'de> Deserialize<'de>>(
    stream: &mut UnixStream,
) -> Result<T, ProtocolError> {
    let mut header = [0u8; 4];
    if let Err(err) = stream.read_exact(&mut header).await {
        return Err(match err.kind() {
            std::io::ErrorKind::UnexpectedEof => ProtocolError::Closed,
            _ => ProtocolError::Io(err),
        });
    }

    let size = u32::from_be_bytes(header) as usize;
    if size > MAX_FRAME {
        return Err(ProtocolError::FrameOversize(size));
    }

    let mut payload = vec![0u8; size];
    stream.read_exact(&mut payload).await?;
    let msg = postcard::from_bytes(&payload)?;
    Ok(msg)
}
/// Canonical path of the daemon socket.
///
/// Resolution order:
/// 1. `$XDG_RUNTIME_DIR/shipote.sock`, when the variable holds an absolute path;
/// 2. `/run/user/$UID/shipote.sock`, when that directory exists;
/// 3. `/tmp/shipote-$UID.sock` otherwise.
///
/// An empty or relative `XDG_RUNTIME_DIR` is ignored: joining onto it would
/// produce a socket path relative to the current working directory, and the
/// XDG Base Directory specification says such values must be treated as
/// invalid. The variable is read with `var_os`, so a runtime directory that
/// is not valid UTF-8 is still honoured.
pub fn default_socket_path() -> PathBuf {
    let runtime_dir = std::env::var_os("XDG_RUNTIME_DIR")
        .map(PathBuf::from)
        .filter(|dir| dir.is_absolute());
    if let Some(dir) = runtime_dir {
        return dir.join(DEFAULT_SOCK_NAME);
    }

    let uid = nix::unistd::getuid().as_raw();
    let per_user = PathBuf::from(format!("/run/user/{uid}"));
    if per_user.exists() {
        return per_user.join(DEFAULT_SOCK_NAME);
    }
    PathBuf::from(format!("/tmp/shipote-{uid}.sock"))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A unit variant survives a postcard encode/decode round trip.
    #[test]
    fn ping_roundtrip() {
        let bytes = postcard::to_allocvec(&Request::Ping).unwrap();
        let back: Request = postcard::from_bytes(&bytes).unwrap();
        assert!(matches!(back, Request::Ping));
    }

    /// A variant carrying a full `WorkspaceSpec` survives a round trip.
    #[test]
    fn workspace_create_roundtrip() {
        let req = Request::WorkspaceCreate {
            spec: WorkspaceSpec {
                label: "demo".into(),
                soma: Default::default(),
                permissions: Default::default(),
                ttl: None,
                flow_dirs: vec![],
                on_exit: shipote_card::ExitPolicy::Reap,
            },
        };
        let bytes = postcard::to_allocvec(&req).unwrap();
        let back: Request = postcard::from_bytes(&bytes).unwrap();
        match back {
            Request::WorkspaceCreate { spec } => assert_eq!(spec.label, "demo"),
            _ => panic!("wrong variant"),
        }
    }

    /// Whatever branch `default_socket_path` takes, the result names a
    /// shipote socket file.
    ///
    /// The check is on the file name's prefix and extension rather than on
    /// `ends_with("shipote.sock")`: the `/tmp/shipote-$UID.sock` fallback does
    /// not end with that string, so the stricter check fails on hosts that
    /// have neither `XDG_RUNTIME_DIR` nor `/run/user/$UID`.
    #[test]
    fn default_socket_path_names_a_shipote_socket() {
        let p = default_socket_path();
        let file = p
            .file_name()
            .and_then(|f| f.to_str())
            .expect("socket path ends in a UTF-8 file name");
        assert!(file.starts_with("shipote"), "unexpected socket name: {file}");
        assert!(file.ends_with(".sock"), "unexpected socket name: {file}");
    }
}
@@ -3,10 +3,11 @@ name = "yahweh-provider-fs"
version = { workspace = true }
edition = { workspace = true }
license = { workspace = true }
description = "DataProvider de filesystem local."
description = "DataProvider de filesystem local con discernimiento de contenido (shipote-discern)."
[dependencies]
yahweh-core = { workspace = true }
async-trait = { workspace = true }
tokio = { workspace = true }
notify = { workspace = true }
shipote-discern = { path = "../../../../../modules/shipote/shipote-discern" }
@@ -3,16 +3,45 @@
//! `std::fs::read_dir` y leyendo archivos a `Vec<u8>` via `tokio::io`.
use async_trait::async_trait;
use shipote_discern::{DiscernPipeline, Hint};
use std::fs;
use std::io::Cursor;
use std::io::{Cursor, Read};
use std::path::Path;
use std::pin::Pin;
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use yahweh_core::{DataProvider, DisplayType, EntityNode};
pub const PROVIDER_ID: &str = "local_fs";
pub struct FileDataProvider;
/// Number of bytes the discerner samples from the head of each file.
///
/// 4 KiB covers the headers of common formats (PNG, ELF, JSON/TOML up to a
/// reasonable key depth) without saturating I/O when a directory is expanded.
///
/// There is deliberately no maximum-file-size cutoff: only the head of the
/// file is read, so the cost is O(sample) regardless of the file's total
/// size, and large files are discerned like any other.
const DISCERN_SAMPLE_BYTES: usize = 4096;
/// Local-filesystem `DataProvider`.
///
/// Holds the `DiscernPipeline` that `discern_head` uses to fill in the
/// `mime_type` of listed files.
pub struct FileDataProvider {
// Pipeline used for content discernment, behind an `Arc`.
discerner: Arc<DiscernPipeline>,
}
impl FileDataProvider {
    /// Builds a provider backed by `DiscernPipeline::default_pipeline()`.
    pub fn new() -> Self {
        let pipeline = DiscernPipeline::default_pipeline();
        Self {
            discerner: Arc::new(pipeline),
        }
    }
}
impl Default for FileDataProvider {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl DataProvider for FileDataProvider {
@@ -32,17 +61,21 @@ impl DataProvider for FileDataProvider {
.unwrap_or_default()
.to_string_lossy()
.into_owned();
let display_type = if path.is_dir() {
DisplayType::Folder
let is_dir = path.is_dir();
let display_type = if is_dir { DisplayType::Folder } else { DisplayType::File };
// Discernimos sólo archivos. Folders no tienen MIME útil.
let mime_type = if is_dir {
None
} else {
DisplayType::File
discern_head(&path, &self.discerner)
};
children.push(EntityNode {
id: path.to_string_lossy().into_owned(),
name,
display_type,
mime_type: None,
mime_type,
});
}
}
@@ -65,3 +98,22 @@ impl DataProvider for FileDataProvider {
Err("Escritura en streaming no implementada para FS".to_string())
}
}
/// Reads the head of a file and runs it through the `DiscernPipeline`.
///
/// Returns the detected MIME type, or `None` when the path is not a regular
/// file, cannot be opened or read, or the pipeline reports no MIME.
///
/// Only regular files are sampled. Opening a FIFO blocks until a writer shows
/// up and reading from some device nodes may never return; either would stall
/// the runtime thread that is listing the directory.
///
/// Intentionally synchronous: this runs inside the async runtime, but the
/// read is bounded by `DISCERN_SAMPLE_BYTES` and is expected to be served from
/// the page cache, so the overhead of `tokio::fs` would not pay off.
fn discern_head(path: &Path, discerner: &DiscernPipeline) -> Option<String> {
    // `metadata` follows symlinks, so a link to a regular file still qualifies.
    if !fs::metadata(path).ok()?.is_file() {
        return None;
    }

    let file = fs::File::open(path).ok()?;
    let mut head = Vec::with_capacity(DISCERN_SAMPLE_BYTES);
    // A single `read` may legally return fewer bytes than are available;
    // `take` + `read_to_end` keeps reading until the sample is full or EOF.
    // The usize -> u64 widening is lossless on every supported target.
    let mut sampler = file.take(DISCERN_SAMPLE_BYTES as u64);
    sampler.read_to_end(&mut head).ok()?;

    let hint = Hint {
        path: path.to_str(),
        size_total: None,
    };
    discerner.discern(&head, &hint).and_then(|d| d.mime)
}