feat(shipote): throughput card + rate-limit + snapshot incremental (fase Q)
- shipote-shell Flow channels card extiende con bytes_total + bytes/s por socket. Lookup helper evita borrows en closures. - DiscernPolicy.max_bytes_per_sec: splitter task hace sleep proporcional al tamaño de chunk tras cada broadcast. Token-bucket simple v1. - WorkspaceManager.dirty: AtomicBool. mark_dirty() en mutaciones que afectan al snapshot. save_snapshot skip si clean y path existe. restore_snapshot resetea dirty=false (hidratación no es mutation). 85 tests pasan (ente-incarnate 16, nouser-core 27, shipote-card 8, shipote-core 26, shipote-discern 5, yahweh-provider-fs 3). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -6,8 +6,8 @@
|
||||
|
||||
use gpui::{div, prelude::*, px, Context, IntoElement, Render, SharedString, Window};
|
||||
use shipote_protocol::{
|
||||
default_socket_path, read_frame, write_frame, CommandInfo, FlowInfo, QuotaReportInfo, Request,
|
||||
Response, WorkspaceStatsInfo, WorkspaceSummary,
|
||||
default_socket_path, read_frame, write_frame, CommandInfo, FlowInfo, FlowThroughputInfo,
|
||||
QuotaReportInfo, Request, Response, WorkspaceStatsInfo, WorkspaceSummary,
|
||||
};
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
@@ -44,6 +44,8 @@ struct Shell {
|
||||
commands: std::collections::BTreeMap<String, Vec<CommandInfo>>,
|
||||
saved_pipelines: Vec<String>,
|
||||
flows: Vec<FlowInfo>,
|
||||
/// Throughput por flow socket (bytes_total + bytes/s).
|
||||
flow_throughput: Vec<FlowThroughputInfo>,
|
||||
/// History de RSS por workspace (últimas N samples).
|
||||
stats_history: std::collections::BTreeMap<String, std::collections::VecDeque<WorkspaceStatsInfo>>,
|
||||
/// Quota report fresco por workspace.
|
||||
@@ -81,6 +83,7 @@ impl Shell {
|
||||
me.commands = snap.commands;
|
||||
me.saved_pipelines = snap.saved_pipelines;
|
||||
me.flows = snap.flows;
|
||||
me.flow_throughput = snap.flow_throughput;
|
||||
me.quotas = snap.quotas;
|
||||
// Hidratar history server-side para workspaces
|
||||
// que no tenían history local (primer probe).
|
||||
@@ -122,6 +125,7 @@ impl Shell {
|
||||
me.commands.clear();
|
||||
me.saved_pipelines.clear();
|
||||
me.flows.clear();
|
||||
me.flow_throughput.clear();
|
||||
me.quotas.clear();
|
||||
me.caps = None;
|
||||
me.recent_log = None;
|
||||
@@ -142,6 +146,7 @@ impl Shell {
|
||||
commands: std::collections::BTreeMap::new(),
|
||||
saved_pipelines: Vec::new(),
|
||||
flows: Vec::new(),
|
||||
flow_throughput: Vec::new(),
|
||||
stats_history: std::collections::BTreeMap::new(),
|
||||
quotas: std::collections::BTreeMap::new(),
|
||||
caps: None,
|
||||
@@ -157,6 +162,7 @@ struct Snapshot {
|
||||
commands: std::collections::BTreeMap<String, Vec<CommandInfo>>,
|
||||
saved_pipelines: Vec<String>,
|
||||
flows: Vec<FlowInfo>,
|
||||
flow_throughput: Vec<FlowThroughputInfo>,
|
||||
/// Stats fresco por workspace (id.toString → stats).
|
||||
fresh_stats: std::collections::BTreeMap<String, WorkspaceStatsInfo>,
|
||||
/// Quota report fresco por workspace.
|
||||
@@ -254,6 +260,17 @@ fn probe_blocking(path: &std::path::Path) -> Result<Snapshot, String> {
|
||||
Response::FlowList { items } => items,
|
||||
_ => Vec::new(),
|
||||
};
|
||||
// Throughput per-socket.
|
||||
write_frame(&mut stream, &Request::FlowThroughput)
|
||||
.await
|
||||
.map_err(|e| format!("write throughput: {e}"))?;
|
||||
let resp: Response = read_frame(&mut stream)
|
||||
.await
|
||||
.map_err(|e| format!("read throughput: {e}"))?;
|
||||
let flow_throughput = match resp {
|
||||
Response::FlowThroughput { items } => items,
|
||||
_ => Vec::new(),
|
||||
};
|
||||
|
||||
// Live tail: log del comando más reciente con bytes>0.
|
||||
let recent_log = {
|
||||
@@ -330,6 +347,7 @@ fn probe_blocking(path: &std::path::Path) -> Result<Snapshot, String> {
|
||||
commands: commands_map,
|
||||
saved_pipelines,
|
||||
flows,
|
||||
flow_throughput,
|
||||
fresh_stats,
|
||||
quotas,
|
||||
hydrate_history,
|
||||
@@ -509,31 +527,38 @@ impl Render for Shell {
|
||||
"ws_suffix · recurso · uso > limit".to_string()
|
||||
};
|
||||
|
||||
// Flow channels (data plane).
|
||||
// Flow channels (data plane) con throughput.
|
||||
let flow_count: usize = self.flows.iter().map(|f| f.sockets.len()).sum();
|
||||
let flow_items: Vec<String> = self
|
||||
.flows
|
||||
.iter()
|
||||
.flat_map(|f| {
|
||||
let pipe = f.pipeline.to_string();
|
||||
let short = &pipe[pipe.len() - 6..];
|
||||
f.sockets
|
||||
.iter()
|
||||
.map(move |s| {
|
||||
format!(
|
||||
"{short} {}",
|
||||
s.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| s.display().to_string())
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect();
|
||||
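// Lookup helper: linear search over `self.flow_throughput` by socket path,
// yielding (total KiB, KiB/s), or (0.0, 0.0) when no entry matches. It borrows
// `self` immutably and is called from the plain `for` loop below instead of
// from inside a `flat_map` closure, which sidesteps the borrow issue there.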
|
||||
let find_tp = |s: &std::path::PathBuf| -> (f64, f64) {
|
||||
for t in &self.flow_throughput {
|
||||
if t.socket == *s {
|
||||
return (t.bytes_total as f64 / 1024.0, t.bytes_per_sec / 1024.0);
|
||||
}
|
||||
}
|
||||
(0.0, 0.0)
|
||||
};
|
||||
let mut flow_items: Vec<String> = Vec::new();
|
||||
for f in &self.flows {
|
||||
let pipe = f.pipeline.to_string();
|
||||
let short_pipe = &pipe[pipe.len() - 6..];
|
||||
for s in &f.sockets {
|
||||
let name = s
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| s.display().to_string());
|
||||
let (total_kib, rate_kib) = find_tp(s);
|
||||
flow_items.push(format!(
|
||||
"{short_pipe} {:<48} {:>7.1} KiB {:>6.2} KiB/s",
|
||||
name, total_kib, rate_kib
|
||||
));
|
||||
}
|
||||
}
|
||||
let flow_descr = if flow_count == 0 {
|
||||
"pipelines con --tap exponen sockets aquí".to_string()
|
||||
} else {
|
||||
"shipote flow tail <socket> para suscribirse".to_string()
|
||||
"pipe6 · socket · total · rate".to_string()
|
||||
};
|
||||
|
||||
let body = div()
|
||||
|
||||
@@ -274,6 +274,11 @@ pub struct DiscernPolicy {
|
||||
/// productores con chunks de tamaño variable.
|
||||
#[serde(default)]
|
||||
pub replay_bytes: usize,
|
||||
/// Rate-limit del flow channel (bytes/s). `0` = sin límite. Si está
|
||||
/// definido, el splitter sleeps proporcional al tamaño del chunk
|
||||
/// antes de re-broadcastear. Protege subscribers lentos.
|
||||
#[serde(default)]
|
||||
pub max_bytes_per_sec: u64,
|
||||
}
|
||||
|
||||
impl Default for DiscernPolicy {
|
||||
@@ -283,6 +288,7 @@ impl Default for DiscernPolicy {
|
||||
enrich_producer: default_true(),
|
||||
replay_chunks: default_replay_chunks(),
|
||||
replay_bytes: 0,
|
||||
max_bytes_per_sec: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,6 +87,10 @@ pub enum LogStream {
|
||||
pub struct WorkspaceManager {
|
||||
inner: Arc<Mutex<Inner>>,
|
||||
incarnator: Arc<Incarnator>,
|
||||
/// True si hubo alguna mutación desde el último `save_snapshot`.
|
||||
/// `save_snapshot` skip si false (snapshot incremental — evita
|
||||
/// re-serialize cuando nada cambió, ej. SIGTERM tras un período idle).
|
||||
dirty: std::sync::atomic::AtomicBool,
|
||||
}
|
||||
|
||||
struct Inner {
|
||||
@@ -238,9 +242,23 @@ impl WorkspaceManager {
|
||||
pending_pipeline_restarts: Vec::new(),
|
||||
})),
|
||||
incarnator: Arc::new(Incarnator::new(cfg)),
|
||||
dirty: std::sync::atomic::AtomicBool::new(false),
|
||||
}
|
||||
}
|
||||
|
||||
/// Marca el manager como dirty. Cualquier mutación que afecta al
|
||||
/// snapshot debería llamar esto.
|
||||
#[inline]
|
||||
fn mark_dirty(&self) {
|
||||
self.dirty.store(true, std::sync::atomic::Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// True si hubo cambios desde el último `save_snapshot`. Útil para
|
||||
/// chequeos cooperativos (ej. monitoring que pollea cada N).
|
||||
pub fn is_dirty(&self) -> bool {
|
||||
self.dirty.load(std::sync::atomic::Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Registra un supervisor para un pipeline con `restart_on_failure=true`.
|
||||
/// El daemon llama esto tras `run_pipeline` para que `reap_dead` agregue
|
||||
/// el pipeline a la cola de restart cuando algún command falle.
|
||||
@@ -267,6 +285,8 @@ impl WorkspaceManager {
|
||||
current_backoff_ms: initial_backoff,
|
||||
},
|
||||
);
|
||||
drop(g);
|
||||
self.mark_dirty();
|
||||
}
|
||||
|
||||
/// Variante que preserva backoff/count del supervisor anterior (para
|
||||
@@ -480,6 +500,7 @@ impl WorkspaceManager {
|
||||
/// Guarda (o reemplaza) un PipelineSpec bajo `name`.
|
||||
pub async fn save_pipeline(&self, name: String, spec: PipelineSpec) {
|
||||
self.inner.lock().await.saved_pipelines.insert(name, spec);
|
||||
self.mark_dirty();
|
||||
}
|
||||
|
||||
/// Devuelve los nombres de los pipelines guardados.
|
||||
@@ -497,7 +518,11 @@ impl WorkspaceManager {
|
||||
|
||||
/// Elimina un saved pipeline.
|
||||
pub async fn drop_saved_pipeline(&self, name: &str) -> bool {
|
||||
self.inner.lock().await.saved_pipelines.remove(name).is_some()
|
||||
let existed = self.inner.lock().await.saved_pipelines.remove(name).is_some();
|
||||
if existed {
|
||||
self.mark_dirty();
|
||||
}
|
||||
existed
|
||||
}
|
||||
|
||||
/// Label del workspace, si existe.
|
||||
@@ -648,6 +673,7 @@ impl WorkspaceManager {
|
||||
stats_history: std::collections::VecDeque::with_capacity(STATS_HISTORY_CAP),
|
||||
};
|
||||
self.inner.lock().await.workspaces.insert(id, state);
|
||||
self.mark_dirty();
|
||||
info!(%id, ?ttl, "workspace created");
|
||||
|
||||
// Si tiene TTL, programar auto-stop. El task captura un weak ref
|
||||
@@ -698,6 +724,7 @@ impl WorkspaceManager {
|
||||
// También limpiamos flow_channels del workspace si los hubiera —
|
||||
// por workspace lo retenemos por pipeline, no por workspace.
|
||||
drop(g);
|
||||
self.mark_dirty();
|
||||
|
||||
// 1) SIGTERM (o SIGKILL si grace=0) a todos vivos.
|
||||
let initial_signal = if grace.is_zero() { Signal::SIGKILL } else { Signal::SIGTERM };
|
||||
|
||||
@@ -181,10 +181,18 @@ impl WorkspaceManager {
|
||||
}
|
||||
}
|
||||
|
||||
/// Escribe snapshot a disco.
|
||||
/// Escribe snapshot a disco. Si `is_dirty()` es false **y** el path
|
||||
/// existe (snapshot previo válido), skip la escritura.
|
||||
pub async fn save_snapshot(&self, path: &Path) -> anyhow::Result<()> {
|
||||
if !self.is_dirty() && path.exists() {
|
||||
info!(path = %path.display(), "snapshot SKIPPED (clean)");
|
||||
return Ok(());
|
||||
}
|
||||
let snap = self.snapshot().await;
|
||||
snap.write(path)?;
|
||||
// Clear dirty: lo que está en disco es el current state.
|
||||
self.dirty
|
||||
.store(false, std::sync::atomic::Ordering::Relaxed);
|
||||
info!(path = %path.display(), workspaces = snap.workspaces.len(), "snapshot saved");
|
||||
Ok(())
|
||||
}
|
||||
@@ -245,6 +253,11 @@ impl WorkspaceManager {
|
||||
out.saved_pipelines_restored += 1;
|
||||
}
|
||||
out.live_pipelines = snap.live_pipelines;
|
||||
// Restore no cuenta como mutación — lo que está en disco es lo
|
||||
// que acabamos de cargar. Sin esto, el próximo SIGTERM siempre
|
||||
// re-escribiría aunque no hubiese cambios reales.
|
||||
self.dirty
|
||||
.store(false, std::sync::atomic::Ordering::Relaxed);
|
||||
info!(
|
||||
workspaces = out.workspaces_restored,
|
||||
saved_pipelines = out.saved_pipelines_restored,
|
||||
@@ -304,6 +317,24 @@ mod tests {
|
||||
assert!(restored_ids.contains(&id2));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn save_snapshot_skips_when_clean() {
|
||||
let tmp = tempfile::tempdir().unwrap();
|
||||
let path = tmp.path().join("state.json");
|
||||
let mgr = Arc::new(WorkspaceManager::new(IncarnatorConfig::default()));
|
||||
let _ = mgr.create(sample_ws("dirty-test")).await.unwrap();
|
||||
assert!(mgr.is_dirty(), "create debería marcar dirty");
|
||||
mgr.save_snapshot(&path).await.unwrap();
|
||||
assert!(!mgr.is_dirty(), "save_snapshot debería limpiar dirty");
|
||||
let mtime1 = std::fs::metadata(&path).unwrap().modified().unwrap();
|
||||
// Esperamos un pelín para que mtime cambie si fuera re-escrito.
|
||||
tokio::time::sleep(std::time::Duration::from_millis(20)).await;
|
||||
// Segundo save sin mutación → skip.
|
||||
mgr.save_snapshot(&path).await.unwrap();
|
||||
let mtime2 = std::fs::metadata(&path).unwrap().modified().unwrap();
|
||||
assert_eq!(mtime1, mtime2, "skip cuando clean — mtime no cambia");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn snapshot_includes_saved_pipelines() {
|
||||
use shipote_card::{CommandRef, DiscernPolicy, PipelineSpec};
|
||||
|
||||
@@ -132,6 +132,7 @@ pub async fn run_pipeline(
|
||||
edges: edge_meta,
|
||||
tap,
|
||||
sample_bytes: spec.discern.sample_bytes,
|
||||
max_bytes_per_sec: spec.discern.max_bytes_per_sec,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -308,6 +309,9 @@ struct SplitterSpec {
|
||||
edges: Vec<EdgeMeta>,
|
||||
tap: bool,
|
||||
sample_bytes: usize,
|
||||
/// Rate-limit en bytes/s (0 = sin limit). Tras cada chunk de `n`
|
||||
/// bytes, splitter sleeps `n / max_bytes_per_sec` segundos.
|
||||
max_bytes_per_sec: u64,
|
||||
}
|
||||
|
||||
struct SplitterHandle {
|
||||
@@ -430,6 +434,7 @@ fn spawn_splitter(
|
||||
}
|
||||
broadcast_chunk(&writers, &edge_senders, &buf[..n]).await;
|
||||
total += n as u64;
|
||||
rate_limit_sleep(spec.max_bytes_per_sec, n).await;
|
||||
}
|
||||
|
||||
let d = if spec.tap {
|
||||
@@ -448,6 +453,7 @@ fn spawn_splitter(
|
||||
if n == 0 { break; }
|
||||
broadcast_chunk(&writers, &edge_senders, &buf[..n]).await;
|
||||
total += n as u64;
|
||||
rate_limit_sleep(spec.max_bytes_per_sec, n).await;
|
||||
}
|
||||
debug!(bytes = total, consumers = writers.len(), "splitter finished");
|
||||
|
||||
@@ -469,6 +475,19 @@ fn spawn_splitter(
|
||||
SplitterHandle { handle }
|
||||
}
|
||||
|
||||
/// Token-bucket simple: si `max_bps > 0`, sleep `chunk_size / max_bps`
|
||||
/// segundos. Implementación crude pero suficiente para v1.
|
||||
async fn rate_limit_sleep(max_bps: u64, chunk_bytes: usize) {
|
||||
if max_bps == 0 {
|
||||
return;
|
||||
}
|
||||
let secs = chunk_bytes as f64 / max_bps as f64;
|
||||
let ms = (secs * 1000.0) as u64;
|
||||
if ms > 0 {
|
||||
tokio::time::sleep(std::time::Duration::from_millis(ms)).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn broadcast_chunk(
|
||||
writers: &[AsyncFd<std::os::fd::OwnedFd>],
|
||||
edge_senders: &[Option<crate::flow_channel::FlowSender>],
|
||||
@@ -721,6 +740,7 @@ mod tests {
|
||||
enrich_producer: true,
|
||||
replay_chunks: 32,
|
||||
replay_bytes: 0,
|
||||
max_bytes_per_sec: 0,
|
||||
},
|
||||
restart_on_failure: false,
|
||||
restart_backoff_ms: 200,
|
||||
|
||||
Reference in New Issue
Block a user