feat(shipote): health endpoint + audit log + token-bucket real (fase R)

- Request::Health → Response::Health { version, uptime_ms, alive_*,
  active_flows, dirty }. CLI: shipote health.
- handle_client lee peer_uid una vez al accept. audit_request emite
  info!(target: "audit", uid, action, detail) por mutación (create/stop/
  run/pipeline.*/flow.drop). Reads omitidos. Filtrable con SHIPOTE_LOG=
  warn,audit=info.
- TokenBucket real reemplaza rate_limit_sleep: refill por wall time,
  capacity = 1s de rate, debt negativo dispara sleep proporcional.
  Permite burst real, no chunk-by-chunk uniforme.

85 tests pasan (ente-incarnate 16, nouser-core 27, shipote-card 8,
shipote-core 26, shipote-discern 5, yahweh-provider-fs 3).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
sergio
2026-05-11 16:58:10 +00:00
parent 18c0344a52
commit a9124449f9
5 changed files with 180 additions and 13 deletions
+27
View File
@@ -24,6 +24,9 @@ enum Cmd {
/// Health-check del daemon.
Ping,
/// Health endpoint estructurado.
Health,
/// Capacidades runtime detectadas por el daemon.
Caps,
@@ -195,6 +198,30 @@ async fn main() -> Result<()> {
}
}
Cmd::Health => {
let resp = round_trip(&mut stream, Request::Health).await?;
match resp {
Response::Health {
version,
uptime_ms,
alive_workspaces,
alive_commands,
alive_pipelines,
active_flows,
dirty,
} => {
println!("version: {version}");
println!("uptime: {} ms", uptime_ms);
println!("alive_workspaces: {alive_workspaces}");
println!("alive_commands: {alive_commands}");
println!("alive_pipelines: {alive_pipelines}");
println!("active_flows: {active_flows}");
println!("dirty: {dirty}");
}
other => print_unexpected(&other),
}
}
Cmd::Caps => {
let resp = round_trip(&mut stream, Request::Capabilities).await?;
match resp {
+56 -2
View File
@@ -38,6 +38,7 @@ async fn main() -> anyhow::Result<()> {
}
let listener = UnixListener::bind(&sock).with_context(|| format!("bind {}", sock.display()))?;
info!(socket = %sock.display(), "shipote-daemon listening");
let daemon_started = std::time::Instant::now();
// Sidecar pool: una sesión global del daemon + N sesiones efímeras
// por edge enriquecido tras cada pipeline tap.
@@ -241,7 +242,7 @@ async fn main() -> anyhow::Result<()> {
let disc = discerner.clone();
let pool = sidecar_pool.clone();
tokio::spawn(async move {
if let Err(e) = handle_client(stream, mgr, disc, pool).await {
if let Err(e) = handle_client(stream, mgr, disc, pool, daemon_started).await {
warn!(?e, "client handler error");
}
});
@@ -280,27 +281,80 @@ async fn handle_client(
mgr: Arc<WorkspaceManager>,
disc: Arc<DiscernPipeline>,
pool: Option<Arc<brahman_sidecar::SidecarPool>>,
daemon_started: std::time::Instant,
) -> anyhow::Result<()> {
// Audit: peer uid lo leemos una vez aquí (no cambia durante la conexión).
let peer = peer_uid(&stream).unwrap_or(u32::MAX);
loop {
let req: Request = match read_frame(&mut stream).await {
Ok(r) => r,
Err(shipote_protocol::ProtocolError::Closed) => return Ok(()),
Err(e) => return Err(e.into()),
};
let resp = dispatch(&mgr, &disc, &pool, req).await;
audit_request(peer, &req);
let resp = dispatch(&mgr, &disc, &pool, daemon_started, req).await;
write_frame(&mut stream, &resp).await?;
}
}
/// Emits one log record with `target = "audit"` for every state-changing request.
///
/// The record carries the caller-supplied `peer_uid`, a dotted action name and a
/// `key=value` detail string built from the request's fields. Read-only
/// requests (ping, health, listings, stats, ...) produce no record, so the
/// audit log is not flooded.
fn audit_request(peer_uid: u32, req: &Request) {
    // `None` means "read-only request, nothing to audit". The match lists every
    // variant explicitly (no wildcard), so a newly added `Request` variant
    // forces a decision here at compile time.
    let entry: Option<(&'static str, String)> = match req {
        // Reads: never audited.
        Request::Ping
        | Request::Health
        | Request::WorkspaceList
        | Request::WorkspaceStats { .. }
        | Request::WorkspaceQuota { .. }
        | Request::WorkspaceStatsHistory { .. }
        | Request::WorkspaceFullSummary { .. }
        | Request::CommandList { .. }
        | Request::CommandLogs { .. }
        | Request::PipelineSavedList
        | Request::FlowList
        | Request::FlowThroughput
        | Request::Discern { .. }
        | Request::Capabilities => None,

        // Workspace mutations.
        Request::WorkspaceCreate { spec } => {
            Some(("workspace.create", format!("label={}", spec.label)))
        }
        Request::WorkspaceStop { id, grace_ms } => {
            Some(("workspace.stop", format!("id={} grace_ms={}", id, grace_ms)))
        }

        // Command execution.
        Request::Run { workspace, exec, restart_on_failure, .. } => Some((
            "run",
            format!("ws={} exec={} restart={}", workspace, exec, restart_on_failure),
        )),

        // Pipeline mutations.
        Request::PipelineRun { spec, tap, .. } => {
            Some(("pipeline.run", format!("label={} tap={}", spec.label, tap)))
        }
        Request::PipelineRunSaved { name, tap, .. } => {
            Some(("pipeline.run-saved", format!("name={} tap={}", name, tap)))
        }
        Request::PipelineStop { pipeline, grace_ms } => {
            Some(("pipeline.stop", format!("id={} grace_ms={}", pipeline, grace_ms)))
        }
        Request::PipelineSave { name, .. } => Some(("pipeline.save", format!("name={}", name))),
        Request::PipelineDrop { name } => Some(("pipeline.drop", format!("name={}", name))),

        // Flow mutations.
        Request::FlowDrop { pipeline } => Some(("flow.drop", format!("pipeline={}", pipeline))),
    };

    if let Some((action, detail)) = entry {
        info!(target: "audit", uid = peer_uid, action, detail = %detail, "audit");
    }
}
async fn dispatch(
mgr: &Arc<WorkspaceManager>,
disc: &DiscernPipeline,
pool: &Option<Arc<brahman_sidecar::SidecarPool>>,
daemon_started: std::time::Instant,
req: Request,
) -> Response {
match req {
Request::Ping => Response::Pong,
Request::Health => {
let counts = mgr.health_counts().await;
Response::Health {
version: env!("CARGO_PKG_VERSION").to_string(),
uptime_ms: daemon_started.elapsed().as_millis() as u64,
alive_workspaces: counts.alive_workspaces,
alive_commands: counts.alive_commands,
alive_pipelines: counts.alive_pipelines,
active_flows: counts.active_flows,
dirty: mgr.is_dirty(),
}
}
Request::WorkspaceCreate { spec } => match mgr.create(spec).await {
Ok((id, warnings)) => Response::WorkspaceCreated { id, warnings },
Err(e) => Response::Error { message: format!("{e}") },