feat(shipote): multi-core CPU% + quota report + restart-on-failure (fase K)
- WorkspaceStats.cpu_cores vía sysconf cacheado. La CLI muestra `cpu_pct: 98.7 % (24.7% total / 4 cores)`.
- workspace_quota compara SomaSpec.rlimits contra el accounting actual. Reporta los breaches en formato legible para humanos. NO hay enforcement automático en v1.
- run_with_options(.., restart_on_failure): si exit != 0, el reaper relanza el comando con backoff exponencial 200ms → 30s cap. Inner.restart_specs persiste el spec entre intentos.

81 tests pasan (ente-incarnate 16, nouser-core 27, shipote-card 8, shipote-core 22, shipote-discern 5, yahweh-provider-fs 3).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,9 @@ enum Cmd {
|
||||
/// ULID del workspace destino.
|
||||
#[arg(short = 'w', long)]
|
||||
workspace: String,
|
||||
/// Si exit != 0, relanzar con backoff exponencial.
|
||||
#[arg(long)]
|
||||
restart_on_failure: bool,
|
||||
/// Path del ejecutable.
|
||||
exec: String,
|
||||
/// Argumentos del comando.
|
||||
@@ -164,6 +167,10 @@ enum WsCmd {
|
||||
Stats {
|
||||
id: String,
|
||||
},
|
||||
/// Quota report: rlimits declarados vs uso actual.
|
||||
Quota {
|
||||
id: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
@@ -251,7 +258,9 @@ async fn main() -> Result<()> {
|
||||
.unwrap_or_else(|| "—".into());
|
||||
let cpu_pct = info
|
||||
.cpu_percent
|
||||
.map(|p| format!("{p:.1} %"))
|
||||
.map(|p| format!("{p:.1} % ({:.1}% total / {} cores)",
|
||||
if info.cpu_cores > 0 { p / info.cpu_cores as f32 } else { p },
|
||||
info.cpu_cores))
|
||||
.unwrap_or_else(|| "— (esperando 2do sample)".into());
|
||||
println!("rss: {rss}");
|
||||
println!("rss_peak: {peak}");
|
||||
@@ -265,6 +274,35 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
Cmd::Workspace(WsCmd::Quota { id }) => {
|
||||
let id = parse_ws_id(&id)?;
|
||||
let resp = round_trip(&mut stream, Request::WorkspaceQuota { workspace: id }).await?;
|
||||
match resp {
|
||||
Response::WorkspaceQuota { info } => {
|
||||
let mem = info
|
||||
.mem_limit
|
||||
.map(|b| format!("{:.2} MiB", b as f64 / 1024.0 / 1024.0))
|
||||
.unwrap_or_else(|| "—".into());
|
||||
let nproc = info
|
||||
.nproc_limit
|
||||
.map(|n| n.to_string())
|
||||
.unwrap_or_else(|| "—".into());
|
||||
println!("mem_limit: {mem}");
|
||||
println!("nproc_limit: {nproc}");
|
||||
if info.breaches.is_empty() {
|
||||
println!("breaches: (none — dentro de quota)");
|
||||
} else {
|
||||
println!("breaches:");
|
||||
for b in info.breaches {
|
||||
println!(" - {b}");
|
||||
}
|
||||
}
|
||||
}
|
||||
Response::Error { message } => return Err(anyhow!(message)),
|
||||
other => print_unexpected(&other),
|
||||
}
|
||||
}
|
||||
|
||||
Cmd::Workspace(WsCmd::Stop { id, grace_ms }) => {
|
||||
let id = parse_ws_id(&id)?;
|
||||
let resp = round_trip(&mut stream, Request::WorkspaceStop { id, grace_ms }).await?;
|
||||
@@ -277,7 +315,7 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
Cmd::Run { workspace, exec, argv } => {
|
||||
Cmd::Run { workspace, exec, argv, restart_on_failure } => {
|
||||
let id = parse_ws_id(&workspace)?;
|
||||
let resp = round_trip(
|
||||
&mut stream,
|
||||
@@ -286,6 +324,7 @@ async fn main() -> Result<()> {
|
||||
exec,
|
||||
argv,
|
||||
envp: vec![],
|
||||
restart_on_failure,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -17,7 +17,8 @@ use shipote_core::WorkspaceManager;
|
||||
use shipote_discern::{DiscernPipeline, Hint};
|
||||
use shipote_protocol::{
|
||||
default_socket_path, read_frame, write_frame, CommandInfo as ProtoCommandInfo,
|
||||
EdgeDiscernmentInfo, FlowInfo, Request, Response, WorkspaceStatsInfo, WorkspaceSummary,
|
||||
EdgeDiscernmentInfo, FlowInfo, QuotaReportInfo, Request, Response, WorkspaceStatsInfo,
|
||||
WorkspaceSummary,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
use tokio::net::{UnixListener, UnixStream};
|
||||
@@ -180,8 +181,11 @@ async fn dispatch(
|
||||
}
|
||||
}
|
||||
|
||||
Request::Run { workspace, exec, argv, envp } => {
|
||||
match mgr.run(workspace, exec, argv, envp).await {
|
||||
Request::Run { workspace, exec, argv, envp, restart_on_failure } => {
|
||||
match mgr
|
||||
.run_with_options(workspace, exec, argv, envp, restart_on_failure)
|
||||
.await
|
||||
{
|
||||
Ok(s) => Response::RunStarted {
|
||||
workspace,
|
||||
command_id: s.id,
|
||||
@@ -345,6 +349,7 @@ async fn dispatch(
|
||||
rss_peak_bytes: s.rss_peak_bytes,
|
||||
cpu_usec: s.cpu_usec,
|
||||
cpu_percent: s.cpu_percent,
|
||||
cpu_cores: s.cpu_cores,
|
||||
source: s.source,
|
||||
uptime_ms: s.uptime_ms,
|
||||
},
|
||||
@@ -354,6 +359,19 @@ async fn dispatch(
|
||||
},
|
||||
},
|
||||
|
||||
Request::WorkspaceQuota { workspace } => match mgr.workspace_quota(workspace).await {
|
||||
Some(q) => Response::WorkspaceQuota {
|
||||
info: QuotaReportInfo {
|
||||
mem_limit: q.mem_limit,
|
||||
nproc_limit: q.nproc_limit,
|
||||
breaches: q.breaches,
|
||||
},
|
||||
},
|
||||
None => Response::Error {
|
||||
message: format!("workspace {workspace} not found"),
|
||||
},
|
||||
},
|
||||
|
||||
Request::FlowList => {
|
||||
let items = mgr
|
||||
.list_flow_pipelines()
|
||||
|
||||
Reference in New Issue
Block a user