feat(shipote): pipeline backoff + quota card + logs follow (fase M)

- PipelineSpec.restart_backoff_ms + restart_max_backoff_ms + restart_max: backoff exponencial entre relaunches (anti-thrash). take_pending_restarts aplica restart_max (0 = infinito); excedido = supervisor descartado con warning. El daemon hace tokio::sleep(backoff) antes del relaunch y escala current_backoff x2 hasta el cap.
- shipote-shell card "Quota breaches": el probe se extiende con WorkspaceQuota por workspace. Color rojo si hay breaches, verde si no.
- shipote logs --follow: poll cada 200ms al daemon, imprime el sufijo nuevo hasta que el comando termine. Sin cambios al protocolo. Best-effort: si el ring rota más rápido que el poll, se pierden bytes.

83 tests pasan (ente-incarnate 16, nouser-core 27, shipote-card 8, shipote-core 24, shipote-discern 5, yahweh-provider-fs 3).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -104,23 +104,25 @@ async fn main() -> anyhow::Result<()> {
|
||||
mgr.reap_dead().await;
|
||||
let pending = mgr.take_pending_restarts().await;
|
||||
for sup in pending {
|
||||
let backoff = std::time::Duration::from_millis(sup.current_backoff_ms);
|
||||
info!(
|
||||
label = %sup.spec.label,
|
||||
restart_count = sup.restart_count,
|
||||
"pipeline restart: relaunching"
|
||||
backoff_ms = sup.current_backoff_ms,
|
||||
"pipeline restart: relaunching after backoff"
|
||||
);
|
||||
// Backoff antes del relaunch — anti-thrash.
|
||||
tokio::time::sleep(backoff).await;
|
||||
let inc = mgr.incarnator_handle();
|
||||
let disc = std::sync::Arc::new(DiscernPipeline::default_pipeline());
|
||||
let ws_label = mgr
|
||||
.workspace_label(sup.spec.workspace)
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
let restart_count = sup.restart_count;
|
||||
let workspace = sup.spec.workspace;
|
||||
let ws_label = mgr.workspace_label(workspace).await.unwrap_or_default();
|
||||
let tap = sup.tap;
|
||||
let mut new_spec = sup.spec.clone();
|
||||
// Mantener restart_on_failure para futuras fallas.
|
||||
new_spec.restart_on_failure = true;
|
||||
// Escalar el backoff para la PRÓXIMA falla.
|
||||
let next_backoff = (sup.current_backoff_ms * 2)
|
||||
.min(new_spec.restart_max_backoff_ms);
|
||||
match shipote_core::pipeline::run_pipeline(
|
||||
&new_spec,
|
||||
&ws_label,
|
||||
@@ -132,19 +134,23 @@ async fn main() -> anyhow::Result<()> {
|
||||
.await
|
||||
{
|
||||
Ok(launch) => {
|
||||
mgr.register_pipeline_commands(workspace, launch.pipeline, launch.command_pids.clone())
|
||||
.await;
|
||||
// Re-registrar supervisor con el nuevo pipeline_id,
|
||||
// preservando restart_count.
|
||||
let mut s = shipote_core::PipelineSupervisor {
|
||||
mgr.register_pipeline_commands(
|
||||
workspace,
|
||||
spec: new_spec,
|
||||
launch.pipeline,
|
||||
launch.command_pids.clone(),
|
||||
)
|
||||
.await;
|
||||
// Re-registrar supervisor con backoff escalado +
|
||||
// restart_count preservado.
|
||||
mgr.register_pipeline_supervisor_with_state(
|
||||
launch.pipeline,
|
||||
workspace,
|
||||
new_spec,
|
||||
tap,
|
||||
restart_count,
|
||||
};
|
||||
s.restart_count = restart_count;
|
||||
mgr.register_pipeline_supervisor(launch.pipeline, workspace, s.spec.clone(), tap)
|
||||
.await;
|
||||
sup.restart_count,
|
||||
next_backoff,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(?e, "pipeline restart failed");
|
||||
|
||||
Reference in New Issue
Block a user