Tests audit replay, métricas chain, TTL por cap, brain snapshot

- 3 tests integración audit con CAS aislado por test (ENTE_CAS_ROOT en
  tempdir): flush_round_trip_preserves_chain, replay_reconstructs_engine_state,
  replay_after_eviction_still_works.
- AuditLog tracks last_flush_at_ms + subscriber_count. metrics expone:
  audit_chain_length, audit_in_memory, audit_subscribers,
  audit_last_flush_age_seconds, audit_head_info{sha=...}.
- ttl_for_capability() tabla per-variant: Spawn/FilesystemRoot 30s,
  Device 60s, Endpoint/KernelNetlink/LegacyLogind 5min, Journal 1h.
  mediate_capability y renew_grant consultan la tabla.
- ObserverSnapshot serializable (sin Instants — last_seen se ancla a
  now() al restore). Counters, coocurrencias e histogramas persistidos.
  Snapshot adjunto al fractal: <checkpoint>.brain.json (with_extension
  reemplaza la extensión del checkpoint por brain.json). --restore lo
  carga si existe.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sergio
2026-05-04 00:18:55 +00:00
parent ca75ba185f
commit 6aee9254d4
6 changed files with 331 additions and 8 deletions
+7 -3
View File
@@ -4,7 +4,7 @@
//! periódicamente con `renew_grant(token)`; en caso contrario, el background
//! task `purge_expired_grants` los revoca al vencimiento.
use super::{EnteGraph, GrantedCapability, DEFAULT_GRANT_TTL};
use super::{ttl_for_capability, EnteGraph, GrantedCapability};
use crate::events::CapabilityGrant;
use ente_card::Capability;
use std::time::Instant;
@@ -24,7 +24,10 @@ impl EnteGraph {
Some(provider) => {
let token = self.next_token;
self.next_token += 1;
let expires_at = Instant::now() + DEFAULT_GRANT_TTL;
// TTL específico por variante de capability — caps escaladas
// (Spawn, FilesystemRoot) viven menos.
let ttl = ttl_for_capability(&cap);
let expires_at = Instant::now() + ttl;
self.grants.insert(token, GrantedCapability {
cap: cap.clone(),
provider,
@@ -39,11 +42,12 @@ impl EnteGraph {
/// Extiende un grant existente. Devuelve `true` si renovó. Si el token
/// no existe o ya expiró, `false` (el cliente debe re-acquire).
/// Usa el TTL específico de la cap del grant.
pub fn renew_grant(&mut self, token: u64) -> bool {
let now = Instant::now();
if let Some(g) = self.grants.get_mut(&token) {
if g.expires_at > now {
g.expires_at = now + DEFAULT_GRANT_TTL;
g.expires_at = now + ttl_for_capability(&g.cap);
return true;
}
// Expired — purgamos aquí mismo.
+24 -1
View File
@@ -71,9 +71,32 @@ pub(in crate::graph) struct GrantedCapability {
pub expires_at: std::time::Instant,
}
/// TTL default para nuevos grants. Configurable por bus en el futuro.
/// Default TTL for grants when the capability has no override. 60s is a
/// compromise: long enough to avoid churn in interactive patterns, short
/// enough that leaked credentials expire quickly.
///
/// NOTE(review): `ttl_for_capability` in this file matches every variant
/// explicitly and does not reference this constant — confirm which callers
/// still rely on it.
pub const DEFAULT_GRANT_TTL: std::time::Duration = std::time::Duration::from_secs(60);
/// Returns the grant time-to-live for the given [`Capability`] variant.
///
/// Higher-risk capabilities get a shorter TTL so holders must renew more
/// often; logging-style capabilities may live longer. The table is:
///
/// | Variant(s)                                   | TTL            |
/// |----------------------------------------------|----------------|
/// | `Spawn`, `FilesystemRoot`                    | 30 s           |
/// | `Device`                                     | 60 s           |
/// | `Endpoint`, `KernelNetlink`, `LegacyLogind`  | 300 s (5 min)  |
/// | `Journal`                                    | 3600 s (1 h)   |
///
/// Every variant is listed explicitly and there is no wildcard arm, so no
/// capability falls through to a default value inside this function.
pub fn ttl_for_capability(cap: &Capability) -> std::time::Duration {
    let seconds: u64 = match cap {
        // Escalated capabilities: short TTL to force frequent renewal.
        Capability::Spawn | Capability::FilesystemRoot => 30,
        Capability::Device { .. } => 60,
        // General-purpose capabilities: five minutes.
        Capability::Endpoint { .. }
        | Capability::KernelNetlink(_)
        | Capability::LegacyLogind => 300,
        // Logging can live for a long time: one hour.
        Capability::Journal => 3600,
    };
    std::time::Duration::from_secs(seconds)
}
impl EnteGraph {
pub fn new(mut seed: EntityCard) -> Self {
// Extraemos genesis antes de almacenar la Semilla — evita duplicación
+55 -3
View File
@@ -98,7 +98,7 @@ fn main() -> anyhow::Result<()> {
rt.block_on(primordial_loop(
card, dev_mode,
cli.checkpoint, cli.rules, cli.rules_out,
cli.checkpoint, cli.restore, cli.rules, cli.rules_out,
cli.audit_head, cli.metrics_addr, cli.brain_half_life,
cli.autopromote_secs,
))
@@ -108,6 +108,7 @@ async fn primordial_loop(
seed_card: ente_card::EntityCard,
dev_mode: bool,
checkpoint_path: Option<PathBuf>,
restore_path: Option<PathBuf>,
rules_path: Option<PathBuf>,
rules_out: Option<PathBuf>,
audit_head: Option<PathBuf>,
@@ -175,6 +176,28 @@ async fn primordial_loop(
},
);
}
// Brain restore: si hay --restore <path>, cargamos el snapshot adjunto
// <path>.brain.json. Counters preservados across reboots.
if let Some(rpath) = &restore_path {
let brain_path = rpath.with_extension("brain.json");
if brain_path.exists() {
match read_brain_snapshot(&brain_path) {
Ok(snap) => {
let total = snap.total;
let kinds = snap.marginal.len();
let restored = ente_brain::Observer::from_snapshot(snap);
*brain.observer.write().await = restored;
info!(
path = %brain_path.display(),
total, kinds,
"brain snapshot restaurado"
);
}
Err(e) => warn!(?e, path = %brain_path.display(), "brain snapshot read falló"),
}
}
}
// Si --audit-head, configuramos el head pointer y arrancamos auto-flush.
if let Some(head_path) = audit_head {
// Re-creamos el AuditLog con head pointer.
@@ -257,7 +280,7 @@ async fn primordial_loop(
// Cerebro observa antes que el grafo mute. Snapshot del
// SubjectInfo se hace contra el estado pre-mutación.
feed_brain(&brain, &brain_sink, &graph, &evt).await;
if dispatch_graph_event(&mut graph, evt, &graph_tx, &checkpoint_path).await {
if dispatch_graph_event(&mut graph, evt, &graph_tx, &checkpoint_path, &brain).await {
return Ok(());
}
}
@@ -285,6 +308,7 @@ async fn dispatch_graph_event(
evt: GraphEvent,
tx: &mpsc::Sender<GraphEvent>,
checkpoint: &Option<PathBuf>,
brain: &BrainState,
) -> bool {
match evt {
GraphEvent::EnteDied { id, status } => {
@@ -310,11 +334,24 @@ async fn dispatch_graph_event(
GraphEvent::Shutdown { reason } => {
warn!(?reason, "shutdown del fractal");
if let Some(path) = checkpoint.as_ref() {
// Snapshot del grafo
let snap = graph.snapshot();
match snap.write(path) {
Ok(()) => info!(path = %path.display(), entes = snap.entes.len(), "snapshot persistido"),
Ok(()) => info!(path = %path.display(), entes = snap.entes.len(), "snapshot fractal persistido"),
Err(e) => warn!(?e, "snapshot write falló"),
}
// Snapshot del cerebro (observer state) en archivo adjunto
let brain_path = path.with_extension("brain.json");
let obs_snap = brain.observer.read().await.snapshot();
match write_brain_snapshot(&brain_path, &obs_snap) {
Ok(()) => info!(
path = %brain_path.display(),
total = obs_snap.total,
kinds = obs_snap.marginal.len(),
"snapshot brain persistido"
),
Err(e) => warn!(?e, "brain snapshot write falló"),
}
}
graph.cascade_shutdown().await;
return true;
@@ -382,6 +419,21 @@ fn spawn_echo_smoke_test(bus_path: PathBuf) {
});
}
/// Serializes `snap` as pretty-printed JSON and stores it at `path`.
///
/// The bytes are first written to a sibling temporary file (`path` with its
/// extension replaced by `tmp`) and then renamed onto `path`, so a failed
/// write does not leave a truncated file at the final location.
///
/// # Errors
///
/// Returns an error if serialization fails, if the parent directory cannot be
/// created, or if writing/renaming the temporary file fails. On a failed
/// write or rename the temporary file is removed on a best-effort basis.
fn write_brain_snapshot(path: &std::path::Path, snap: &ente_brain::observer::ObserverSnapshot) -> anyhow::Result<()> {
    let bytes = serde_json::to_vec_pretty(snap)?;

    // Surface directory-creation failures instead of discarding them; a bare
    // file name has an empty parent, which needs no directory.
    if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) {
        std::fs::create_dir_all(parent)?;
    }

    let tmp = path.with_extension("tmp");
    let outcome = std::fs::write(&tmp, &bytes).and_then(|()| std::fs::rename(&tmp, path));
    if outcome.is_err() {
        // Do not leave a stale temp file behind; the original error is what
        // the caller needs, so a cleanup failure is deliberately ignored.
        let _ = std::fs::remove_file(&tmp);
    }
    outcome?;
    Ok(())
}
/// Loads an observer snapshot from the JSON file at `path`.
///
/// # Errors
///
/// Returns an error if the file cannot be read or if its contents do not
/// deserialize into an `ObserverSnapshot`.
fn read_brain_snapshot(path: &std::path::Path) -> anyhow::Result<ente_brain::observer::ObserverSnapshot> {
    let raw = std::fs::read(path)?;
    Ok(serde_json::from_slice(&raw)?)
}
fn init_tracing() {
use tracing_subscriber::{fmt, EnvFilter};
let filter = EnvFilter::try_from_default_env()