Files
arje/crates/ente-zero/src/main.rs
T
Sergio ca75ba185f Cristales temporales, replay, lease/renew y audit streaming
- GapHistogram añade sum_squares_secs → stddev en O(1). GapStats serializable
  con count/mean/stddev/max.
- Crystal incluye gap_stats?: GapStats. crystal_to_rule emite Sequence con
  within_ms = (mean+2σ)*1000 cuando gap_stats.count >= 4; fallback a Single.
- audit::collect_chain_from_cas() recoge la cadena en orden cronológico.
  replay_chain() reconstruye RuleEngine aplicando PromoteCrystal/RemoveRule.
  Endpoint ReplayAudit + brainctl replay.
- GrantedCapability con expires_at: Instant. DEFAULT_GRANT_TTL = 60s.
  EnteGraph::renew_grant + purge_expired_grants. Tick cada 10s en el bucle
  primordial.
- AuditLog::subscribe() entrega un mpsc::UnboundedReceiver. append() empuja
  a todos los subscribers, purgando los muertos. IntrospectRequest::StreamAudit
  toma posesión de la conn y envía AuditStreamFrame en bucle. brainctl
  stream-audit imprime entries en directo.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 23:51:36 +00:00

455 lines
17 KiB
Rust

//! Ente #0 — el primer Ente. PID 1 del fractal.
//!
//! Reglas no negociables:
//! 1. NUNCA lógica de servicio aquí. Sólo: leer Semilla, cosechar zombis,
//! mediar capacidades, propagar eventos.
//! 2. Single-threaded. Cualquier paralelismo se delega a Entes worker.
//! Un panic en un thread de PID 1 = kernel panic.
//! 3. Errores de hijos son *eventos* en `graph_tx`, no `Result` propagado.
//!
//! Este archivo es sólo wireup. La lógica vive en:
//! - `seed` : construcción/restauración de la Tarjeta Semilla
//! - `bus` : listener Unix + auth via SO_PEERCRED
//! - `graph::*` : estado del fractal (lifecycle, topology, shutdown,
//! bus_mediator, devices, capabilities)
//! - `events` : tipos de eventos del bucle primordial
//! - crates externos del workspace para CAS, soma, wasm, snapshot, kernel.
mod brain_glue;
mod bus;
mod events;
mod graph;
mod seed;
use anyhow::Context;
use ente_brain::{BrainState, IntrospectServer};
use ente_kernel::{become_child_subreaper, bootstrap_kernel_surface, spawn_sigchld_stream, spawn_uevent_stream};
use events::{ExitStatus, GraphEvent, ShutdownReason};
use graph::EnteGraph;
use nix::errno::Errno;
use nix::sys::wait::{waitpid, WaitPidFlag, WaitStatus};
use nix::unistd::{getpid, Pid};
use std::path::PathBuf;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::{error, info, warn};
/// Command-line options accepted by `ente-zero`.
///
/// Every field is optional: `None` means the flag was not given, or that its
/// value could not be used.
#[derive(Debug, Default)]
struct CliArgs {
    /// `--checkpoint`: file the graph snapshot is written to on shutdown.
    checkpoint: Option<PathBuf>,
    /// `--restore`: path handed to `seed::load` when building the seed card.
    restore: Option<PathBuf>,
    /// `--rules`: rules file loaded into the brain's rule engine at startup.
    rules: Option<PathBuf>,
    /// `--rules-out`: path passed to `BrainState::with_rules_out`.
    rules_out: Option<PathBuf>,
    /// `--audit-head`: head-pointer path for the audit log; also turns on
    /// the periodic audit auto-flush.
    audit_head: Option<PathBuf>,
    /// `--metrics-addr`: socket address for the metrics server.
    metrics_addr: Option<String>,
    /// `--brain-half-life`: half-life (logged as seconds) for the observer's
    /// time decay.
    brain_half_life: Option<f64>,
    /// `--autopromote-secs`: interval, in seconds, of the autopromote loop.
    autopromote_secs: Option<u64>,
}
/// Parses the process arguments (program name skipped) into [`CliArgs`].
///
/// Parsing never fails: anything unusable is logged with `warn!` and the
/// corresponding option stays `None`. That covers unknown arguments, flags
/// that are the last argument and therefore have no value, and numeric flags
/// whose value does not parse. When a flag is repeated, the last occurrence
/// wins.
///
/// Must be called after the tracing subscriber is installed, otherwise the
/// warnings are lost.
fn parse_args() -> CliArgs {
    /// Consumes the argument following `flag`, warning when there is none.
    fn take_value(flag: &str, rest: &mut impl Iterator<Item = String>) -> Option<String> {
        let value = rest.next();
        if value.is_none() {
            warn!(flag = %flag, "flag sin valor, ignorado");
        }
        value
    }

    /// Consumes and parses the argument following `flag`, warning when it is
    /// missing or malformed instead of silently dropping it.
    fn take_parsed<T: std::str::FromStr>(
        flag: &str,
        rest: &mut impl Iterator<Item = String>,
    ) -> Option<T> {
        let raw = take_value(flag, rest)?;
        match raw.parse::<T>() {
            Ok(parsed) => Some(parsed),
            Err(_) => {
                warn!(flag = %flag, value = %raw, "valor inválido para flag, ignorado");
                None
            }
        }
    }

    let mut args = std::env::args().skip(1);
    let mut checkpoint = None;
    let mut restore = None;
    let mut rules = None;
    let mut rules_out = None;
    let mut audit_head = None;
    let mut metrics_addr = None;
    let mut brain_half_life = None;
    let mut autopromote_secs = None;

    while let Some(a) = args.next() {
        match a.as_str() {
            flag @ "--checkpoint" => checkpoint = take_value(flag, &mut args).map(PathBuf::from),
            flag @ "--restore" => restore = take_value(flag, &mut args).map(PathBuf::from),
            flag @ "--rules" => rules = take_value(flag, &mut args).map(PathBuf::from),
            flag @ "--rules-out" => rules_out = take_value(flag, &mut args).map(PathBuf::from),
            flag @ "--audit-head" => audit_head = take_value(flag, &mut args).map(PathBuf::from),
            flag @ "--metrics-addr" => metrics_addr = take_value(flag, &mut args),
            flag @ "--brain-half-life" => brain_half_life = take_parsed(flag, &mut args),
            flag @ "--autopromote-secs" => autopromote_secs = take_parsed(flag, &mut args),
            other => warn!(arg = %other, "argumento desconocido, ignorado"),
        }
    }

    CliArgs {
        checkpoint,
        restore,
        rules,
        rules_out,
        audit_head,
        metrics_addr,
        brain_half_life,
        autopromote_secs,
    }
}
fn main() -> anyhow::Result<()> {
init_tracing();
let cli = parse_args();
let pid = getpid();
let dev_mode = pid != Pid::from_raw(1);
if dev_mode {
warn!(?pid, "ente-zero corriendo en DEV MODE (no PID 1) — kernel surface no se monta");
} else {
info!("ente-zero despierta como PID 1");
bootstrap_kernel_surface().context("bootstrap kernel surface")?;
become_child_subreaper().context("PR_SET_CHILD_SUBREAPER")?;
}
let card = seed::load(dev_mode, cli.restore.as_deref())?;
// current_thread runtime: ver doctrina al inicio del módulo.
let rt = tokio::runtime::Builder::new_current_thread()
.enable_io()
.enable_time()
.build()?;
rt.block_on(primordial_loop(
card, dev_mode,
cli.checkpoint, cli.rules, cli.rules_out,
cli.audit_head, cli.metrics_addr, cli.brain_half_life,
cli.autopromote_secs,
))
}
/// The primordial loop of Ente #0: wires every subsystem and then multiplexes
/// events until a shutdown is dispatched.
///
/// Setup, in order: graph event channel (capacity 64), SIGCHLD stream, uevent
/// stream (degraded to a silent channel on failure), bus listener, the
/// `EnteGraph` plus its seed dependencies, the brain (`BrainState`) with its
/// optional half-life observer, autopromote loop, audit head pointer and
/// initial rules, the optional metrics server, and the introspection server.
///
/// The loop then selects, in priority order (`biased`), over: SIGCHLD, uevents,
/// graph events, the 10-second capability-grant GC tick, and (dev mode only) a
/// 4-second exit timer.
///
/// Returns `Ok(())` once `dispatch_graph_event` reports that the loop must
/// end. Errors from `spawn_sigchld_stream`, `bus::spawn_bus` and
/// `instantiate_seed_dependencies` are propagated; failures while loading
/// rules or parsing the metrics address are only logged.
async fn primordial_loop(
seed_card: ente_card::EntityCard,
dev_mode: bool,
checkpoint_path: Option<PathBuf>,
rules_path: Option<PathBuf>,
rules_out: Option<PathBuf>,
audit_head: Option<PathBuf>,
metrics_addr: Option<String>,
brain_half_life: Option<f64>,
autopromote_secs: Option<u64>,
) -> anyhow::Result<()> {
info!(seed_id = %seed_card.id, label = %seed_card.label, "Ente #0 entra al bucle primordial");
let (graph_tx, mut graph_rx) = mpsc::channel::<GraphEvent>(64);
let mut sigchld = spawn_sigchld_stream()?;
// Uevents can fail in dev (no CAP_NET_ADMIN). We degrade to a never-ready
// channel instead of aborting the primordial loop.
let mut uevents = match spawn_uevent_stream() {
Ok(rx) => rx,
Err(e) => {
warn!(?e, "uevents deshabilitados (probablemente falta CAP_NET_ADMIN)");
let (_keep_tx, rx) = mpsc::channel::<ente_kernel::UEvent>(1);
// Deliberately leak the sender so it is never dropped: the channel stays
// open and `recv()` simply never yields.
std::mem::forget(_keep_tx);
rx
}
};
// Internal bus: the listener is started before any child is spawned so that
// their Announce has somewhere to arrive. Its path is injected into
// ENTE_BUS_SOCK by soma.
let bus_sock = bus::default_socket_path();
let bus_path = bus::spawn_bus(bus_sock, graph_tx.clone())?;
ente_soma::set_bus_sock(bus_path.to_string_lossy().into_owned());
let mut graph = EnteGraph::new(seed_card);
graph.instantiate_seed_dependencies(&graph_tx).await?;
// Brain: shared BrainState + introspection server.
// Window of 1024 events — enough for interesting correlations without
// spending PID 1 memory. In dev we lower the crystallization threshold so
// that the demo (few events) produces observable crystals.
let mut brain = if dev_mode {
// Relaxed thresholds so that the demo (few events) produces observable
// crystals. With P(b|a) normalized to [0,1], typical values in small
// samples are 0.2-0.5.
BrainState::with_params(1024, ente_brain::CrystallizationParams {
min_support: 2,
min_conditional_prob: 0.3,
min_pmi: 1.0,
})
} else {
BrainState::new(1024)
};
if let Some(out_path) = rules_out {
brain = brain.with_rules_out(out_path);
}
if let Some(hl) = brain_half_life {
let mut obs = brain.observer.write().await;
// Replace with a fresh observer that has a half-life. The previous
// state (empty at this point) is discarded.
*obs = ente_brain::Observer::new(1024).with_half_life(hl);
info!(hl_secs = hl, "observer con time-decay activo");
}
if let Some(secs) = autopromote_secs {
ente_brain::spawn_autopromote_loop(
brain.clone(),
ente_brain::AutopromoteParams {
interval_secs: secs,
threshold: brain.params, // same threshold as manual crystals
},
);
}
// With --audit-head, configure the head pointer and start the auto-flush.
if let Some(head_path) = audit_head {
// Re-create the AuditLog with a head pointer.
let new_audit = ente_brain::audit::AuditLog::new()
.with_head_pointer(head_path);
*brain.audit.write().await = new_audit;
spawn_audit_auto_flush(brain.clone());
}
// Initial rule load via KCL or JSON, if a --rules path was provided.
// A load failure is logged and the loop starts with whatever rules exist.
if let Some(path) = &rules_path {
match ente_brain::load_rules_file(path) {
Ok(rules) => {
let mut engine = brain.engine.write().await;
for r in rules {
engine.insert(r);
}
info!(count = engine.len(), path = %path.display(), "reglas cargadas");
}
Err(e) => warn!(?e, path = %path.display(), "carga de reglas falló"),
}
}
// Optional Prometheus endpoint. In dev it defaults to 127.0.0.1:9911 when
// the flag was not passed.
let metrics_addr = metrics_addr.or_else(|| {
if dev_mode { Some("127.0.0.1:9911".to_string()) } else { None }
});
if let Some(addr_s) = metrics_addr {
match addr_s.parse::<std::net::SocketAddr>() {
Ok(addr) => {
let s = brain.clone();
tokio::spawn(async move {
if let Err(e) = ente_brain::serve_metrics(s, addr).await {
warn!(?e, "metrics server cayó");
}
});
}
Err(e) => warn!(?e, addr = %addr_s, "metrics-addr inválido"),
}
}
spawn_brain_introspect(brain.clone());
let brain_sink = brain_glue::GraphSink {
graph_tx: graph_tx.clone(),
// Spawns auto-triggered by rules use the Seed's identity (the only
// Ente with Capability::Spawn by construction).
requester: graph.seed_id(),
};
// Automatic forwarding demo (dev only, and only if the binary exists).
if dev_mode && std::path::Path::new("target/debug/ente-echo").exists() {
spawn_echo_smoke_test(bus_path.clone());
}
// In dev mode we have no children by default and the loop would sit idle,
// so a 4-second timer requests shutdown.
let dev_exit = if dev_mode {
Some(tokio::time::sleep(Duration::from_secs(4)))
} else {
None
};
tokio::pin!(dev_exit);
// GC of expired capability grants — runs every 10 seconds.
let mut grant_purge = tokio::time::interval(Duration::from_secs(10));
grant_purge.tick().await; // discard the immediate first tick
loop {
tokio::select! {
// `biased` polls the branches in the order written, so reaping and
// uevents take priority over graph events and timers.
biased;
Some(_) = sigchld.recv() => {
reap_until_empty(&mut graph, &graph_tx).await;
}
Some(uevt) = uevents.recv() => {
graph.on_uevent(uevt, &graph_tx).await;
}
Some(evt) = graph_rx.recv() => {
// The brain observes before the graph mutates. The SubjectInfo
// snapshot is taken against the pre-mutation state.
feed_brain(&brain, &brain_sink, &graph, &evt).await;
if dispatch_graph_event(&mut graph, evt, &graph_tx, &checkpoint_path).await {
return Ok(());
}
}
_ = grant_purge.tick() => {
let n = graph.purge_expired_grants();
if n > 0 {
info!(purged = n, active = graph.active_grants_count(), "GC capability grants");
}
}
// The `if dev_mode` precondition keeps this branch from being polled
// when `dev_exit` is `None`, so the `unwrap()` is never reached then.
_ = async { dev_exit.as_mut().as_pin_mut().unwrap().await }, if dev_mode => {
info!("dev mode: timer expirado, cerrando bucle primordial");
// NOTE(review): this awaits a send on the bounded channel that only
// this loop drains; if the channel were full it could not complete —
// confirm that cannot happen in dev mode.
let _ = graph_tx.send(GraphEvent::Shutdown {
reason: ShutdownReason::SeedRequested,
}).await;
}
}
}
}
/// Routes a single `GraphEvent` to the matching `EnteGraph` handler.
///
/// Returns `true` only for `GraphEvent::Shutdown`, meaning the primordial loop
/// must end. In that case the graph snapshot is first written to `checkpoint`
/// (when one is configured; a write failure is only logged) and the shutdown
/// is cascaded. Every other event returns `false`.
async fn dispatch_graph_event(
    graph: &mut EnteGraph,
    evt: GraphEvent,
    tx: &mpsc::Sender<GraphEvent>,
    checkpoint: &Option<PathBuf>,
) -> bool {
    // Every non-shutdown event is handled and returns right away; only the
    // shutdown reason falls through to the tail of the function.
    let reason = match evt {
        GraphEvent::Shutdown { reason } => reason,
        GraphEvent::EnteDied { id, status } => {
            graph.on_death(id, status, tx).await;
            return false;
        }
        GraphEvent::CapabilityRequested { from, cap, reply } => {
            graph.mediate_capability(from, cap, reply).await;
            return false;
        }
        GraphEvent::SpawnRequest { card, requester } => {
            let outcome = graph.authorize_and_spawn(card, requester).await;
            if let Err(e) = outcome {
                warn!(?e, "spawn request error");
            }
            return false;
        }
        GraphEvent::BusRequest { peer, from, request, outbound, reply } => {
            graph.on_bus_request(peer, from, request, outbound, reply).await;
            return false;
        }
        GraphEvent::BusResponse { seq, response } => {
            graph.on_bus_response(seq, response).await;
            return false;
        }
        GraphEvent::BusConnClosed { ente_id } => {
            graph.on_bus_conn_closed(ente_id).await;
            return false;
        }
    };

    warn!(?reason, "shutdown del fractal");
    if let Some(path) = checkpoint {
        let snap = graph.snapshot();
        match snap.write(path) {
            Err(e) => warn!(?e, "snapshot write falló"),
            Ok(()) => {
                info!(path = %path.display(), entes = snap.entes.len(), "snapshot persistido")
            }
        }
    }
    graph.cascade_shutdown().await;
    true
}
/// Reaps, without blocking, every child that has already terminated.
///
/// Calls `waitpid(-1, WNOHANG)` repeatedly and forwards each exit or
/// signal-kill to `emit_death`. Stops when no more children are ready
/// (`StillAlive`), when there are no children at all (`ECHILD`), or on any
/// other `waitpid` error, which is logged.
async fn reap_until_empty(graph: &mut EnteGraph, tx: &mpsc::Sender<GraphEvent>) {
    loop {
        let (pid, status) = match waitpid(None, Some(WaitPidFlag::WNOHANG)) {
            Ok(WaitStatus::Exited(pid, code)) => (pid, ExitStatus::Exit(code)),
            Ok(WaitStatus::Signaled(pid, sig, _core)) => (pid, ExitStatus::Killed(sig)),
            // Nothing left to collect right now.
            Ok(WaitStatus::StillAlive) | Err(Errno::ECHILD) => return,
            // Stopped/Continued and similar: not a death, keep polling.
            Ok(_) => continue,
            Err(e) => {
                error!(?e, "waitpid fallo no recuperable en bucle de reaping");
                return;
            }
        };
        emit_death(graph, tx, pid, status).await;
    }
}
async fn emit_death(
graph: &EnteGraph,
tx: &mpsc::Sender<GraphEvent>,
pid: Pid,
status: ExitStatus,
) {
let id = match graph.lookup_pid(pid) {
Some(id) => id,
None => {
// Proceso adoptado (subreaper): no está en nuestro grafo.
info!(?pid, ?status, "huérfano cosechado (no en grafo)");
return;
}
};
let _ = tx.send(GraphEvent::EnteDied { id, status }).await;
}
/// Spawns a background task that smoke-tests bus forwarding.
///
/// After a 300 ms delay it connects to the bus at `bus_path`, invokes the echo
/// capability with a fixed payload, and logs the outcome. Every failure is
/// only logged.
fn spawn_echo_smoke_test(bus_path: PathBuf) {
    let smoke_test = async move {
        tokio::time::sleep(Duration::from_millis(300)).await;

        let mut client = match ente_bus::BusClient::connect(&bus_path).await {
            Ok(connected) => connected,
            Err(e) => {
                warn!(?e, "no se pudo conectar al bus para test");
                return;
            }
        };

        let request = ente_bus::BusRequest::Invoke {
            cap: ente_echo::echo_capability(),
            blob: b"hola fractal forwardeado".to_vec(),
        };
        match client.call(request).await {
            Err(e) => warn!(?e, "Invoke ECHO falló"),
            Ok(ente_bus::BusResponse::Invoked { result }) => {
                info!(echo = %String::from_utf8_lossy(&result), "Invoke ECHO round-trip OK");
            }
            Ok(other) => warn!(?other, "Invoke ECHO respuesta inesperada"),
        }
    };
    tokio::spawn(smoke_test);
}
/// Installs the global tracing subscriber.
///
/// The filter comes from the default environment variable when it is set and
/// valid; otherwise it falls back to `ente_zero=debug,info`. Targets are
/// included in the output.
fn init_tracing() {
    use tracing_subscriber::{fmt, EnvFilter};

    let filter = match EnvFilter::try_from_default_env() {
        Ok(from_env) => from_env,
        Err(_) => EnvFilter::new("ente_zero=debug,info"),
    };
    let subscriber = fmt().with_env_filter(filter).with_target(true);
    subscriber.init();
}
/// Resolves the Unix socket path of the brain introspection server.
///
/// Resolution order:
/// 1. `ENTE_BRAIN_SOCK`, used verbatim as the full socket path;
/// 2. `$XDG_RUNTIME_DIR/ente-brain.sock`;
/// 3. `$TMPDIR/ente-brain.sock`;
/// 4. `/tmp/ente-brain.sock`.
///
/// A variable that is set but empty is treated as unset, so an empty value can
/// never produce an empty path or a socket in the filesystem root. Values are
/// read as OS strings, so non-UTF-8 directories work too.
fn brain_introspect_path() -> PathBuf {
    /// Reads `key`, mapping "unset" and "empty" both to `None`.
    fn non_empty_var(key: &str) -> Option<std::ffi::OsString> {
        std::env::var_os(key).filter(|value| !value.is_empty())
    }

    if let Some(explicit) = non_empty_var("ENTE_BRAIN_SOCK") {
        return PathBuf::from(explicit);
    }

    let runtime_dir = non_empty_var("XDG_RUNTIME_DIR")
        .or_else(|| non_empty_var("TMPDIR"))
        .map(PathBuf::from)
        .unwrap_or_else(|| PathBuf::from("/tmp"));
    runtime_dir.join("ente-brain.sock")
}
/// Spawns a task that flushes the audit log to CAS every 10 seconds.
///
/// Best-effort: a failed flush is logged and the loop carries on. The original
/// author notes that the log's integrity rests on its hash chain and that
/// re-flushing is idempotent.
fn spawn_audit_auto_flush(state: BrainState) {
    let flush_loop = async move {
        let period = std::time::Duration::from_secs(10);
        let mut ticker = tokio::time::interval(period);
        // The first tick completes immediately; consume it so the first real
        // flush happens one full period after startup.
        ticker.tick().await;
        loop {
            ticker.tick().await;
            let mut log = state.audit.write().await;
            match log.flush_to_cas() {
                Err(e) => warn!(?e, "audit auto-flush falló"),
                Ok(n) if n != 0 => {
                    info!(written = n, total = log.flushed_count(), "audit auto-flush")
                }
                // Nothing new to write.
                Ok(_) => {}
            }
        }
    };
    tokio::spawn(flush_loop);
}
/// Spawns the brain introspection server on the socket path given by
/// `brain_introspect_path`. If the server stops with an error, it is logged.
fn spawn_brain_introspect(state: BrainState) {
    let socket = brain_introspect_path();
    let serve_task = async move {
        let server = IntrospectServer::new(state);
        match server.serve(&socket).await {
            Ok(_) => {}
            Err(e) => warn!(?e, "introspect server cayó"),
        }
    };
    tokio::spawn(serve_task);
}
/// Records the event in the brain's observer and runs the actions of any rule
/// it matches.
///
/// Events that `brain_glue::graph_event_to_brain` does not map are ignored.
/// The observer's 16 most recent events are passed to the engine as history,
/// which is what Sequence rules match against.
async fn feed_brain(
    brain: &BrainState,
    sink: &brain_glue::GraphSink,
    graph: &EnteGraph,
    evt: &GraphEvent,
) {
    let (kind, subj) = match brain_glue::graph_event_to_brain(evt, graph) {
        Some(mapped) => mapped,
        None => return,
    };

    // Record and snapshot under one write lock, released before the engine is
    // consulted.
    let history: Vec<ente_brain::TimedEvent> = {
        let mut observer = brain.observer.write().await;
        observer.record(kind.clone());
        let recent = observer.recent(16);
        recent.cloned().collect()
    };

    // The read lock lasts only for this statement, so actions run without it.
    let matched = brain.engine.read().await.dispatch(&kind, &subj, &history);
    if matched.is_empty() {
        return;
    }
    ente_brain::dispatch_actions(&matched, sink).await;
}