refactor(naming): A1 — ente→arje, vista→revista, pluma→fana

Rename batch de la Fase A del PLAN_MACRO: - 25 crates ente-* → arje-* (protocol/init/runtime/compat). El linaje arje (init Linux) queda con prefijo coherente. - vista → revista (revista-core + revista-web). - pluma → fana (fana-md + fana-md-reader-web). fana absorbe el linaje markdown de pluma; será el writer DAG editor (prioridad alta). Cambios: - git mv de 29 crate dirs + 2 SDDs - package/lib/bin names + path refs + imports .rs reescritos - workspace Cargo.toml + comentarios de sección - SDDs de init/runtime/compat/protocol actualizados a arje- - SDD de revista + SDD de fana (reescrito: writer DAG editor) - docs/STATUS.md, ROADMAP.md, PLAN_MACRO.md, arje-boot.md, arje-replace-systemd.md actualizados - docs/changelog/akasha.md → chasqui.md scripts/rename-fase-a.py idempotente (--dry-run soportado). cargo check --workspace verde. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-20 00:10:14 +00:00
parent 3fc6dcfa72
commit b83d40a833
159 changed files with 2384 additions and 1111 deletions
@@ -0,0 +1,199 @@
+//! Cache de embeddings keyed por sha256 del contenido + model_id.
+//!
+//! Razón de existir: el modelo real (`fastembed-allMiniLML6V2`) es
+//! caro (1-50 ms por archivo según tamaño y CPU). Cada vez que el
+//! daemon de chasqui re-publica una Mónada o el watcher dispara un
+//! re-cluster por cambio de FS, todos los archivos pasan otra vez
+//! por embed. Para árboles de 1000 archivos, eso son segundos
+//! desperdiciados re-embedidando contenido que no cambió.
+//!
+//! ## Diseño
+//!
+//! - **Cache key**: `sha256(bytes que el modelo realmente vio)` +
+//!   `MODEL_ID` (string). Usar el sha de los bytes-vistos garantiza
+//!   que la cache no devuelva un embedding de contenido viejo
+//!   simplemente porque el path no cambió.
+//! - **Cache value**: el `Vec<f32>` serializado como bytes
+//!   little-endian (4 bytes por f32). Compacto, sin overhead de
+//!   bincode/postcard para datos numéricos puros.
+//! - **Backend**: sled, tree único `embed_cache_v1`. Path:
+//!   `$XDG_CACHE_HOME/brahman/chasqui-nous-real-embed-cache.sled`.
+//!
+//! ## Versionado
+//!
+//! El nombre del tree (`embed_cache_v1`) es el "schema version" del
+//! formato value. Si bumpeamos a (p. ej.) almacenar también el
+//! tiempo de cómputo o el ONNX session id, creamos `embed_cache_v2`
+//! y el viejo queda como dato muerto que sled puede limpiar.
+//!
+//! El `MODEL_ID` viaja dentro del key, así que cambiar de modelo
+//! invalida implícitamente las entradas viejas (no se accede más
+//! a esos keys; sled las mantiene hasta GC manual).
+
+use std::path::PathBuf;
+
+/// Thin wrapper over a single sled tree exposing only the operations that
+/// `handle_file` needs: lookup, store and an entry count.
+///
+/// Cloning is derived, so a clone refers to the same underlying tree handle.
+#[derive(Clone)]
+pub struct EmbedCache {
+    tree: sled::Tree,
+}
+
+impl EmbedCache {
+    /// Name of the sled tree holding the embeddings. It doubles as the schema
+    /// version of the stored value format, so it lives in exactly one place.
+    const TREE_NAME: &'static str = "embed_cache_v1";
+
+    /// Opens (or creates) the cache at its canonical path (see `default_path`).
+    ///
+    /// # Errors
+    ///
+    /// Returns the sled error if the database or the tree cannot be opened.
+    pub fn open() -> Result<Self, sled::Error> {
+        let path = default_path();
+        if let Some(parent) = path.parent() {
+            // Best-effort: if the directory cannot be created, the sled open
+            // below fails with its own, more specific error.
+            let _ = std::fs::create_dir_all(parent);
+        }
+        Self::open_db(&path)
+    }
+
+    /// Test-only variant: opens an ephemeral cache rooted at `dir`.
+    #[cfg(test)]
+    pub fn open_at(dir: &std::path::Path) -> Result<Self, sled::Error> {
+        Self::open_db(dir)
+    }
+
+    /// Shared open sequence for `open` and `open_at`: opens the sled database
+    /// at `path` and then the versioned tree inside it.
+    fn open_db(path: &std::path::Path) -> Result<Self, sled::Error> {
+        let db = sled::open(path)?;
+        let tree = db.open_tree(Self::TREE_NAME)?;
+        Ok(Self { tree })
+    }
+
+    /// Looks up the embedding stored for `(file_sha, model_id)`.
+    ///
+    /// Returns `Some(vec)` on a hit. Returns `None` on a miss, when the stored
+    /// bytes cannot be decoded, or when sled reports a read error; the read
+    /// error is logged and treated as a miss so the caller simply re-embeds.
+    pub fn get(&self, file_sha: &[u8; 32], model_id: &str) -> Option<Vec<f32>> {
+        let key = build_key(file_sha, model_id);
+        match self.tree.get(&key) {
+            Ok(Some(bytes)) => decode_embedding(&bytes),
+            Ok(None) => None,
+            Err(e) => {
+                // Mirror `put`: a cache I/O failure is recoverable, but it
+                // should be visible in the logs rather than silently dropped.
+                tracing::warn!(error = %e, "embed-cache get falló (no-fatal)");
+                None
+            }
+        }
+    }
+
+    /// Stores `embedding` under `(file_sha, model_id)`.
+    ///
+    /// Errors are logged but not propagated: a failed cache write must not
+    /// discard an embedding that was computed successfully.
+    pub fn put(&self, file_sha: &[u8; 32], model_id: &str, embedding: &[f32]) {
+        let key = build_key(file_sha, model_id);
+        let bytes = encode_embedding(embedding);
+        if let Err(e) = self.tree.insert(key, bytes) {
+            tracing::warn!(error = %e, "embed-cache put falló (no-fatal)");
+        }
+    }
+
+    /// Current number of entries in the tree (best-effort, used for logging).
+    pub fn len(&self) -> usize {
+        self.tree.len()
+    }
+}
+
+/// Resolves the on-disk location of the cache database.
+///
+/// Resolution order:
+/// 1. `NOUSER_NOUS_REAL_CACHE`, used verbatim as the full path when set.
+/// 2. `$XDG_CACHE_HOME/brahman/chasqui-nous-real-embed-cache.sled`.
+/// 3. `$HOME/.cache/brahman/chasqui-nous-real-embed-cache.sled`.
+/// 4. The same relative path under the system temp directory, as a last
+///    resort so environments without `HOME` still get a usable path.
+fn default_path() -> PathBuf {
+    match std::env::var("NOUSER_NOUS_REAL_CACHE") {
+        Ok(explicit) => PathBuf::from(explicit),
+        Err(_) => {
+            let cache_root = match std::env::var("XDG_CACHE_HOME") {
+                Ok(xdg) => PathBuf::from(xdg),
+                Err(_) => match std::env::var("HOME") {
+                    Ok(home) => PathBuf::from(home).join(".cache"),
+                    Err(_) => std::env::temp_dir(),
+                },
+            };
+            cache_root
+                .join("brahman")
+                .join("chasqui-nous-real-embed-cache.sled")
+        }
+    }
+}
+
+/// Builds the cache key: the 32 sha bytes, a `0xff` separator byte, then the
+/// UTF-8 bytes of `model_id`.
+fn build_key(file_sha: &[u8; 32], model_id: &str) -> Vec<u8> {
+    // The separator is cheap insurance that the model-id part can never be
+    // confused with the sha part of the key.
+    [&file_sha[..], &[0xff][..], model_id.as_bytes()].concat()
+}
+
+/// Serializes an embedding as the concatenation of each `f32` in
+/// little-endian byte order (4 bytes per component).
+fn encode_embedding(v: &[f32]) -> Vec<u8> {
+    let mut encoded = Vec::with_capacity(v.len() * 4);
+    encoded.extend(v.iter().flat_map(|component| component.to_le_bytes()));
+    encoded
+}
+
+/// Inverse of `encode_embedding`: reads consecutive little-endian `f32`
+/// values from `bytes`.
+///
+/// Returns `None` when the length is not a multiple of 4, i.e. the stored
+/// value cannot be a whole number of components.
+fn decode_embedding(bytes: &[u8]) -> Option<Vec<f32>> {
+    let words = bytes.chunks_exact(4);
+    if !words.remainder().is_empty() {
+        return None;
+    }
+    Some(
+        words
+            .map(|w| f32::from_le_bytes([w[0], w[1], w[2], w[3]]))
+            .collect(),
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Shorthand for the sha256 digest used as the content part of the key.
+    fn sha(s: &[u8]) -> [u8; 32] {
+        arje_cas::sha256_of(s)
+    }
+
+    #[test]
+    fn roundtrip_returns_same_vector() {
+        let dir = tempfile::tempdir().unwrap();
+        let cache = EmbedCache::open_at(dir.path()).unwrap();
+        let key = sha(b"hello world");
+        // Arbitrary components; deliberately not an approximation of a std
+        // constant such as PI, which clippy's `approx_constant` lint rejects.
+        let v = vec![0.1f32, -0.5, 1.0, 3.25];
+        cache.put(&key, "real-fastembed-allMiniLML6V2-384d", &v);
+        let got = cache
+            .get(&key, "real-fastembed-allMiniLML6V2-384d")
+            .expect("hit esperado");
+        assert_eq!(got, v);
+    }
+
+    #[test]
+    fn miss_returns_none() {
+        let dir = tempfile::tempdir().unwrap();
+        let cache = EmbedCache::open_at(dir.path()).unwrap();
+        let key = sha(b"never stored");
+        assert!(cache.get(&key, "real-fastembed-allMiniLML6V2-384d").is_none());
+    }
+
+    #[test]
+    fn different_models_do_not_collide() {
+        let dir = tempfile::tempdir().unwrap();
+        let cache = EmbedCache::open_at(dir.path()).unwrap();
+        let key = sha(b"same content");
+        cache.put(&key, "model-a", &[1.0, 2.0]);
+        cache.put(&key, "model-b", &[7.0, 8.0]);
+        assert_eq!(cache.get(&key, "model-a").unwrap(), vec![1.0, 2.0]);
+        assert_eq!(cache.get(&key, "model-b").unwrap(), vec![7.0, 8.0]);
+    }
+
+    #[test]
+    fn different_content_different_keys() {
+        let dir = tempfile::tempdir().unwrap();
+        let cache = EmbedCache::open_at(dir.path()).unwrap();
+        let k1 = sha(b"abc");
+        let k2 = sha(b"abd");
+        cache.put(&k1, "m", &[1.0]);
+        assert!(cache.get(&k2, "m").is_none());
+    }
+
+    #[test]
+    fn corrupted_value_returns_none() {
+        // If the stored bytes have a length that is not a multiple of 4,
+        // decoding must fail cleanly (None) instead of panicking.
+        let dir = tempfile::tempdir().unwrap();
+        let cache = EmbedCache::open_at(dir.path()).unwrap();
+        let key = sha(b"x");
+        // Insert invalid bytes directly into the tree, bypassing `put`.
+        let raw_key = build_key(&key, "m");
+        cache.tree.insert(raw_key, &[1u8, 2, 3][..]).unwrap();
+        assert!(cache.get(&key, "m").is_none());
+    }
+}
@@ -0,0 +1,205 @@
+//! Modo embeddings: usa fastembed-rs (ONNX Runtime) para producir
+//! vectores reales de text-embedding.
+//!
+//! Modelo default: `all-MiniLM-L6-v2` (384-d). Se descarga al primer
+//! arranque a `~/.cache/fastembed` y queda cacheado.
+//!
+//! ## Mapeo del contrato
+//!
+//! - `EmbedText`: pasa el texto al modelo, devuelve el vector 384-d.
+//! - `EmbedFile`: lee hasta los primeros 8 KiB del archivo, los
+//!   interpreta como UTF-8 con replacement-char, y los embeda como
+//!   texto. Para archivos binarios el resultado no es semánticamente
+//!   útil — caller decide qué hacer.
+//! - `Ping`: devuelve `model_id` y `embed_dim` reales.
+
+use std::fs::File;
+use std::io::Read;
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::Instant;
+
+use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
+use chasqui_nous::{
+    EmbedFilePayload, EmbedRequest, EmbedResponse, EmbedTextPayload, ErrorResponse, PingResponse,
+    RequestKind,
+};
+use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
+use tokio::net::UnixStream;
+use tracing::{info, warn};
+
+use crate::cache::EmbedCache;
+
+/// Size in bytes of the buffer `handle_file` reads a file into; only this
+/// leading window of the file (8 KiB) is hashed and embedded.
+const MAX_FILE_BYTES: usize = 8192;
+
+/// Concrete embedding backend: owns the loaded fastembed model.
+pub struct Backend {
+    model: TextEmbedding,
+}
+
+impl Backend {
+    /// Loads the `AllMiniLML6V2` model with download progress enabled.
+    ///
+    /// # Errors
+    ///
+    /// Returns the fastembed initialization failure rendered as a string.
+    pub fn init() -> Result<Self, String> {
+        info!("cargando modelo all-MiniLM-L6-v2 (puede descargar ~80MB la primera vez)");
+        let options =
+            InitOptions::new(EmbeddingModel::AllMiniLML6V2).with_show_download_progress(true);
+        match TextEmbedding::try_new(options) {
+            Ok(model) => {
+                info!("modelo listo");
+                Ok(Self { model })
+            }
+            Err(e) => Err(format!("fastembed init: {e}")),
+        }
+    }
+
+    /// Embeds a single text and returns its vector.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the model reports an error or returns no vectors at all.
+    fn embed_one(&self, text: &str) -> Result<Vec<f32>, String> {
+        let mut vectors = self
+            .model
+            .embed(vec![text], None)
+            .map_err(|e| format!("embed: {e}"))?
+            .into_iter();
+        // One input was submitted, so only the first output is relevant.
+        match vectors.next() {
+            Some(first) => Ok(first),
+            None => Err("fastembed devolvió 0 vectores".to_string()),
+        }
+    }
+}
+
+/// Serves one connection: reads a single JSON request line, dispatches it by
+/// kind, writes one JSON response line and shuts the stream down.
+///
+/// A connection closed before sending anything is not an error. Malformed
+/// JSON and handler failures are answered with an error response rather than
+/// surfaced as an `Err`; only socket I/O failures are returned to the caller.
+///
+/// NOTE(review): `read_line` is unbounded, so a peer that never sends a
+/// newline can grow the buffer without limit. The handlers also run
+/// synchronously inside this task (file I/O and model inference) — confirm
+/// both are acceptable for the deployment.
+pub async fn handle_conn(
+    stream: UnixStream,
+    backend: Arc<Backend>,
+    cache: Option<EmbedCache>,
+) -> std::io::Result<()> {
+    let mut reader = BufReader::new(stream);
+    let mut raw = String::new();
+    if reader.read_line(&mut raw).await? == 0 {
+        return Ok(());
+    }
+
+    // The buffered reader is no longer needed once the request line is in.
+    let parsed: Result<EmbedRequest, _> = serde_json::from_str(&raw);
+    let mut stream = reader.into_inner();
+    let req = match parsed {
+        Ok(r) => r,
+        Err(e) => return write_error(stream, format!("JSON inválido: {e}")).await,
+    };
+
+    let started = Instant::now();
+    let outcome = match req.kind {
+        RequestKind::EmbedFile => handle_file(req.payload, &backend, cache.as_ref(), started),
+        RequestKind::EmbedText => handle_text(req.payload, &backend, started),
+        RequestKind::Ping => handle_ping(),
+    };
+
+    let json = match outcome {
+        Ok(json) => json,
+        Err(msg) => return write_error(stream, msg).await,
+    };
+    stream.write_all(json.as_bytes()).await?;
+    stream.write_all(b"\n").await?;
+    stream.shutdown().await
+}
+
+/// Handles an `EmbedText` request: decodes the payload, embeds its text and
+/// returns the serialized `EmbedResponse`.
+///
+/// `started` is the instant the request began processing; the elapsed time
+/// since then is reported in the response.
+fn handle_text(
+    payload: serde_json::Value,
+    backend: &Backend,
+    started: Instant,
+) -> Result<String, String> {
+    let request = match serde_json::from_value::<EmbedTextPayload>(payload) {
+        Ok(parsed) => parsed,
+        Err(e) => return Err(format!("payload: {e}")),
+    };
+    info!(text_len = request.text.len(), "embed_text");
+
+    let embedding = backend.embed_one(&request.text)?;
+    let model = super::model_id().to_string();
+    let elapsed_ms = started.elapsed().as_millis() as u64;
+    let body = EmbedResponse {
+        embedding,
+        model,
+        elapsed_ms,
+    };
+    serde_json::to_string(&body).map_err(|e| format!("encode: {e}"))
+}
+
+/// Handles an `EmbedFile` request: embeds the leading window of a file,
+/// consulting the embedding cache first when one is available.
+///
+/// At most `MAX_FILE_BYTES` bytes are read from the start of the file. Those
+/// bytes are hashed to form the cache key, interpreted as UTF-8 with
+/// replacement characters, and embedded as text.
+///
+/// `started` is the instant the request began processing; the elapsed time
+/// since then is reported in the response on both the hit and miss paths.
+///
+/// # Errors
+///
+/// Returns a message if the payload cannot be decoded, the file cannot be
+/// opened or read, the model fails, or the response cannot be serialized.
+/// Cache and CAS write failures are non-fatal.
+fn handle_file(
+    payload: serde_json::Value,
+    backend: &Backend,
+    cache: Option<&EmbedCache>,
+    started: Instant,
+) -> Result<String, String> {
+    let p: EmbedFilePayload =
+        serde_json::from_value(payload).map_err(|e| format!("payload: {e}"))?;
+
+    let path = PathBuf::from(&p.path);
+    let file = File::open(&path).map_err(|e| format!("abrir archivo: {e}"))?;
+    // A single `read` call may legally return fewer bytes than are available,
+    // which would make both the embedded text and the cache key depend on how
+    // the OS happened to split the read. `take` + `read_to_end` keeps reading
+    // until the window is full or EOF is reached.
+    let mut buf = Vec::with_capacity(MAX_FILE_BYTES);
+    file.take(MAX_FILE_BYTES as u64)
+        .read_to_end(&mut buf)
+        .map_err(|e| format!("leer archivo: {e}"))?;
+    let n = buf.len();
+
+    let model_id = super::model_id();
+    // Hash of the bytes the model will actually see. If the file grows past
+    // the MAX_FILE_BYTES window without its head changing, the hash does NOT
+    // change and the cached embedding stays valid (the model never saw the
+    // extra bytes). If the head changes, the hash changes and we re-embed.
+    let file_sha = arje_cas::sha256_of(&buf);
+
+    // Single place that turns an embedding into the wire response, shared by
+    // the cache-hit and cache-miss paths.
+    let respond = |embedding: Vec<f32>| {
+        let resp = EmbedResponse {
+            embedding,
+            model: model_id.to_string(),
+            elapsed_ms: started.elapsed().as_millis() as u64,
+        };
+        serde_json::to_string(&resp).map_err(|e| format!("encode: {e}"))
+    };
+
+    if let Some(cached) = cache.and_then(|c| c.get(&file_sha, model_id)) {
+        info!(
+            path = %p.path,
+            sha = %arje_cas::hex(&file_sha),
+            bytes = n,
+            "embed_file: cache HIT"
+        );
+        return respond(cached);
+    }
+
+    info!(
+        path = %p.path,
+        sha = %arje_cas::hex(&file_sha),
+        bytes = n,
+        "embed_file: cache MISS — invocando modelo"
+    );
+
+    // Write-through to the arje CAS: makes the file head addressable by
+    // content. It is not the source of truth for the cache (sled is), but it
+    // leaves a record that CAS tooling can inspect and lets other consumers
+    // resolve the bytes by hash.
+    // NOTE(review): the previous comment named the tool `ente-cas gc`;
+    // presumably it is `arje-cas` after the rename — confirm.
+    if let Err(e) = arje_cas::store(&buf) {
+        // Non-fatal: if the CAS write fails we still cache the embedding.
+        warn!(error = %e, "arje_cas::store falló (no-fatal)");
+    }
+
+    // Borrow the lossy-decoded text directly; no owned copy is needed.
+    let text = String::from_utf8_lossy(&buf);
+    let v = backend.embed_one(&text)?;
+
+    if let Some(cache) = cache {
+        cache.put(&file_sha, model_id, &v);
+    }
+
+    respond(v)
+}
+
+/// Handles a `Ping` request: reports the model id and embedding dimension
+/// exposed by the parent module, serialized as a `PingResponse`.
+fn handle_ping() -> Result<String, String> {
+    let model = super::model_id().to_string();
+    let embed_dim = super::embed_dim();
+    match serde_json::to_string(&PingResponse { model, embed_dim }) {
+        Ok(json) => Ok(json),
+        Err(e) => Err(format!("encode: {e}")),
+    }
+}
+
+/// Logs `msg`, sends it to the peer as a one-line JSON `ErrorResponse` and
+/// shuts the stream down.
+///
+/// If the error response itself cannot be serialized, a fixed fallback JSON
+/// body is sent instead.
+async fn write_error(mut stream: UnixStream, msg: String) -> std::io::Result<()> {
+    warn!(error = %msg, "respuesta de error");
+    let wire = match serde_json::to_string(&ErrorResponse { error: msg }) {
+        Ok(json) => json,
+        Err(_) => String::from("{\"error\":\"encode\"}"),
+    };
+    stream.write_all(wire.as_bytes()).await?;
+    stream.write_all(b"\n").await?;
+    stream.shutdown().await
+}
@@ -0,0 +1,202 @@
+//! `chasqui-nous-real` — proveedor Nous con LLM real (gated por feature).
+//!
+//! ## Build modes
+//!
+//! - `cargo build -p chasqui-nous-real`
+//!   Compila como **stub**: bin que arranca, sidecarea al brahman-init
+//!   pero rechaza toda request con un error explicando que falta la
+//!   feature. Útil para que `cargo build --workspace` no requiera ML
+//!   deps.
+//!
+//! - `cargo build -p chasqui-nous-real --features embeddings`
+//!   Compila con `fastembed` + ONNX Runtime descargado por Cargo.
+//!   Modelo default: `all-MiniLM-L6-v2` (384-d, ~80 MB descargado al
+//!   primer run y cacheado en `~/.cache/fastembed`).
+//!
+//! ## Diseño
+//!
+//! Mismo contrato wire que `chasqui-nous-mock` (`chasqui-nous` crate). La
+//! diferencia operativa: real produce 384-d con semantic content
+//! (text-embedding del modelo); mock produce 32-d con metadata-hashing.
+//! No son intercambiables a media-deployment — los centroides de
+//! Mónadas calculadas con uno NO matchean con el otro.
+//!
+//! La Card declara `priority_contexts.prod = { priority_offset: +1 }`,
+//! contrapeso del mock que tiene `+1 en test`. Así el broker brahman
+//! elige automáticamente:
+//! - `BRAHMAN_BROKER_CONTEXT=test` → mock gana.
+//! - `BRAHMAN_BROKER_CONTEXT=prod` → real gana.
+//! - sin contexto → empate por label alfabético.
+
+#![forbid(unsafe_code)]
+
+use std::collections::BTreeMap;
+
+use brahman_card::{
+    ulid::Ulid, Card, CardKind, ContextBias, Flow, Flows, Lifecycle, Payload, Priority,
+    Supervision, TypeRef,
+};
+use chasqui_nous::{transport, FLOW_EMBED_REQUEST, FLOW_EMBED_RESULT, FLOW_TYPE_NAME};
+use tokio::net::UnixListener;
+use tracing::info;
+
+#[cfg(feature = "embeddings")]
+mod cache;
+#[cfg(feature = "embeddings")]
+mod embeddings;
+#[cfg(not(feature = "embeddings"))]
+mod stub;
+
+/// Model identifier used when the `embeddings` feature is enabled.
+#[cfg(feature = "embeddings")]
+const MODEL_ID: &str = "real-fastembed-allMiniLML6V2-384d";
+/// Placeholder model identifier for stub builds (feature disabled).
+#[cfg(not(feature = "embeddings"))]
+const MODEL_ID: &str = "real-stub-no-feature";
+
+/// Embedding dimension reported when the `embeddings` feature is enabled.
+#[cfg(feature = "embeddings")]
+const EMBED_DIM: u32 = 384;
+/// Stub builds produce no embeddings, so the reported dimension is zero.
+#[cfg(not(feature = "embeddings"))]
+const EMBED_DIM: u32 = 0;
+
+/// Entry point: binds the provider socket, publishes the Card through the
+/// sidecar, initializes the model and cache (embeddings builds only) and then
+/// serves connections forever, one spawned task per connection.
+///
+/// # Errors
+///
+/// Returns an I/O error if the socket cannot be prepared or bound, if model
+/// initialization fails, or if `accept` fails.
+#[tokio::main(flavor = "current_thread")]
+async fn main() -> std::io::Result<()> {
+    init_tracing();
+
+    #[cfg(not(feature = "embeddings"))]
+    info!(
+        "chasqui-nous-real corriendo en modo STUB (compilá con \
+        --features embeddings para activar el modelo)"
+    );
+
+    // 1. Resolve the data-plane socket (provider name "real", distinct from
+    //    the mock so both can coexist).
+    let sock_path = transport::provider_socket_path("real");
+    // Remove a stale socket left by a previous run. Attempting the removal
+    // and tolerating `NotFound` avoids the check-then-act race of
+    // `exists()` + `remove_file`, and also clears a dangling symlink, which
+    // `exists()` would report as absent.
+    match std::fs::remove_file(&sock_path) {
+        Ok(()) => {}
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
+        Err(e) => return Err(e),
+    }
+    if let Some(parent) = sock_path.parent() {
+        std::fs::create_dir_all(parent)?;
+    }
+    let listener = UnixListener::bind(&sock_path)?;
+    info!(socket = %sock_path.display(), "chasqui-nous-real escuchando");
+
+    // 2. Sidecar to brahman-init with a Card declaring the socket.
+    let card = build_card(sock_path.clone());
+    info!(label = %card.label, mode = MODEL_ID, "publicando Card al brahman-init");
+    brahman_sidecar::spawn(card);
+
+    // 3. Initialize the model (embeddings mode only).
+    #[cfg(feature = "embeddings")]
+    let backend = embeddings::Backend::init().map_err(|e| {
+        std::io::Error::other(format!("init modelo: {e}"))
+    })?;
+    #[cfg(feature = "embeddings")]
+    let backend = std::sync::Arc::new(backend);
+
+    // 4. Open the embedding cache (local sled, sha256-keyed).
+    //    If it fails we continue without a cache: every request is embedded.
+    #[cfg(feature = "embeddings")]
+    let embed_cache = match cache::EmbedCache::open() {
+        Ok(c) => {
+            info!(entries = c.len(), "embed-cache abierto");
+            Some(c)
+        }
+        Err(e) => {
+            tracing::warn!(error = %e, "embed-cache no disponible — todas las requests irán al modelo");
+            None
+        }
+    };
+
+    // 5. Accept loop: each connection is handled in its own task so a slow or
+    //    failing peer only affects its own task's result.
+    loop {
+        let (stream, _addr) = listener.accept().await?;
+
+        #[cfg(feature = "embeddings")]
+        {
+            let backend = backend.clone();
+            let cache = embed_cache.clone();
+            tokio::spawn(async move {
+                if let Err(e) = embeddings::handle_conn(stream, backend, cache).await {
+                    tracing::warn!(error = %e, "conn falló");
+                }
+            });
+        }
+
+        #[cfg(not(feature = "embeddings"))]
+        {
+            tokio::spawn(async move {
+                if let Err(e) = stub::handle_conn(stream).await {
+                    tracing::warn!(error = %e, "conn falló");
+                }
+            });
+        }
+    }
+}
+
+/// Installs the global tracing subscriber: compact output without targets,
+/// filtered by the default environment filter or `info` when that is unset
+/// or invalid.
+fn init_tracing() {
+    let filter = tracing_subscriber::EnvFilter::try_from_default_env()
+        .unwrap_or_else(|_| "info".into());
+    let builder = tracing_subscriber::fmt()
+        .with_env_filter(filter)
+        .with_target(false)
+        .compact();
+    builder.init();
+}
+
+/// Builds the Card this provider announces.
+///
+/// Notable fields:
+/// - label `chasqui.nous_real`;
+/// - a `prod` entry in `priority_contexts` with `priority_offset: 1`;
+/// - `service_socket` set to the given path;
+/// - one input flow (embed request) and one output flow (embed result), both
+///   typed with the shared primitive flow type name.
+///
+/// All remaining fields take their `Default` values.
+fn build_card(service_socket: std::path::PathBuf) -> Card {
+    let priority_contexts = BTreeMap::from([(
+        "prod".into(),
+        ContextBias {
+            pin_to: None,
+            priority_offset: 1,
+        },
+    )]);
+
+    // Both flows carry the same primitive type reference.
+    let primitive = || TypeRef::Primitive {
+        name: FLOW_TYPE_NAME.into(),
+    };
+    let input = vec![Flow {
+        name: FLOW_EMBED_REQUEST.into(),
+        ty: primitive(),
+        pin_to: None,
+    }];
+    let output = vec![Flow {
+        name: FLOW_EMBED_RESULT.into(),
+        ty: primitive(),
+        pin_to: None,
+    }];
+
+    Card {
+        schema_version: brahman_card::CARD_SCHEMA_VERSION,
+        id: Ulid::new(),
+        label: "chasqui.nous_real".into(),
+        payload: Payload::Virtual,
+        supervision: Supervision::Delegate,
+        lifecycle: Lifecycle::Daemon,
+        priority: Priority::Normal,
+        kind: CardKind::Ente,
+        service_socket: Some(service_socket),
+        flow: Flows { input, output },
+        priority_contexts,
+        ..Default::default()
+    }
+}
+
+// Shared helpers. Marked allow(dead_code) because some of them go unused in
+// stub mode, yet we want them available consistently in both build modes.
+
+/// Returns the `MODEL_ID` selected at compile time by the `embeddings` feature.
+#[allow(dead_code)]
+pub(crate) fn model_id() -> &'static str {
+    MODEL_ID
+}
+
+/// Returns the `EMBED_DIM` selected at compile time by the `embeddings` feature.
+#[allow(dead_code)]
+pub(crate) fn embed_dim() -> u32 {
+    EMBED_DIM
+}
@@ -0,0 +1,36 @@
+//! Modo stub: arranca el bin pero rechaza las requests con un error
+//! que explica que falta la feature `embeddings`.
+
+use chasqui_nous::{EmbedRequest, ErrorResponse};
+use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
+use tokio::net::UnixStream;
+use tracing::warn;
+
+/// Stub connection handler: reads one request line and always answers with an
+/// error explaining that the binary was built without the `embeddings`
+/// feature, then shuts the stream down.
+///
+/// A connection closed before sending anything is not an error.
+///
+/// # Errors
+///
+/// Returns any socket I/O error hit while reading or writing.
+pub async fn handle_conn(stream: UnixStream) -> std::io::Result<()> {
+    let mut reader = BufReader::new(stream);
+    let mut line = String::new();
+    if reader.read_line(&mut line).await? == 0 {
+        return Ok(());
+    }
+
+    // The request is rejected either way; parsing only serves diagnostics, so
+    // a malformed request is surfaced in the logs instead of being parsed and
+    // silently ignored.
+    if let Err(e) = serde_json::from_str::<EmbedRequest>(&line) {
+        warn!(error = %e, "request malformada recibida en modo stub");
+    }
+
+    warn!("rechazando request en modo stub (feature `embeddings` ausente)");
+
+    let resp = ErrorResponse {
+        // Plain literal: there is nothing to interpolate, so no `format!`.
+        error: "chasqui-nous-real compilado sin la feature `embeddings`. \
+             Rebuild con: cargo build -p chasqui-nous-real --features embeddings"
+            .to_string(),
+    };
+    let mut stream = reader.into_inner();
+    let payload = serde_json::to_string(&resp).unwrap_or_else(|_| {
+        "{\"error\":\"stub mode and serialization failed\"}".to_string()
+    });
+    stream.write_all(payload.as_bytes()).await?;
+    stream.write_all(b"\n").await?;
+    stream.shutdown().await?;
+    Ok(())
+}