chore: monorepo inicial con arje + minga + yahweh absorbidos

Workspace en 4 ejes (core/modules/apps/shared): - core/: 24 crates de arje (Init systemd-compatible: ente-card, ente-zero, ente-kernel, ente-bus, ente-cas, ente-soma, ente-wasm, ente-snapshot, ente-brain, ente-echo, ente-policy-provider, + 12 crates *-compat) - modules/semantic_dht/: 5 crates de minga (minga-core con AST/CAS/MST, minga-p2p con libp2p Kad, minga-store, minga-vfs, minga-cli) - modules/ui_engine/: 11 crates de yahweh (libs/{core,theme,bus,providers}, widgets/{tree,splitter,tabs,tiled,container_core,text_input}) - apps/: 5 crates de yahweh (file_explorer, database_explorer, text_viewer, image_viewer, yahweh-shell) - shared_wit/protocol.wit: handshake/lifecycle inicial Cargo.toml unificado: thiserror bumped a 2 (transparente para arje), tokio "full", paths intra-workspace de yahweh redirigidos a su nueva ubicación. cargo check --workspace: 0 errores, 17 warnings (dead code preexistente). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 04:45:44 +00:00
commit 53dbdf0f1d
176 changed files with 34845 additions and 0 deletions
@@ -0,0 +1,515 @@
+//! Hash α-equivalente.
+//!
+//! Dos términos que difieren *solo* en los nombres de variables ligadas
+//! producen el mismo hash. Los nombres de funciones, los identificadores
+//! libres y los constructores (variantes, tipos) **sí** afectan al hash:
+//! forman parte de la interfaz pública o discriminan el término.
+//!
+//! Implementación: durante el recorrido se mantiene una pila de scopes.
+//! Al encontrar un binder reconocido, su nombre se empuja sobre la pila;
+//! al salir del scope, se descarta. Las referencias a identificadores se
+//! buscan desde la cima:
+//! - si están, se emite un índice estilo de Bruijn (offset desde la cima);
+//! - si no, se emite el nombre literal (variable libre).
+//!
+//! **Distinción binder vs. constructor:** dentro de un patrón, un
+//! `identifier` puede ser binder (`x`, `mi_var`) o constructor / variante
+//! (`None`, `Ok`, `MAX_VAL`). La gramática no los distingue; usamos la
+//! convención de Rust: minúscula inicial (o `_` seguido de minúscula,
+//! dígito u otro `_`) = binder; cualquier otro inicio = constructor. Cuando
+//! la gramática marca explícitamente `field_name = "pattern"` (parámetros,
+//! lets), forzamos binder.
+//!
+//! **Cobertura del MVP:**
+//! - Parámetros de `function_item` y `closure_expression`.
+//! - Bindings de `let_declaration` dentro de `block`, con desestructura.
+//! - Variable de `for_expression`.
+//! - Brazos de `match` (`match_arm` con guarda; cada arm es un scope
+//!   independiente).
+//! - Patrones: `tuple_pattern`, `tuple_struct_pattern`, `struct_pattern`,
+//!   `field_pattern` (forma completa y shorthand), `captured_pattern`
+//!   (`n @ pat`), `range_pattern`, `slice_pattern`, `ref_pattern`,
+//!   `reference_pattern`, `mut_pattern`.
+//!
+//! **Pendiente:** `if let`, `while let`, `let-else`, let-chains, `or_pattern`
+//! con bindings (Rust requiere mismas variables en cada rama).
+
+use crate::ast::SemanticNode;
+use crate::cas::ContentHash;
+use blake3::Hasher;
+
+/// Marker byte: the node carries no leaf text.
+const TAG_NO_LEAF: u8 = 0;
+/// Marker byte: a length-prefixed leaf text follows.
+const TAG_LEAF: u8 = 1;
+/// Marker byte: an anonymised binder (its name is not hashed).
+const TAG_BINDER: u8 = 2;
+/// Marker byte: reference to a name found in scope; a de Bruijn-style index follows.
+const TAG_REF_BOUND: u8 = 3;
+/// Marker byte: reference to a name not found in scope; the literal name follows.
+const TAG_REF_FREE: u8 = 4;
+
+/// Computes the α-equivalent content hash of `node`.
+///
+/// The whole tree is streamed into a single BLAKE3 hasher by `feed`,
+/// starting from an empty binder scope; the resulting digest bytes are
+/// wrapped in a `ContentHash`.
+pub fn hash_node_alpha(node: &SemanticNode) -> ContentHash {
+    let mut hasher = Hasher::new();
+    let mut bound_names = Vec::<String>::new();
+    feed(&mut hasher, node, &mut bound_names);
+    let digest = hasher.finalize();
+    ContentHash(*digest.as_bytes())
+}
+
+/// Scope-aware entry point of the traversal.
+///
+/// Writes the node's kind and field name, then dispatches on the kind:
+/// scope-introducing constructs go to their dedicated feeders, identifiers
+/// are emitted either as anonymous binders (when they sit in a `pattern`
+/// field) or as references resolved against `scope`, and everything else
+/// is hashed structurally by `feed_default`.
+fn feed(h: &mut Hasher, node: &SemanticNode, scope: &mut Vec<String>) {
+    write_kind_and_field(h, node);
+
+    let in_pattern_slot = node.field_name.as_deref() == Some("pattern");
+    match node.kind.as_str() {
+        "function_item" | "closure_expression" => feed_callable(h, node, scope),
+        "block" => feed_block(h, node, scope),
+        "for_expression" => feed_for(h, node, scope),
+        "match_arm" => feed_match_arm(h, node, scope),
+        "identifier" => {
+            if in_pattern_slot {
+                emit_binder_body(h);
+            } else {
+                emit_identifier_ref(h, node, scope);
+            }
+        }
+        _ => feed_default(h, node, scope),
+    }
+}
+
+/// Structural fallback: leaf marker, child count, then every child in
+/// order through `feed`. The caller has already written kind and field.
+fn feed_default(h: &mut Hasher, node: &SemanticNode, scope: &mut Vec<String>) {
+    emit_leaf_marker(h, node);
+    let child_count = node.children.len() as u64;
+    h.update(&child_count.to_le_bytes());
+    for child in node.children.iter() {
+        feed(h, child, scope);
+    }
+}
+
+/// Emits an identifier in reference position.
+///
+/// If the identifier's text is valid UTF-8 and matches a name in `scope`,
+/// the innermost match (searched from the top of the stack) is emitted as
+/// `TAG_REF_BOUND` followed by its offset from the top. Otherwise the
+/// identifier is free and is emitted as `TAG_REF_FREE` followed by its
+/// length-prefixed raw bytes (length 0 when the node has no leaf text).
+///
+/// The byte stream is framed like any other node: a leading `TAG_NO_LEAF`
+/// and a trailing child count of zero.
+///
+/// `scope` is taken as a slice: the function only reads it, and callers
+/// holding a `&Vec<String>` or `&mut Vec<String>` coerce automatically.
+fn emit_identifier_ref(h: &mut Hasher, node: &SemanticNode, scope: &[String]) {
+    h.update(&[TAG_NO_LEAF]);
+
+    // Position of the innermost binder with this name, if any. Non-UTF-8
+    // or missing text can never match a scope entry.
+    let bound_at = node
+        .leaf_text
+        .as_deref()
+        .and_then(|raw| std::str::from_utf8(raw).ok())
+        .and_then(|name| scope.iter().rposition(|n| n == name));
+
+    match bound_at {
+        Some(i) => {
+            let de_bruijn = (scope.len() - 1 - i) as u64;
+            h.update(&[TAG_REF_BOUND]);
+            h.update(&de_bruijn.to_le_bytes());
+        }
+        None => {
+            let raw: &[u8] = node.leaf_text.as_deref().unwrap_or(&[]);
+            h.update(&[TAG_REF_FREE]);
+            h.update(&(raw.len() as u64).to_le_bytes());
+            h.update(raw);
+        }
+    }
+
+    // Child count: identifiers are leaves.
+    h.update(&[0u8; 8]);
+}
+
+/// Emits the body of an anonymised binder: no leaf text, the binder tag,
+/// and a child count of zero. The binder's name is deliberately not hashed.
+fn emit_binder_body(h: &mut Hasher) {
+    h.update(&[TAG_NO_LEAF, TAG_BINDER]);
+    h.update(&0u64.to_le_bytes());
+}
+
+/// Emits a complete binder node: the node's kind and field name followed
+/// by the anonymised binder body. Used where the caller has not already
+/// written the kind/field header for `node`.
+fn emit_binder_node(h: &mut Hasher, node: &SemanticNode) {
+    write_kind_and_field(h, node);
+    emit_binder_body(h);
+}
+
+/// Emits the leaf section of a node: `TAG_NO_LEAF` when there is no leaf
+/// text, otherwise `TAG_LEAF` followed by the length-prefixed text bytes.
+fn emit_leaf_marker(h: &mut Hasher, node: &SemanticNode) {
+    let Some(text) = node.leaf_text.as_deref() else {
+        h.update(&[TAG_NO_LEAF]);
+        return;
+    };
+    h.update(&[TAG_LEAF]);
+    h.update(&(text.len() as u64).to_le_bytes());
+    h.update(text);
+}
+
+/// Hashes a `function_item` or `closure_expression`.
+///
+/// Parameter binders are pushed onto `scope` before any child is fed, so
+/// every non-parameter child sees them. The parameter list itself goes
+/// through `feed_callable_params`, which does not consult the scope. On
+/// exit the scope is cut back to its previous length.
+fn feed_callable(h: &mut Hasher, node: &SemanticNode, scope: &mut Vec<String>) {
+    fn is_parameter_list(c: &SemanticNode) -> bool {
+        c.field_name.as_deref() == Some("parameters")
+    }
+
+    h.update(&[TAG_NO_LEAF]);
+
+    // The collectors only ever push, so they can append straight to `scope`.
+    let outer_len = scope.len();
+    for params in node.children.iter().filter(|c| is_parameter_list(c)) {
+        collect_callable_binders(params, scope);
+    }
+
+    h.update(&(node.children.len() as u64).to_le_bytes());
+    for child in &node.children {
+        if is_parameter_list(child) {
+            feed_callable_params(h, child);
+        } else {
+            feed(h, child, scope);
+        }
+    }
+
+    scope.truncate(outer_len);
+}
+
+/// Hashes a `block`, treating each direct `let_declaration` child as a
+/// binder that is visible to the statements after it.
+///
+/// A `let` is fed first (so its initialiser is resolved against the scope
+/// as it was before the declaration) and only then are its pattern binders
+/// pushed. Everything pushed inside the block is dropped on exit.
+fn feed_block(h: &mut Hasher, node: &SemanticNode, scope: &mut Vec<String>) {
+    h.update(&[TAG_NO_LEAF]);
+
+    let outer_len = scope.len();
+    h.update(&(node.children.len() as u64).to_le_bytes());
+    for stmt in &node.children {
+        if stmt.kind != "let_declaration" {
+            feed(h, stmt, scope);
+            continue;
+        }
+        feed_let(h, stmt, scope);
+        stmt.children
+            .iter()
+            .filter(|part| part.field_name.as_deref() == Some("pattern"))
+            .for_each(|pat| collect_pattern_binders(pat, scope));
+    }
+    scope.truncate(outer_len);
+}
+
+/// Hashes a `let_declaration`: header, then each child in order. The
+/// child in the `pattern` field is emitted by `feed_pattern`; every other
+/// child is resolved against the current `scope`. This function does not
+/// modify the scope itself.
+fn feed_let(h: &mut Hasher, node: &SemanticNode, scope: &mut Vec<String>) {
+    write_kind_and_field(h, node);
+    h.update(&[TAG_NO_LEAF]);
+    h.update(&(node.children.len() as u64).to_le_bytes());
+    for child in &node.children {
+        match child.field_name.as_deref() {
+            Some("pattern") => feed_pattern(h, child),
+            _ => feed(h, child, scope),
+        }
+    }
+}
+
+/// Hashes a `for_expression`. The loop pattern's binders are in scope only
+/// while the `body` child is fed; every other non-pattern child is
+/// resolved against the enclosing scope.
+fn feed_for(h: &mut Hasher, node: &SemanticNode, scope: &mut Vec<String>) {
+    h.update(&[TAG_NO_LEAF]);
+
+    let mut loop_binders: Vec<String> = Vec::new();
+    node.children
+        .iter()
+        .filter(|c| c.field_name.as_deref() == Some("pattern"))
+        .for_each(|pat| collect_pattern_binders(pat, &mut loop_binders));
+
+    h.update(&(node.children.len() as u64).to_le_bytes());
+    for child in &node.children {
+        let role = child.field_name.as_deref();
+        if role == Some("pattern") {
+            feed_pattern(h, child);
+        } else if role == Some("body") {
+            let outer_len = scope.len();
+            scope.extend_from_slice(&loop_binders);
+            feed(h, child, scope);
+            scope.truncate(outer_len);
+        } else {
+            feed(h, child, scope);
+        }
+    }
+}
+
+/// Hashes a `match_arm` as its own scope: binders from the arm's pattern
+/// are pushed before any child is fed and dropped afterwards.
+///
+/// A `match_pattern` child is split by `feed_match_pattern_split` so that
+/// its guard can see the binders; any other pattern child is emitted by
+/// `feed_pattern`.
+fn feed_match_arm(h: &mut Hasher, node: &SemanticNode, scope: &mut Vec<String>) {
+    h.update(&[TAG_NO_LEAF]);
+
+    // The collectors only ever push, so they can append straight to `scope`.
+    let outer_len = scope.len();
+    for pat in node
+        .children
+        .iter()
+        .filter(|c| c.field_name.as_deref() == Some("pattern"))
+    {
+        collect_match_pattern_binders(pat, scope);
+    }
+
+    h.update(&(node.children.len() as u64).to_le_bytes());
+    for child in &node.children {
+        match (child.field_name.as_deref(), child.kind.as_str()) {
+            (Some("pattern"), "match_pattern") => feed_match_pattern_split(h, child, scope),
+            (Some("pattern"), _) => feed_pattern(h, child),
+            _ => feed(h, child, scope),
+        }
+    }
+
+    scope.truncate(outer_len);
+}
+
+/// Hashes a `match_pattern` node, separating the guard from the pattern:
+/// the child in the `condition` field is resolved against `scope`, all
+/// other children are emitted as patterns.
+fn feed_match_pattern_split(h: &mut Hasher, mp: &SemanticNode, scope: &mut Vec<String>) {
+    write_kind_and_field(h, mp);
+    emit_leaf_marker(h, mp);
+    h.update(&(mp.children.len() as u64).to_le_bytes());
+    for part in &mp.children {
+        match part.field_name.as_deref() {
+            Some("condition") => feed(h, part, scope),
+            _ => feed_pattern(h, part),
+        }
+    }
+}
+
+/// Collects the binders introduced by a match arm's pattern. For a
+/// `match_pattern` wrapper the `condition` child (the guard) is skipped;
+/// any other node is handled directly as a pattern.
+fn collect_match_pattern_binders(p: &SemanticNode, out: &mut Vec<String>) {
+    if p.kind != "match_pattern" {
+        collect_pattern_binders(p, out);
+        return;
+    }
+    p.children
+        .iter()
+        .filter(|c| c.field_name.as_deref() != Some("condition"))
+        .for_each(|c| collect_pattern_binders(c, out));
+}
+
+/// Hashes a parameter list. `parameter` children go through
+/// `feed_parameter`; anything else in the list is emitted as a pattern.
+fn feed_callable_params(h: &mut Hasher, params: &SemanticNode) {
+    write_kind_and_field(h, params);
+    h.update(&[TAG_NO_LEAF]);
+    h.update(&(params.children.len() as u64).to_le_bytes());
+    for entry in &params.children {
+        if entry.kind == "parameter" {
+            feed_parameter(h, entry);
+        } else {
+            feed_pattern(h, entry);
+        }
+    }
+}
+
+/// Hashes a single `parameter`: the child in the `pattern` field is
+/// emitted as a pattern, every other child is hashed literally.
+fn feed_parameter(h: &mut Hasher, node: &SemanticNode) {
+    write_kind_and_field(h, node);
+    h.update(&[TAG_NO_LEAF]);
+    h.update(&(node.children.len() as u64).to_le_bytes());
+    for part in &node.children {
+        match part.field_name.as_deref() {
+            Some("pattern") => feed_pattern(h, part),
+            _ => feed_as_literal(h, part),
+        }
+    }
+}
+
+/// Pattern-aware emitter.
+///
+/// Inside a pattern an identifier is either a binder (introduces a local,
+/// hashed anonymously) or a constructor / path reference (hashed with its
+/// literal text); `is_binder_identifier` makes that call. Recognised
+/// composite patterns are walked child by child; any other node kind is
+/// hashed literally.
+fn feed_pattern(h: &mut Hasher, node: &SemanticNode) {
+    write_kind_and_field(h, node);
+    let kind = node.kind.as_str();
+
+    if kind == "identifier" {
+        if is_binder_identifier(node) {
+            emit_binder_body(h);
+        } else {
+            emit_leaf_marker(h, node);
+            h.update(&[0u8; 8]);
+        }
+        return;
+    }
+
+    let is_composite = matches!(
+        kind,
+        "tuple_pattern"
+            | "ref_pattern"
+            | "reference_pattern"
+            | "mut_pattern"
+            | "slice_pattern"
+            | "tuple_struct_pattern"
+            | "struct_pattern"
+            | "captured_pattern"
+    );
+    if !is_composite {
+        // `feed_as_literal` writes the kind/field header again; that
+        // repetition is part of the existing hash format and is kept.
+        feed_as_literal(h, node);
+        return;
+    }
+
+    // Shared framing for every composite pattern.
+    h.update(&[TAG_NO_LEAF]);
+    h.update(&(node.children.len() as u64).to_le_bytes());
+
+    match kind {
+        "tuple_struct_pattern" => {
+            for child in &node.children {
+                if child.field_name.as_deref() == Some("type") {
+                    feed_as_literal(h, child);
+                } else {
+                    feed_pattern(h, child);
+                }
+            }
+        }
+        "struct_pattern" => {
+            for child in &node.children {
+                let is_type = child.field_name.as_deref() == Some("type");
+                if !is_type && child.kind == "field_pattern" {
+                    feed_field_pattern(h, child);
+                } else {
+                    feed_as_literal(h, child);
+                }
+            }
+        }
+        "captured_pattern" => {
+            // Only the first identifier child is the captured name.
+            let name_at = node.children.iter().position(|c| c.kind == "identifier");
+            for (i, child) in node.children.iter().enumerate() {
+                if Some(i) == name_at {
+                    emit_binder_node(h, child);
+                } else {
+                    feed_pattern(h, child);
+                }
+            }
+        }
+        // tuple / ref / reference / mut / slice patterns: every child is a pattern.
+        _ => {
+            for child in &node.children {
+                feed_pattern(h, child);
+            }
+        }
+    }
+}
+
+/// Hashes a `field_pattern` inside a struct pattern.
+///
+/// Two shapes are handled:
+/// - **Full form** (some child sits in the `pattern` field, e.g.
+///   `x: inner`): the `pattern` child is emitted as a pattern and every
+///   other child (including the field name) literally.
+/// - **Shorthand** (no `pattern` child, e.g. `x`): the identifier is at
+///   once the *field being read* and the *local being bound*. It is
+///   emitted with its literal text **and** the binder tag.
+///
+/// Hashing the shorthand name is required for soundness: renaming it is
+/// not an α-renaming, because it changes which field is destructured.
+/// Emitting it as an anonymous binder would make `S { a }` and `S { b }`
+/// collide even though they read different fields.
+fn feed_field_pattern(h: &mut Hasher, fp: &SemanticNode) {
+    write_kind_and_field(h, fp);
+    let has_pattern = fp
+        .children
+        .iter()
+        .any(|c| c.field_name.as_deref() == Some("pattern"));
+    h.update(&[TAG_NO_LEAF]);
+    h.update(&(fp.children.len() as u64).to_le_bytes());
+    for c in &fp.children {
+        let is_shorthand_name = !has_pattern
+            && matches!(
+                c.kind.as_str(),
+                "identifier" | "shorthand_field_identifier" | "field_identifier"
+            );
+        if c.field_name.as_deref() == Some("pattern") {
+            feed_pattern(h, c);
+        } else if is_shorthand_name {
+            // Field name (literal) + binder marker + zero children.
+            write_kind_and_field(h, c);
+            emit_leaf_marker(h, c);
+            h.update(&[TAG_BINDER]);
+            h.update(&[0u8; 8]);
+        } else {
+            feed_as_literal(h, c);
+        }
+    }
+}
+
+/// Hashes a subtree verbatim, with no binder or scope handling: header,
+/// leaf marker, child count, then each child recursively in the same way.
+fn feed_as_literal(h: &mut Hasher, node: &SemanticNode) {
+    write_kind_and_field(h, node);
+    emit_leaf_marker(h, node);
+    let child_count = node.children.len() as u64;
+    h.update(&child_count.to_le_bytes());
+    node.children
+        .iter()
+        .for_each(|child| feed_as_literal(h, child));
+}
+
+/// Collects the binder names introduced by a parameter list. For a
+/// `parameter` entry only its `pattern` children are inspected; any other
+/// entry is treated as a pattern itself.
+fn collect_callable_binders(params: &SemanticNode, out: &mut Vec<String>) {
+    for entry in &params.children {
+        if entry.kind != "parameter" {
+            collect_pattern_binders(entry, out);
+            continue;
+        }
+        entry
+            .children
+            .iter()
+            .filter(|part| part.field_name.as_deref() == Some("pattern"))
+            .for_each(|pat| collect_pattern_binders(pat, out));
+    }
+}
+
+/// Appends to `out`, in traversal order, the names bound by pattern `p`.
+///
+/// Recognised pattern kinds are walked the same way `feed_pattern` walks
+/// them; unrecognised kinds contribute no binders.
+fn collect_pattern_binders(p: &SemanticNode, out: &mut Vec<String>) {
+    let kind = p.kind.as_str();
+
+    if kind == "identifier" {
+        if is_binder_identifier(p) {
+            push_identifier_name(p, out);
+        }
+        return;
+    }
+
+    match kind {
+        "tuple_pattern" | "ref_pattern" | "reference_pattern" | "mut_pattern" | "slice_pattern" => {
+            p.children
+                .iter()
+                .for_each(|c| collect_pattern_binders(c, out));
+        }
+        "tuple_struct_pattern" => {
+            // The constructor path lives in the `type` field and binds nothing.
+            p.children
+                .iter()
+                .filter(|c| c.field_name.as_deref() != Some("type"))
+                .for_each(|c| collect_pattern_binders(c, out));
+        }
+        "struct_pattern" => {
+            p.children
+                .iter()
+                .filter(|c| c.kind == "field_pattern")
+                .for_each(|c| collect_field_pattern_binders(c, out));
+        }
+        "captured_pattern" => {
+            // Only the first identifier child is the captured name.
+            let name_at = p.children.iter().position(|c| c.kind == "identifier");
+            for (i, c) in p.children.iter().enumerate() {
+                if Some(i) == name_at {
+                    push_identifier_name(c, out);
+                } else {
+                    collect_pattern_binders(c, out);
+                }
+            }
+        }
+        _ => {}
+    }
+}
+
+/// Collects the binders of a `field_pattern`. In the full form (a child
+/// in the `pattern` field exists) only that sub-pattern binds names; in
+/// the shorthand form the identifier child itself is the binder.
+fn collect_field_pattern_binders(fp: &SemanticNode, out: &mut Vec<String>) {
+    let has_pattern = fp
+        .children
+        .iter()
+        .any(|c| c.field_name.as_deref() == Some("pattern"));
+
+    for child in &fp.children {
+        if has_pattern {
+            if child.field_name.as_deref() == Some("pattern") {
+                collect_pattern_binders(child, out);
+            }
+        } else if matches!(
+            child.kind.as_str(),
+            "identifier" | "shorthand_field_identifier" | "field_identifier"
+        ) {
+            push_identifier_name(child, out);
+        }
+    }
+}
+
+/// Pushes the node's leaf text onto `out` as a `String`. Nodes without
+/// leaf text, or whose text is not valid UTF-8, are silently skipped.
+fn push_identifier_name(node: &SemanticNode, out: &mut Vec<String>) {
+    let name = node
+        .leaf_text
+        .as_deref()
+        .and_then(|raw| std::str::from_utf8(raw).ok());
+    if let Some(name) = name {
+        out.push(name.to_owned());
+    }
+}
+
+/// Decides whether an `identifier` in pattern position is a binder.
+///
+/// Rules:
+/// - A node whose `field_name` is `"pattern"` is always a binder.
+/// - Otherwise the decision is made on the name by `is_binder_name`:
+///   lowercase initial, or `_` followed by a lowercase letter, an ASCII
+///   digit or another `_`.
+/// - Missing or non-UTF-8 text, and every other name, is not a binder
+///   (constructor / variant / constant, hashed literally).
+fn is_binder_identifier(node: &SemanticNode) -> bool {
+    if node.field_name.as_deref() == Some("pattern") {
+        return true;
+    }
+    node.leaf_text
+        .as_deref()
+        .and_then(|raw| std::str::from_utf8(raw).ok())
+        .map_or(false, is_binder_name)
+}
+
+/// Name-based binder heuristic: true for a lowercase initial, or for a
+/// leading `_` followed by a lowercase letter, an ASCII digit or another
+/// `_`. The empty string and a bare `_` are not binders.
+fn is_binder_name(s: &str) -> bool {
+    let mut rest = s.chars();
+    let Some(first) = rest.next() else {
+        return false;
+    };
+    if first != '_' {
+        return first.is_lowercase();
+    }
+    matches!(
+        rest.next(),
+        Some(c) if c.is_lowercase() || c.is_ascii_digit() || c == '_'
+    )
+}
+
+/// Writes the node header: the length-prefixed kind, then a presence
+/// byte (1/0) for the field name, followed by the length-prefixed field
+/// name when present.
+fn write_kind_and_field(h: &mut Hasher, node: &SemanticNode) {
+    write_str(h, &node.kind);
+    if let Some(field) = node.field_name.as_deref() {
+        h.update(&[1]);
+        write_str(h, field);
+    } else {
+        h.update(&[0]);
+    }
+}
+
+/// Writes `s` as a little-endian `u64` byte length followed by its UTF-8
+/// bytes, so adjacent strings cannot run into each other.
+fn write_str(h: &mut Hasher, s: &str) {
+    let bytes = s.as_bytes();
+    h.update(&(bytes.len() as u64).to_le_bytes());
+    h.update(bytes);
+}
@@ -0,0 +1,52 @@
+use tree_sitter::Node;
+
+/// Normalised AST node: positions, whitespace and trivia (comments marked
+/// as `extra` in the grammar) are discarded, so two code fragments that
+/// differ only in those aspects produce identical trees.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SemanticNode {
+    /// Grammar kind of the node, as reported by tree-sitter.
+    pub kind: String,
+    /// Field name under which the parent holds this node, if any.
+    pub field_name: Option<String>,
+    /// Source bytes covered by the node; set only when it has no kept children.
+    pub leaf_text: Option<Vec<u8>>,
+    /// Non-`extra` children, in source order.
+    pub children: Vec<SemanticNode>,
+}
+
+impl SemanticNode {
+    /// Converts a tree-sitter `node` and its whole subtree into a
+    /// `SemanticNode`. `source` is the byte buffer the tree was parsed
+    /// from; it is sliced to recover the text of leaf nodes. The root is
+    /// built without a field name.
+    pub fn from_tree_sitter(node: Node<'_>, source: &[u8]) -> Self {
+        Self::build(node, source, None)
+    }
+
+    /// Recursive worker behind `from_tree_sitter`; `field_name` is the
+    /// field under which the parent holds `node`.
+    fn build(node: Node<'_>, source: &[u8], field_name: Option<String>) -> Self {
+        let mut children = Vec::new();
+
+        // Every non-`extra` child is kept: named ones (grammar rules) and
+        // anonymous ones (literal tokens such as operators and
+        // separators). Only `extras` are dropped, so two sources with the
+        // same content and structure yield the same tree.
+        let mut cursor = node.walk();
+        let mut has_node = cursor.goto_first_child();
+        while has_node {
+            let child = cursor.node();
+            if !child.is_extra() {
+                let field = cursor.field_name().map(String::from);
+                children.push(Self::build(child, source, field));
+            }
+            has_node = cursor.goto_next_sibling();
+        }
+
+        // A node with no kept children is a leaf and carries its source bytes.
+        let leaf_text = children
+            .is_empty()
+            .then(|| source[node.byte_range()].to_vec());
+
+        SemanticNode {
+            kind: node.kind().to_string(),
+            field_name,
+            leaf_text,
+            children,
+        }
+    }
+}
@@ -0,0 +1,127 @@
+//! Atestaciones firmadas: la sustancia material de la atribución
+//! irrefutable. Una `Attestation` es una firma criptográfica sobre un
+//! `ContentHash` que vincula a su autor (un `Did`) con un fragmento
+//! concreto de contenido del repositorio.
+//!
+//! Modelo: cada hash del MST puede tener cero o más atestaciones,
+//! provenientes de autores distintos. La existencia de una atestación
+//! válida prueba que el dueño de cierta clave privada **vio y firmó
+//! exactamente ese hash** — no puede negarlo después sin admitir que
+//! filtró su llave. Es el equivalente a un commit firmado en Git pero
+//! a granularidad arbitraria: una función, un módulo, o un estado del
+//! repositorio entero.
+//!
+//! `AttestationStore` solo acepta atestaciones criptográficamente
+//! válidas: el `add` rechaza cualquier intento de inyectar firmas
+//! falsificadas. Esto convierte al store en una fuente confiable de
+//! la pregunta "¿quién ha respaldado este contenido?".
+
+use crate::cas::ContentHash;
+use crate::identity::{Did, Keypair, Signature};
+use std::collections::HashMap;
+
+/// A signature by `author` over a `ContentHash`.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct Attestation {
+    /// Hash of the content being attested.
+    pub content: ContentHash,
+    /// Identity (public key) of the signer.
+    pub author: Did,
+    /// Signature over the bytes of `content`.
+    pub signature: Signature,
+}
+
+/// Reasons an attestation can be rejected.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum AttestationError {
+    /// The attestation's signature does not verify.
+    InvalidSignature,
+}
+
+impl std::fmt::Display for AttestationError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let message = match self {
+            Self::InvalidSignature => "firma de la atestación no verifica",
+        };
+        f.write_str(message)
+    }
+}
+
+impl std::error::Error for AttestationError {}
+
+impl Attestation {
+    /// Creates an attestation by signing the bytes of `content` with
+    /// `keypair`. The author is always derived from the keypair itself;
+    /// there is no way to pass an arbitrary `Did`.
+    pub fn create(keypair: &Keypair, content: ContentHash) -> Self {
+        let author = keypair.did();
+        let signature = keypair.sign(&content.0);
+        Self {
+            content,
+            author,
+            signature,
+        }
+    }
+
+    /// Returns `true` when `signature` verifies over the bytes of
+    /// `content` under the `author` key.
+    pub fn verify(&self) -> bool {
+        let Self {
+            content,
+            author,
+            signature,
+        } = self;
+        author.verify(&content.0, signature)
+    }
+}
+
+/// Registry of attestations keyed by `ContentHash`.
+///
+/// Idempotent per `(author, content)`: inserting the same attestation
+/// twice does not duplicate it. One `ContentHash` may still carry
+/// attestations from **different authors**.
+#[derive(Debug, Default, Clone)]
+pub struct AttestationStore {
+    // At most one attestation per author in each vector (enforced by `add`).
+    by_content: HashMap<ContentHash, Vec<Attestation>>,
+}
+
+impl AttestationStore {
+    /// Creates an empty store.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Inserts an attestation.
+    ///
+    /// # Errors
+    /// Returns `AttestationError::InvalidSignature` when the signature
+    /// does not verify; nothing is stored in that case. An attestation by
+    /// an author already recorded for the same content is accepted but
+    /// not stored again.
+    pub fn add(&mut self, att: Attestation) -> Result<(), AttestationError> {
+        if !att.verify() {
+            return Err(AttestationError::InvalidSignature);
+        }
+        let signers = self.by_content.entry(att.content).or_default();
+        let already_present = signers.iter().any(|known| known.author == att.author);
+        if !already_present {
+            signers.push(att);
+        }
+        Ok(())
+    }
+
+    /// Attestations recorded for `content`; empty when there are none.
+    pub fn get(&self, content: &ContentHash) -> &[Attestation] {
+        match self.by_content.get(content) {
+            Some(found) => found.as_slice(),
+            None => &[],
+        }
+    }
+
+    /// DIDs that have attested `content`. Each author appears at most
+    /// once, because `add` deduplicates by author.
+    pub fn authors_of(&self, content: &ContentHash) -> Vec<Did> {
+        self.get(content).iter().map(|att| att.author).collect()
+    }
+
+    /// Total number of attestations across all contents.
+    pub fn len(&self) -> usize {
+        self.by_content.values().map(|atts| atts.len()).sum()
+    }
+
+    /// `true` when no attestation is stored.
+    pub fn is_empty(&self) -> bool {
+        !self.by_content.values().any(|atts| !atts.is_empty())
+    }
+
+    /// Iterates over every stored attestation, in unspecified order.
+    pub fn all(&self) -> impl Iterator<Item = &Attestation> + '_ {
+        self.by_content.values().flatten()
+    }
+}
@@ -0,0 +1,95 @@
+use crate::ast::SemanticNode;
+use blake3::Hasher;
+
+/// 32-byte hash identifying a `SemanticNode` by its logical structure.
+/// Nodes with the same structure hash the same regardless of formatting,
+/// comments or position in the source file.
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+    serde::Serialize,
+    serde::Deserialize,
+)]
+#[serde(transparent)]
+pub struct ContentHash(pub [u8; 32]);
+
+impl ContentHash {
+    /// Borrows the raw 32 hash bytes.
+    pub fn as_bytes(&self) -> &[u8; 32] {
+        &self.0
+    }
+}
+
+/// Formats the hash as lowercase hexadecimal, two digits per byte.
+impl std::fmt::Display for ContentHash {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.0
+            .iter()
+            .try_for_each(|byte| write!(f, "{:02x}", byte))
+    }
+}
+
+/// Merkle hash of a `SemanticNode`.
+///
+/// The hash is a pure function of `(kind, field_name, leaf_text,
+/// child hashes)`: children contribute through their own hashes rather
+/// than through their full byte stream, so a node can be re-hashed from
+/// its fields plus its children's hashes alone.
+pub fn hash_node(node: &SemanticNode) -> ContentHash {
+    let mut child_hashes = Vec::with_capacity(node.children.len());
+    for child in &node.children {
+        child_hashes.push(hash_node(child));
+    }
+    let kind = node.kind.as_str();
+    let field_name = node.field_name.as_deref();
+    let leaf_text = node.leaf_text.as_deref();
+    hash_components(kind, field_name, leaf_text, &child_hashes)
+}
+
+/// Canonical primitive of the structural hash.
+///
+/// Byte layout fed to BLAKE3, in order: length-prefixed `kind`; a
+/// presence byte for `field_name` followed by the length-prefixed name
+/// when present; a presence byte for `leaf_text` followed by the
+/// length-prefixed bytes when present; the child count as a
+/// little-endian `u64`; then each child hash (32 bytes) in order.
+pub fn hash_components(
+    kind: &str,
+    field_name: Option<&str>,
+    leaf_text: Option<&[u8]>,
+    child_hashes: &[ContentHash],
+) -> ContentHash {
+    let mut hasher = Hasher::new();
+
+    write_str(&mut hasher, kind);
+
+    if let Some(field) = field_name {
+        hasher.update(&[1]);
+        write_str(&mut hasher, field);
+    } else {
+        hasher.update(&[0]);
+    }
+
+    if let Some(text) = leaf_text {
+        hasher.update(&[1]);
+        hasher.update(&(text.len() as u64).to_le_bytes());
+        hasher.update(text);
+    } else {
+        hasher.update(&[0]);
+    }
+
+    let child_count = child_hashes.len() as u64;
+    hasher.update(&child_count.to_le_bytes());
+    for child in child_hashes.iter() {
+        hasher.update(&child.0);
+    }
+
+    let digest = hasher.finalize();
+    ContentHash(*digest.as_bytes())
+}
+
+/// Writes `s` as a little-endian `u64` byte length followed by its UTF-8
+/// bytes.
+fn write_str(h: &mut Hasher, s: &str) {
+    let bytes = s.as_bytes();
+    h.update(&(bytes.len() as u64).to_le_bytes());
+    h.update(bytes);
+}
@@ -0,0 +1,223 @@
+//! Identidad self-sovereign basada en Ed25519.
+//!
+//! Cada peer (y cada autor humano o agente IA) se identifica por un
+//! `Did` — el bytestring de su clave pública Ed25519. La clave privada
+//! vive en su `Keypair` y nunca sale del nodo. Firmar un mensaje con la
+//! `Keypair` produce una `Signature` que cualquiera con el `Did` puede
+//! verificar — la atribución es irrefutable bajo el modelo
+//! criptográfico estándar (asumiendo que la clave privada no fugó).
+//!
+//! El esquema es deliberadamente minimalista: no hay rotación de
+//! claves, ni revocación, ni metadatos en el DID. Esas capas (DID
+//! Documents, métodos `did:web`/`did:ion`, claves de firma versus de
+//! cifrado, etc.) se construyen encima cuando la complejidad del
+//! producto lo justifique. Por ahora, el `Did` ES la clave pública.
+
+use aes_gcm::{aead::Aead, Aes256Gcm, KeyInit, Nonce};
+use argon2::Argon2;
+use ed25519_dalek::{
+    Signature as Ed25519Sig, Signer, SigningKey, Verifier, VerifyingKey, SECRET_KEY_LENGTH,
+    SIGNATURE_LENGTH,
+};
+use rand::rngs::OsRng;
+use rand::RngCore;
+
+/// Magic bytes that open the on-disk encrypted keypair format.
+const KEYPAIR_MAGIC: &[u8; 8] = b"MINGAKEY";
+/// Format version byte written right after the magic.
+const KEYPAIR_VERSION: u8 = 1;
+/// Length in bytes of the random salt passed to Argon2.
+const ARGON2_SALT_LEN: usize = 16;
+/// Length in bytes of the AES-GCM nonce.
+const AES_NONCE_LEN: usize = 12;
+/// Size of the fixed header: magic (8) + version (1) + salt + nonce.
+const KEYPAIR_HEADER_LEN: usize = 8 + 1 + ARGON2_SALT_LEN + AES_NONCE_LEN;
+
+/// Errors produced while encrypting or decrypting a `Keypair`.
+#[derive(Debug, thiserror::Error)]
+pub enum KeypairCryptoError {
+    /// The blob is too short, has the wrong magic or version, or the
+    /// decrypted payload has an unexpected length.
+    #[error("formato inválido: faltan magic / versión / longitud")]
+    InvalidFormat,
+
+    /// The AEAD operation failed (wrong passphrase or tampered data).
+    #[error("passphrase incorrecta o cifrado manipulado")]
+    DecryptFailed,
+
+    /// Key derivation failed; carries the Argon2 error text.
+    #[error("argon2: {0}")]
+    Argon2(String),
+}
+
+/// Decentralized Identifier: the 32 bytes of an Ed25519 public key.
+///
+/// NOTE(review): the array length is spelled `SECRET_KEY_LENGTH` although
+/// the payload is a public key; presumably
+/// `ed25519_dalek::PUBLIC_KEY_LENGTH` was intended — confirm both
+/// constants are equal before switching.
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+    serde::Serialize,
+    serde::Deserialize,
+)]
+#[serde(transparent)]
+pub struct Did(pub [u8; SECRET_KEY_LENGTH]);
+
+impl Did {
+    /// Borrows the raw public-key bytes.
+    pub fn as_bytes(&self) -> &[u8; SECRET_KEY_LENGTH] {
+        &self.0
+    }
+
+    /// Returns `true` when `sig` is a valid signature over `msg` under
+    /// this DID's public key. Any irregularity yields `false`: DID bytes
+    /// that do not decode to a verifying key, or a signature that does
+    /// not verify for `msg`.
+    ///
+    /// NOTE(review): this uses the non-strict `Verifier::verify`; if
+    /// weak-key / malleability rejection matters for attribution,
+    /// consider `VerifyingKey::verify_strict` — confirm against the
+    /// ed25519-dalek version in use.
+    pub fn verify(&self, msg: &[u8], sig: &Signature) -> bool {
+        match VerifyingKey::from_bytes(&self.0) {
+            Ok(key) => {
+                let ed_sig = Ed25519Sig::from_bytes(&sig.0);
+                key.verify(msg, &ed_sig).is_ok()
+            }
+            Err(_) => false,
+        }
+    }
+}
+
+/// Formats the DID as `did:key:` followed by the key bytes in lowercase
+/// hexadecimal.
+impl std::fmt::Display for Did {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("did:key:")?;
+        self.0
+            .iter()
+            .try_for_each(|byte| write!(f, "{:02x}", byte))
+    }
+}
+
+/// Raw Ed25519 signature bytes (`SIGNATURE_LENGTH` long). Serialised
+/// through `serde_big_array`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[serde(transparent)]
+pub struct Signature(
+    #[serde(with = "serde_big_array::BigArray")] pub [u8; SIGNATURE_LENGTH],
+);
+
+impl Signature {
+    /// Borrows the raw signature bytes.
+    pub fn as_bytes(&self) -> &[u8; SIGNATURE_LENGTH] {
+        &self.0
+    }
+}
+
+/// Full cryptographic key: the private half (for signing) together with
+/// the public half (for others to verify). The public side exposed to the
+/// world is the `Did`; the `Keypair` keeps both sides together.
+#[derive(Clone)]
+pub struct Keypair {
+    // Ed25519 signing key; the verifying key is derived from it on demand.
+    signing: SigningKey,
+}
+
+impl Keypair {
+    /// Generates a new `Keypair` from a 32-byte seed filled by `OsRng`
+    /// (operating-system randomness).
+    pub fn generate() -> Self {
+        let mut seed = [0u8; SECRET_KEY_LENGTH];
+        OsRng.fill_bytes(&mut seed);
+        Self::from_seed(&seed)
+    }
+
+    /// Rebuilds a `Keypair` from a 32-byte seed. The same seed always
+    /// yields the same `Keypair` (same `Did`, same signatures). Useful
+    /// for reproducible tests and for seeds that come from another
+    /// deterministic source.
+    pub fn from_seed(seed: &[u8; SECRET_KEY_LENGTH]) -> Self {
+        Self {
+            signing: SigningKey::from_bytes(seed),
+        }
+    }
+
+    /// Public identity (`Did`) corresponding to this keypair.
+    pub fn did(&self) -> Did {
+        Did(self.signing.verifying_key().to_bytes())
+    }
+
+    /// Signs `msg` and returns the raw signature bytes.
+    pub fn sign(&self, msg: &[u8]) -> Signature {
+        Signature(self.signing.sign(msg).to_bytes())
+    }
+
+    /// Encrypts the private part of the keypair with a human passphrase.
+    /// Scheme:
+    ///
+    /// 1. Generate a random 16-byte salt and a random 12-byte nonce.
+    /// 2. Derive an AES-256 key from the passphrase with Argon2
+    ///    (`Argon2::default()` parameters, see `derive_aes_key`).
+    /// 3. Encrypt the 32 secret-key bytes with AES-256-GCM
+    ///    (authenticated encryption).
+    /// 4. Assemble the blob:
+    ///    `MAGIC(8) || VERSION(1) || SALT(16) || NONCE(12) || CIPHERTEXT+TAG(48)`.
+    ///
+    /// Total: 85 bytes. The passphrase is never stored.
+    ///
+    /// # Errors
+    /// `KeypairCryptoError::Argon2` if key derivation fails.
+    ///
+    /// NOTE(review): cipher-construction and encryption failures are
+    /// reported as `DecryptFailed`, which is misleading on the encrypt
+    /// path; a dedicated variant would need an enum change.
+    pub fn encrypt(&self, passphrase: &str) -> Result<Vec<u8>, KeypairCryptoError> {
+        let mut salt = [0u8; ARGON2_SALT_LEN];
+        let mut nonce_bytes = [0u8; AES_NONCE_LEN];
+        OsRng.fill_bytes(&mut salt);
+        OsRng.fill_bytes(&mut nonce_bytes);
+
+        let aes_key = derive_aes_key(passphrase, &salt)?;
+
+        let cipher = Aes256Gcm::new_from_slice(&aes_key)
+            .map_err(|_| KeypairCryptoError::DecryptFailed)?;
+        let nonce = Nonce::from_slice(&nonce_bytes);
+        let secret_bytes = self.signing.to_bytes();
+        let ciphertext = cipher
+            .encrypt(nonce, secret_bytes.as_ref())
+            .map_err(|_| KeypairCryptoError::DecryptFailed)?;
+
+        let mut out = Vec::with_capacity(KEYPAIR_HEADER_LEN + ciphertext.len());
+        out.extend_from_slice(KEYPAIR_MAGIC);
+        out.push(KEYPAIR_VERSION);
+        out.extend_from_slice(&salt);
+        out.extend_from_slice(&nonce_bytes);
+        out.extend_from_slice(&ciphertext);
+        Ok(out)
+    }
+
+    /// Decrypts a keypair produced by `encrypt`.
+    ///
+    /// # Errors
+    /// - `InvalidFormat` if the blob is shorter than the header, the
+    ///   magic or version do not match, or the decrypted payload is not
+    ///   exactly `SECRET_KEY_LENGTH` bytes.
+    /// - `DecryptFailed` if the passphrase is wrong **or** the bytes were
+    ///   tampered with (the authenticated decryption fails either way).
+    /// - `Argon2` if key derivation fails.
+    pub fn decrypt(bytes: &[u8], passphrase: &str) -> Result<Self, KeypairCryptoError> {
+        if bytes.len() < KEYPAIR_HEADER_LEN {
+            return Err(KeypairCryptoError::InvalidFormat);
+        }
+        if &bytes[..8] != KEYPAIR_MAGIC {
+            return Err(KeypairCryptoError::InvalidFormat);
+        }
+        if bytes[8] != KEYPAIR_VERSION {
+            return Err(KeypairCryptoError::InvalidFormat);
+        }
+
+        // Header layout after magic + version: salt, then nonce; the rest
+        // of the blob is ciphertext plus authentication tag.
+        let salt = &bytes[9..9 + ARGON2_SALT_LEN];
+        let nonce_bytes = &bytes[9 + ARGON2_SALT_LEN..KEYPAIR_HEADER_LEN];
+        let ciphertext = &bytes[KEYPAIR_HEADER_LEN..];
+
+        let aes_key = derive_aes_key(passphrase, salt)?;
+        let cipher = Aes256Gcm::new_from_slice(&aes_key)
+            .map_err(|_| KeypairCryptoError::DecryptFailed)?;
+        let nonce = Nonce::from_slice(nonce_bytes);
+        let plaintext = cipher
+            .decrypt(nonce, ciphertext)
+            .map_err(|_| KeypairCryptoError::DecryptFailed)?;
+
+        // Length is checked first so `copy_from_slice` below cannot panic.
+        if plaintext.len() != SECRET_KEY_LENGTH {
+            return Err(KeypairCryptoError::InvalidFormat);
+        }
+        let mut seed = [0u8; SECRET_KEY_LENGTH];
+        seed.copy_from_slice(&plaintext);
+        Ok(Self::from_seed(&seed))
+    }
+}
+
+/// Derives a 32-byte AES key from `passphrase` and `salt` using
+/// `Argon2::default()`.
+///
+/// # Errors
+/// `KeypairCryptoError::Argon2` carrying the library's error text when
+/// the derivation fails.
+fn derive_aes_key(passphrase: &str, salt: &[u8]) -> Result<[u8; 32], KeypairCryptoError> {
+    let mut key = [0u8; 32];
+    match Argon2::default().hash_password_into(passphrase.as_bytes(), salt, &mut key) {
+        Ok(()) => Ok(key),
+        Err(e) => Err(KeypairCryptoError::Argon2(e.to_string())),
+    }
+}
+
+/// Debug output shows only the public DID; the private key is never
+/// printed.
+impl std::fmt::Debug for Keypair {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let did = self.did();
+        write!(f, "Keypair {{ did: {} }}", did)
+    }
+}
@@ -0,0 +1,23 @@
+//! Núcleo puro de Minga: AST normalizado, direccionamiento por contenido
+//! semántico y Merkle Search Tree. Sin IO, sin red, sin filesystem.
+//!
+//! La separación es deliberada: este crate jamás importa libp2p, fuser ni
+//! ningún tipo asociado a un canal de IO. Si algo aquí necesita IO, el
+//! contrato se expone como trait y la implementación vive en otro crate.
+
+pub mod alpha;
+pub mod ast;
+pub mod attestation;
+pub mod cas;
+pub mod identity;
+pub mod mst;
+pub mod parse;
+pub mod store;
+
+pub use alpha::hash_node_alpha;
+pub use ast::SemanticNode;
+pub use attestation::{Attestation, AttestationError, AttestationStore};
+pub use cas::{hash_components, hash_node, ContentHash};
+pub use identity::{Did, Keypair, KeypairCryptoError, Signature};
+pub use mst::{empty_subtree_hash, Mst, MstDiff, NodeProbe};
+pub use store::{hash_stored, MemStore, NodeStore, StoredNode};
@@ -0,0 +1,457 @@
+//! Merkle Search Tree (MST).
+//!
+//! Estructura B-árbol probabilística sobre hashes, en la que el "nivel" de
+//! cada clave se deriva determinísticamente de su propio hash (cantidad de
+//! nibbles cero al inicio). Eso da dos propiedades clave:
+//!
+//! * **Independencia del orden de inserción.** El conjunto `{a, b, c}`
+//!   siempre produce el mismo árbol y el mismo `root_hash`, sin importar
+//!   en qué orden se insertaron las claves.
+//! * **Logarithmic comparison.** Two repositories can tell whether they
+//!   hold the same set of hashes by comparing a single 32-byte digest
+//!   (`root_hash`); if they differ, they descend only into the branches
+//!   whose hashes differ.
+//!
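+//! This implementation covers insert/contains/iter and produces a correct
+//! Merkle `root_hash`. A first `diff` (see `Mst::diff`) is already provided
+//! in this module: it prunes identical subtrees by hash while both trees
+//! are structurally aligned, and falls back to enumerating and merging the
+//! keys of both sides when they are not.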
+
+use crate::cas::ContentHash;
+use blake3::Hasher;
+use std::collections::HashMap;
+use std::sync::OnceLock;
+
+/// Structural summary of one internal MST node: the level its keys live
+/// at, the keys at that level, and the hash of each of its children
+/// (subtrees). This is what a peer transmits when another peer asks for
+/// the shape of a subtree during a recursive sync; the intent is bandwidth
+/// proportional to the divergence rather than to the tree size.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct NodeProbe {
+    /// Level of the summarized node.
+    pub level: u32,
+    /// Keys stored directly in the summarized node.
+    pub keys: Vec<ContentHash>,
+    /// Merkle hash of each child subtree, in child order.
+    pub child_hashes: Vec<ContentHash>,
+}
+
+/// Canonical hash of the empty subtree (the "neutral element" of the MST):
+/// the BLAKE3 digest of the single tag byte `b"E"`.
+///
+/// The value is computed once and cached, so any peer can obtain it
+/// locally and recognise an empty branch on the other side without
+/// asking for a probe.
+pub fn empty_subtree_hash() -> ContentHash {
+    static CACHED: OnceLock<ContentHash> = OnceLock::new();
+    let cached = CACHED.get_or_init(|| {
+        let mut hasher = Hasher::new();
+        hasher.update(b"E");
+        let digest = hasher.finalize();
+        ContentHash(*digest.as_bytes())
+    });
+    *cached
+}
+
+/// Merkle Search Tree over `ContentHash` keys.
+///
+/// The default value is the empty tree.
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct Mst {
+    /// Root subtree; `Subtree::Empty` while no key has been inserted.
+    root: Subtree,
+}
+
+/// A (possibly empty) subtree of the MST.
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+enum Subtree {
+    /// No keys at all. This is the default variant.
+    #[default]
+    Empty,
+    /// An internal node; boxed so the recursive type has a known size.
+    Node(Box<NodeData>),
+}
+
+/// Payload of an internal MST node.
+///
+/// The code in this module relies on two invariants: `keys` is sorted
+/// ascending (it is searched with `binary_search`), and `children` always
+/// holds exactly `keys.len() + 1` subtrees, where `children[i]` contains
+/// the keys that sort before `keys[i]` and the last child those after the
+/// last key.
+#[derive(Debug, Clone, PartialEq, Eq)]
+struct NodeData {
+    /// Level shared by every key stored directly in this node.
+    level: u32,
+    /// Separator keys of this node, ascending.
+    keys: Vec<ContentHash>,
+    /// Subtrees between and around the separators.
+    children: Vec<Subtree>,
+}
+
+/// Deterministic level of a hash: the number of leading zero nibbles
+/// (4-bit groups) in its bytes.
+///
+/// Assuming uniformly distributed hashes this is a geometric distribution
+/// with base 16, which is what keeps the tree balanced in expectation.
+fn level_of(h: &ContentHash) -> u32 {
+    let mut level = 0u32;
+    for byte in h.0.iter().copied() {
+        match byte {
+            // Both nibbles are zero: keep scanning.
+            0x00 => level += 2,
+            // Only the high nibble is zero: count it and stop.
+            0x01..=0x0f => return level + 1,
+            // High nibble is non-zero: the run of zeros ended before it.
+            _ => return level,
+        }
+    }
+    level
+}
+
+impl Mst {
+    /// Creates an empty tree.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Inserts `h`. Returns `true` when the key was not present before.
+    pub fn insert(&mut self, h: ContentHash) -> bool {
+        let level = level_of(&h);
+        // Temporarily leave an empty root behind while the tree is rebuilt.
+        let previous_root = std::mem::take(&mut self.root);
+        let (root, was_new) = insert_in(previous_root, h, level);
+        self.root = root;
+        was_new
+    }
+
+    /// Returns `true` when `h` is one of the stored keys.
+    pub fn contains(&self, h: &ContentHash) -> bool {
+        contains_in(&self.root, h)
+    }
+
+    /// Number of stored keys. Walks the whole tree on every call.
+    pub fn len(&self) -> usize {
+        len_of(&self.root)
+    }
+
+    /// Returns `true` when no key has been inserted.
+    pub fn is_empty(&self) -> bool {
+        match self.root {
+            Subtree::Empty => true,
+            Subtree::Node(_) => false,
+        }
+    }
+
+    /// In-order traversal: keys are yielded in ascending hash order.
+    pub fn iter(&self) -> Iter<'_> {
+        let mut walker = Iter { stack: Vec::new() };
+        walker.descend_left(&self.root);
+        walker
+    }
+
+    /// Merkle hash of the whole tree. Two trees holding the same set of
+    /// keys are meant to produce the same `root_hash` regardless of the
+    /// order in which the keys were inserted.
+    pub fn root_hash(&self) -> ContentHash {
+        subtree_hash(&self.root)
+    }
+
+    /// Builds a `subtree_hash -> NodeProbe` index with one entry per
+    /// internal node of the tree.
+    ///
+    /// A peer can use it as a lookup table to answer probe requests:
+    /// given a hash it previously announced, the matching `NodeProbe` is
+    /// a single map lookup away.
+    pub fn build_probe_index(&self) -> HashMap<ContentHash, NodeProbe> {
+        let mut index = HashMap::new();
+        index_subtree(&self.root, &mut index);
+        index
+    }
+
+    /// Symmetric difference between `self` and `other`: the keys present
+    /// in `self` but not in `other`, and vice versa.
+    ///
+    /// Subtrees whose Merkle hashes match on both sides are skipped
+    /// without descending. When two nodes share level and separator keys,
+    /// their children are compared pairwise; when the structure diverges
+    /// (different level, or different separators at the same level), the
+    /// keys of both sides are enumerated and merged in order.
+    ///
+    /// Both result vectors come out sorted by ascending hash, so a peer
+    /// can stream the missing blocks in a stable order.
+    pub fn diff(&self, other: &Mst) -> MstDiff {
+        let mut only_in_self = Vec::new();
+        let mut only_in_other = Vec::new();
+        diff_subtrees(&self.root, &other.root, &mut only_in_self, &mut only_in_other);
+        MstDiff { only_in_self, only_in_other }
+    }
+}
+
+/// Result of comparing two MSTs. `is_empty()` holds exactly when both
+/// trees represent the same set of keys.
+#[derive(Debug, Default, Clone, PartialEq, Eq)]
+pub struct MstDiff {
+    /// Keys found only in the tree `diff` was called on.
+    pub only_in_self: Vec<ContentHash>,
+    /// Keys found only in the tree passed as `other`.
+    pub only_in_other: Vec<ContentHash>,
+}
+
+impl MstDiff {
+    /// Returns `true` when neither side has a key the other lacks.
+    pub fn is_empty(&self) -> bool {
+        self.total() == 0
+    }
+
+    /// Total number of differing keys, counting both sides.
+    pub fn total(&self) -> usize {
+        let Self { only_in_self, only_in_other } = self;
+        only_in_self.len() + only_in_other.len()
+    }
+}
+
+/// Returns `true` when `h` is stored somewhere in subtree `t`.
+///
+/// At each node the key is either found among the separators or the
+/// search continues in the single child whose range can contain it.
+fn contains_in(t: &Subtree, h: &ContentHash) -> bool {
+    let mut current = t;
+    while let Subtree::Node(node) = current {
+        match node.keys.binary_search(h) {
+            Ok(_) => return true,
+            Err(slot) => current = &node.children[slot],
+        }
+    }
+    false
+}
+
+/// Counts the keys stored in subtree `t` by visiting every node.
+fn len_of(t: &Subtree) -> usize {
+    let Subtree::Node(node) = t else {
+        return 0;
+    };
+    let mut total = node.keys.len();
+    for child in &node.children {
+        total += len_of(child);
+    }
+    total
+}
+
+/// Merkle hash of subtree `t`.
+///
+/// * An empty subtree hashes to `empty_subtree_hash()` (the digest of the
+///   tag byte `b"E"`). The cached value is reused instead of re-hashing
+///   the tag, which keeps a single definition of the encoding and avoids
+///   a fresh hasher for every empty child.
+/// * A node hashes the tag byte `b"N"`, its level (little-endian `u32`),
+///   its key count (little-endian `u64`), every key, and then the hash of
+///   every child, in order.
+///
+/// Nothing is cached for nodes: each call recomputes the hashes of the
+/// whole subtree.
+fn subtree_hash(t: &Subtree) -> ContentHash {
+    let n = match t {
+        Subtree::Empty => return empty_subtree_hash(),
+        Subtree::Node(n) => n,
+    };
+    let mut h = Hasher::new();
+    h.update(b"N");
+    h.update(&n.level.to_le_bytes());
+    // Widening to u64 keeps the encoding independent of the platform's usize.
+    h.update(&(n.keys.len() as u64).to_le_bytes());
+    for k in &n.keys {
+        h.update(&k.0);
+    }
+    for c in &n.children {
+        h.update(&subtree_hash(c).0);
+    }
+    ContentHash(*h.finalize().as_bytes())
+}
+
+/// Inserts `h` (whose level is `l`) into subtree `t`. Returns the new
+/// subtree and whether a key was actually added (`false` for a duplicate).
+///
+/// `l` is expected to be `level_of(&h)`; the `unreachable!` below relies on
+/// the level being a pure function of the hash.
+///
+/// The existing node allocation is mutated in place and handed back,
+/// instead of being moved out of its `Box` and re-boxed at every visited
+/// level.
+fn insert_in(t: Subtree, h: ContentHash, l: u32) -> (Subtree, bool) {
+    let mut node = match t {
+        Subtree::Empty => {
+            let leaf = NodeData {
+                level: l,
+                keys: vec![h],
+                children: vec![Subtree::Empty, Subtree::Empty],
+            };
+            return (Subtree::Node(Box::new(leaf)), true);
+        }
+        Subtree::Node(boxed) => boxed,
+    };
+
+    match l.cmp(&node.level) {
+        std::cmp::Ordering::Greater => {
+            // The new key outranks this node: split the current subtree
+            // around it and promote the key to a new root above both halves.
+            let (left, right) = split_at(Subtree::Node(node), &h);
+            let new_root = NodeData {
+                level: l,
+                keys: vec![h],
+                children: vec![left, right],
+            };
+            (Subtree::Node(Box::new(new_root)), true)
+        }
+        std::cmp::Ordering::Equal => match node.keys.binary_search(&h) {
+            // Already present: hand the node back untouched.
+            Ok(_) => (Subtree::Node(node), false),
+            Err(i) => {
+                // The child that used to span the gap must be split around
+                // the new separator.
+                let middle = std::mem::take(&mut node.children[i]);
+                let (left, right) = split_at(middle, &h);
+                node.keys.insert(i, h);
+                node.children[i] = left;
+                node.children.insert(i + 1, right);
+                (Subtree::Node(node), true)
+            }
+        },
+        std::cmp::Ordering::Less => {
+            // The new key belongs in the child under the matching gap.
+            let i = match node.keys.binary_search(&h) {
+                Ok(_) => unreachable!(
+                    "colisión: clave de nivel inferior coincide con separador de nivel superior"
+                ),
+                Err(i) => i,
+            };
+            let child = std::mem::take(&mut node.children[i]);
+            let (new_child, inserted) = insert_in(child, h, l);
+            node.children[i] = new_child;
+            (Subtree::Node(node), inserted)
+        }
+    }
+}
+
+/// Splits `t` into (keys < pivot, keys > pivot).
+///
+/// Pre-condition: every subtree involved has a level strictly lower than
+/// the pivot's (the pivot lives above them), so the pivot itself can never
+/// be one of the keys and does not appear in the result.
+///
+/// Keys and children are partitioned with `Vec::split_off`, which moves
+/// the elements without cloning the key vector.
+fn split_at(t: Subtree, pivot: &ContentHash) -> (Subtree, Subtree) {
+    let boxed = match t {
+        Subtree::Empty => return (Subtree::Empty, Subtree::Empty),
+        Subtree::Node(boxed) => boxed,
+    };
+    let NodeData {
+        level,
+        keys: mut left_keys,
+        children: mut left_children,
+    } = *boxed;
+
+    let i = match left_keys.binary_search(pivot) {
+        Ok(_) => unreachable!("pivot coincide con clave de nivel inferior"),
+        Err(i) => i,
+    };
+
+    // keys[..i] sort before the pivot, keys[i..] after it.
+    let right_keys = left_keys.split_off(i);
+    // children[..i] are entirely below the pivot and children[i + 1..]
+    // entirely above it; children[i] straddles it and is split recursively.
+    let mut right_children = left_children.split_off(i);
+    let middle = std::mem::take(
+        right_children
+            .first_mut()
+            .expect("invariante: existe children[i]"),
+    );
+    let (l_mid, r_mid) = split_at(middle, pivot);
+    left_children.push(l_mid);
+    right_children[0] = r_mid;
+
+    // A side left without keys collapses to its only remaining child, so
+    // no key-less node is ever materialised.
+    let rebuild = |keys: Vec<ContentHash>, mut children: Vec<Subtree>| {
+        if keys.is_empty() {
+            children.pop().unwrap_or(Subtree::Empty)
+        } else {
+            Subtree::Node(Box::new(NodeData { level, keys, children }))
+        }
+    };
+    (
+        rebuild(left_keys, left_children),
+        rebuild(right_keys, right_children),
+    )
+}
+
+/// Records a `NodeProbe` for every node of `t` in `idx`, keyed by the
+/// node's `subtree_hash`. Empty subtrees add nothing.
+///
+/// Hashes are recomputed through `subtree_hash` at every node; nothing is
+/// memoised between levels.
+fn index_subtree(t: &Subtree, idx: &mut HashMap<ContentHash, NodeProbe>) {
+    let Subtree::Node(node) = t else {
+        return;
+    };
+    let probe = NodeProbe {
+        level: node.level,
+        keys: node.keys.clone(),
+        child_hashes: node.children.iter().map(subtree_hash).collect(),
+    };
+    idx.insert(subtree_hash(t), probe);
+    node.children
+        .iter()
+        .for_each(|child| index_subtree(child, idx));
+}
+
+/// Appends to `only_in_1` / `only_in_2` the keys that appear in exactly
+/// one of the two subtrees.
+///
+/// Note that `subtree_hash` is recomputed on every call (nothing is cached
+/// here), so the pruning below saves descent work and output, not the
+/// hashing itself.
+fn diff_subtrees(
+    t1: &Subtree,
+    t2: &Subtree,
+    only_in_1: &mut Vec<ContentHash>,
+    only_in_2: &mut Vec<ContentHash>,
+) {
+    // Merkle short-circuit: equal hashes mean both subtrees hold the same
+    // set, so the whole branch is dropped with a single comparison.
+    if subtree_hash(t1) == subtree_hash(t2) {
+        return;
+    }
+
+    let (n1, n2) = match (t1, t2) {
+        (Subtree::Empty, _) => return collect_all(t2, only_in_2),
+        (_, Subtree::Empty) => return collect_all(t1, only_in_1),
+        (Subtree::Node(a), Subtree::Node(b)) => (a, b),
+    };
+
+    let aligned = n1.level == n2.level && n1.keys == n2.keys;
+    if !aligned {
+        // Divergent structure: enumerate both sides in order and merge.
+        // Correct, but with no further Merkle pruning; splitting the lower
+        // side at each separator of the higher one could recover pruning
+        // for the misaligned case in a future iteration.
+        let mut keys_1 = Vec::with_capacity(len_of(t1));
+        let mut keys_2 = Vec::with_capacity(len_of(t2));
+        collect_all(t1, &mut keys_1);
+        collect_all(t2, &mut keys_2);
+        merge_diff_sorted(&keys_1, &keys_2, only_in_1, only_in_2);
+        return;
+    }
+
+    // Same level and same separators: children line up by position, and
+    // every identical pair is pruned by the hash check in its own call.
+    for (child_1, child_2) in n1.children.iter().zip(n2.children.iter()) {
+        diff_subtrees(child_1, child_2, only_in_1, only_in_2);
+    }
+}
+
+/// Appends every key of subtree `t` to `out` in in-order (ascending)
+/// sequence: each child is emitted before the separator that follows it,
+/// and the last child after the last separator.
+fn collect_all(t: &Subtree, out: &mut Vec<ContentHash>) {
+    let Subtree::Node(node) = t else {
+        return;
+    };
+    for (child, key) in node.children.iter().zip(node.keys.iter()) {
+        collect_all(child, out);
+        out.push(*key);
+    }
+    // One more child than keys: the trailing subtree holds the largest keys.
+    collect_all(&node.children[node.keys.len()], out);
+}
+
+/// Merge-walks two ascending slices and appends to `only_a` / `only_b`
+/// the elements that appear in just one of them. Elements present in both
+/// are skipped. Both inputs must already be sorted ascending.
+fn merge_diff_sorted(
+    a: &[ContentHash],
+    b: &[ContentHash],
+    only_a: &mut Vec<ContentHash>,
+    only_b: &mut Vec<ContentHash>,
+) {
+    use std::cmp::Ordering;
+
+    let mut rest_a = a;
+    let mut rest_b = b;
+    while let (Some((head_a, tail_a)), Some((head_b, tail_b))) =
+        (rest_a.split_first(), rest_b.split_first())
+    {
+        match head_a.cmp(head_b) {
+            Ordering::Less => {
+                only_a.push(*head_a);
+                rest_a = tail_a;
+            }
+            Ordering::Greater => {
+                only_b.push(*head_b);
+                rest_b = tail_b;
+            }
+            Ordering::Equal => {
+                rest_a = tail_a;
+                rest_b = tail_b;
+            }
+        }
+    }
+    // Whatever is left on either side has no counterpart on the other.
+    only_a.extend_from_slice(rest_a);
+    only_b.extend_from_slice(rest_b);
+}
+
+/// In-order iterator over the keys of an `Mst`, created by `Mst::iter`.
+pub struct Iter<'a> {
+    /// Each frame is (node, index of the next key to emit). Whenever a
+    /// frame is pushed, the walk has also gone down through that node's
+    /// leftmost child (children[0]).
+    stack: Vec<(&'a NodeData, usize)>,
+}
+
+impl<'a> Iter<'a> {
+    /// Pushes a frame for `t` and for every node along its leftmost
+    /// spine, each positioned at its first key. Does nothing for an empty
+    /// subtree.
+    fn descend_left(&mut self, t: &'a Subtree) {
+        if let Subtree::Node(node) = t {
+            self.stack.push((&**node, 0));
+            self.descend_left(&node.children[0]);
+        }
+    }
+}
+
+impl<'a> Iterator for Iter<'a> {
+    type Item = &'a ContentHash;
+
+    /// Yields the next key in ascending order, or `None` once every frame
+    /// has been exhausted.
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let &(node, key_idx) = self.stack.last()?;
+            if key_idx >= node.keys.len() {
+                // Every key of this node was emitted; resume in the parent.
+                self.stack.pop();
+                continue;
+            }
+            if let Some(frame) = self.stack.last_mut() {
+                frame.1 = key_idx + 1;
+            }
+            // Queue the subtree to the right of the key being emitted so
+            // that its keys come out next.
+            self.descend_left(&node.children[key_idx + 1]);
+            return Some(&node.keys[key_idx]);
+        }
+    }
+}
@@ -0,0 +1,25 @@
+//! Adaptadores de parsing por dialecto. Hoy: Rust vía tree-sitter-rust.
+//!
+//! `parse::rust` produce un `SemanticNode` normalizado a partir de una
+//! cadena de código fuente. El error es opaco a propósito: el caller no
+//! necesita distinguir "gramática inválida" de "fallo del parser".
+
+use crate::ast::SemanticNode;
+use thiserror::Error;
+use tree_sitter::{Language, Parser};
+
+/// Errors returned by the parsing adapters in this module.
+#[derive(Debug, Error)]
+pub enum ParseError {
+    /// The parser rejected the language it was asked to use.
+    #[error("tree-sitter no pudo configurar el lenguaje")]
+    Language,
+    /// The parser returned no tree for the given input.
+    #[error("tree-sitter no produjo árbol para la entrada")]
+    NoTree,
+}
+
+/// Parses Rust `source` with tree-sitter and converts the resulting tree
+/// into a `SemanticNode` via `SemanticNode::from_tree_sitter`.
+///
+/// # Errors
+///
+/// * `ParseError::Language` — the parser refused the Rust grammar.
+/// * `ParseError::NoTree` — the parser produced no tree for `source`.
+pub fn rust(source: &str) -> Result<SemanticNode, ParseError> {
+    let grammar: Language = tree_sitter_rust::LANGUAGE.into();
+    let mut parser = Parser::new();
+    if parser.set_language(&grammar).is_err() {
+        return Err(ParseError::Language);
+    }
+    match parser.parse(source, None) {
+        Some(tree) => Ok(SemanticNode::from_tree_sitter(
+            tree.root_node(),
+            source.as_bytes(),
+        )),
+        None => Err(ParseError::NoTree),
+    }
+}
@@ -0,0 +1,144 @@
+//! Almacén de nodos direccionados por contenido.
+//!
+//! Cada `SemanticNode` se descompone en `StoredNode`s donde los hijos son
+//! referencias por hash, no estructuras inline. Así dos subárboles con la
+//! misma estructura se almacenan una sola vez, sin importar en cuántos
+//! lugares aparezcan en el repositorio. Esa es la diferencia entre "Git
+//! semántico" y "diff de líneas".
+//!
+//! `NodeStore` es el contrato; `MemStore` es la implementación de
+//! referencia, en memoria, agnóstica de IO. Un futuro `SledStore` o
+//! `RocksStore` vivirá en otro crate y se enchufará vía este trait sin
+//! tocar el resto del núcleo.
+
+use crate::ast::SemanticNode;
+use crate::cas::{self, ContentHash};
+use std::collections::HashMap;
+
+/// "Stored" form of a node: the same fields as `SemanticNode`, except
+/// that children are hashes instead of nested structures. This is the
+/// canonical at-rest format and the one that makes deduplication possible.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub struct StoredNode {
+    /// Node kind, copied from the source `SemanticNode`.
+    pub kind: String,
+    /// Field name, if any, copied from the source `SemanticNode`.
+    pub field_name: Option<String>,
+    /// Leaf text bytes, if any, copied from the source `SemanticNode`.
+    pub leaf_text: Option<Vec<u8>>,
+    /// Content hashes of the children, in order.
+    pub children: Vec<ContentHash>,
+}
+
+/// Hash of a `StoredNode`, computed with `cas::hash_components` over the
+/// same four parts `MemStore::put` feeds it, so it matches the key `put`
+/// assigns to that node. (Presumably this also equals `hash_node` of the
+/// equivalent `SemanticNode`; confirm against `cas`.)
+///
+/// Lets a wire protocol check that a node it was handed really has the
+/// hash that was announced, without rebuilding any descendants.
+pub fn hash_stored(stored: &StoredNode) -> ContentHash {
+    let StoredNode {
+        kind,
+        field_name,
+        leaf_text,
+        children,
+    } = stored;
+    cas::hash_components(kind, field_name.as_deref(), leaf_text.as_deref(), children)
+}
+
+/// Contract of a content-addressed node store.
+pub trait NodeStore {
+    /// Inserts a whole tree. Recursively unpacks the children and returns
+    /// the hash of the root. Idempotent: inserting the same tree twice
+    /// does not grow the store.
+    fn put(&mut self, node: &SemanticNode) -> ContentHash;
+
+    /// Inserts an already-chunked node under its hash. Does not recurse
+    /// into children: the caller is responsible for making sure they will
+    /// be present (typically a sync protocol that receives nodes in order
+    /// and requests the missing ones as it discovers references).
+    ///
+    /// NOTE(review): nothing in this signature ties `hash` to the content
+    /// of `stored`; confirm that callers verify it (e.g. with
+    /// `hash_stored`) before inserting data from a peer.
+    fn put_chunked(&mut self, hash: ContentHash, stored: StoredNode);
+
+    /// Looks up the stored node for `h`, if present.
+    fn get(&self, h: &ContentHash) -> Option<&StoredNode>;
+
+    /// Returns `true` when a node is stored under `h`.
+    fn contains(&self, h: &ContentHash) -> bool {
+        self.get(h).is_some()
+    }
+
+    /// Rebuilds the original `SemanticNode` from its hash, resolving
+    /// children recursively. `None` if any hash is missing (incomplete or
+    /// inconsistent store).
+    fn reconstruct(&self, h: &ContentHash) -> Option<SemanticNode>;
+
+    /// Iterates over every `(hash, stored_node)` pair in the store, in no
+    /// guaranteed order. Used to merge stores after a sync session (a
+    /// peer receives the other side's nodes in its session store, which
+    /// is then poured into the shared store).
+    fn iter(&self) -> Box<dyn Iterator<Item = (&ContentHash, &StoredNode)> + '_>;
+
+    /// Number of nodes in the store.
+    fn len(&self) -> usize;
+
+    /// Returns `true` when the store holds no nodes.
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+/// In-memory `NodeStore` backed by a `HashMap`.
+#[derive(Debug, Default, Clone)]
+pub struct MemStore {
+    /// Stored nodes keyed by their content hash.
+    map: HashMap<ContentHash, StoredNode>,
+}
+
+impl MemStore {
+    /// Creates an empty store.
+    pub fn new() -> Self {
+        Self {
+            map: HashMap::new(),
+        }
+    }
+}
+
+impl NodeStore for MemStore {
+    /// Stores `node` and all of its descendants, returning the root hash.
+    ///
+    /// Bottom-up walk: children are stored first and yield their hashes,
+    /// then the parent hash is composed from those child hashes through
+    /// `cas::hash_components`, so each subtree is hashed exactly once. A
+    /// hash that is already present keeps its existing entry.
+    fn put(&mut self, node: &SemanticNode) -> ContentHash {
+        let child_hashes: Vec<ContentHash> =
+            node.children.iter().map(|child| self.put(child)).collect();
+        let hash = cas::hash_components(
+            &node.kind,
+            node.field_name.as_deref(),
+            node.leaf_text.as_deref(),
+            &child_hashes,
+        );
+        self.map.entry(hash).or_insert_with(|| StoredNode {
+            kind: node.kind.clone(),
+            field_name: node.field_name.clone(),
+            leaf_text: node.leaf_text.clone(),
+            children: child_hashes,
+        });
+        hash
+    }
+
+    /// Stores `stored` under `hash` unless that hash is already present.
+    /// The hash is taken as given; it is not checked against the content.
+    fn put_chunked(&mut self, hash: ContentHash, stored: StoredNode) {
+        self.map.entry(hash).or_insert(stored);
+    }
+
+    /// Looks up the stored node for `h`.
+    fn get(&self, h: &ContentHash) -> Option<&StoredNode> {
+        self.map.get(h)
+    }
+
+    /// Rebuilds the full `SemanticNode` rooted at `h`, or `None` as soon
+    /// as any referenced hash is missing from the map.
+    fn reconstruct(&self, h: &ContentHash) -> Option<SemanticNode> {
+        let stored = self.map.get(h)?;
+        let children = stored
+            .children
+            .iter()
+            .map(|child| self.reconstruct(child))
+            .collect::<Option<Vec<_>>>()?;
+        Some(SemanticNode {
+            kind: stored.kind.clone(),
+            field_name: stored.field_name.clone(),
+            leaf_text: stored.leaf_text.clone(),
+            children,
+        })
+    }
+
+    /// Iterates over every `(hash, node)` pair in the map's own order.
+    fn iter(&self) -> Box<dyn Iterator<Item = (&ContentHash, &StoredNode)> + '_> {
+        Box::new(self.map.iter())
+    }
+
+    /// Number of distinct nodes held.
+    fn len(&self) -> usize {
+        self.map.len()
+    }
+}