chore: absorbe nakui (ERP matemático) en modules/nakui

- crates/modules/nakui/core/: el crate nakui-core (4 bins, tests).
  Deps directas (serde, rhai, surrealdb, petgraph, sha2, uuid, tokio,
  thiserror v1) — no convertidas a workspace = true en esta pasada.
- crates/modules/nakui/modules/{inventory,sales,treasury}/: datos
  declarativos del dominio (nsmc.json, schema.k, morphisms/) que el
  crate consume — no son crates.

cargo check -p nakui-core: 0 errores.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sergio
2026-05-08 05:49:58 +00:00
parent 53dbdf0f1d
commit 4d50bfc587
49 changed files with 11953 additions and 40 deletions
+224
View File
@@ -0,0 +1,224 @@
use nakui_core::event_log::{
EventLog, ExecuteError, execute_and_log, replay, seed_and_log, verify_log,
};
use nakui_core::executor::Executor;
use nakui_core::store::{MemoryStore, Store};
use serde_json::json;
use uuid::Uuid;
/// Treasury demo: seeds three `Caja` records, drives a mix of accepted and
/// rejected morphisms through the event log, dumps the log, and finally
/// checks that replaying the log reproduces the live store and that every
/// logged morphism reproduces its ops.
///
/// The module directory comes from `NAKUI_MODULE` (default
/// `modules/treasury`). The temporary log file is removed on exit unless
/// `NAKUI_DEMO_KEEP` is set.
fn main() {
    use nakui_core::event_log::LogEntry;

    let module_dir = match std::env::var("NAKUI_MODULE") {
        Ok(dir) => dir,
        Err(_) => String::from("modules/treasury"),
    };
    let exec = Executor::load_module(&module_dir).expect("load module");

    let log_file = format!("nakui_demo_{}.jsonl", Uuid::new_v4());
    let log_path = std::env::temp_dir().join(log_file);
    let mut log = EventLog::open(&log_path).expect("open log");
    let mut store = MemoryStore::new();

    let caja_a = Uuid::new_v4();
    let caja_b = Uuid::new_v4();
    let caja_c = Uuid::new_v4();

    // (id, name, saldo, currency, panic message if the seed fails)
    let seeds = [
        (caja_a, "Caja Principal", 200_000_i64, "USD", "seed A"),
        (caja_b, "Caja Chica", 50_000_i64, "USD", "seed B"),
        (caja_c, "Caja EUR", 30_000_i64, "EUR", "seed C"),
    ];
    for &(id, name, saldo, currency, failure) in &seeds {
        let record = json!({
            "id": id.to_string(),
            "name": name,
            "saldo": saldo,
            "currency": currency,
        });
        seed_and_log(&exec, &mut store, &mut log, "Caja", id, record).expect(failure);
    }

    let cajas = [("A", caja_a), ("B", caja_b), ("C", caja_c)];
    let a_to_b = [("source", caja_a), ("dest", caja_b)];

    section("== seed ==");
    for &(label, id) in &cajas {
        print_caja(&store, label, id);
    }

    section("== A: deposit 50_000 USD ==");
    let deposit = json!({
        "monto": 50_000_i64,
        "tipo": "in",
        "timestamp": "2026-05-04T12:00:00Z",
        "memo": "deposito A",
        "movimiento_id": Uuid::new_v4().to_string(),
    });
    run_and_report(
        &exec,
        &mut store,
        &mut log,
        "register_cash_move",
        &[("caja", caja_a)],
        deposit,
    );
    print_caja(&store, "A", caja_a);

    section("== transfer A -> B 100_000 USD ==");
    let transfer = json!({
        "monto": 100_000_i64,
        "timestamp": "2026-05-04T12:30:00Z",
        "memo": "transferencia operativa",
        "transfer_id": Uuid::new_v4().to_string(),
    });
    run_and_report(
        &exec,
        &mut store,
        &mut log,
        "transfer_between_cajas",
        &a_to_b,
        transfer,
    );
    print_caja(&store, "A", caja_a);
    print_caja(&store, "B", caja_b);

    section("== transfer A -> B 999_999_999 USD (reject: post-check on source) ==");
    let overdraw = json!({
        "monto": 999_999_999_i64,
        "timestamp": "2026-05-04T13:00:00Z",
        "memo": "overdraw",
        "transfer_id": Uuid::new_v4().to_string(),
    });
    run_and_report(
        &exec,
        &mut store,
        &mut log,
        "transfer_between_cajas",
        &a_to_b,
        overdraw,
    );

    section("== transfer A(USD) -> C(EUR) (reject: rhai throws) ==");
    let cross_currency = json!({
        "monto": 10_000_i64,
        "timestamp": "2026-05-04T14:00:00Z",
        "memo": "USD -> EUR",
        "transfer_id": Uuid::new_v4().to_string(),
    });
    run_and_report(
        &exec,
        &mut store,
        &mut log,
        "transfer_between_cajas",
        &[("source", caja_a), ("dest", caja_c)],
        cross_currency,
    );

    section("== self-transfer A -> A (reject: DuplicateInputId) ==");
    let self_transfer = json!({
        "monto": 1_000_i64,
        "timestamp": "2026-05-04T15:00:00Z",
        "memo": "self",
        "transfer_id": Uuid::new_v4().to_string(),
    });
    run_and_report(
        &exec,
        &mut store,
        &mut log,
        "transfer_between_cajas",
        &[("source", caja_a), ("dest", caja_a)],
        self_transfer,
    );

    section("== final live state ==");
    for &(label, id) in &cajas {
        print_caja(&store, label, id);
    }

    let entries = log.entries().expect("read log");
    let header = format!(
        "== log: {} entries at {} ==",
        entries.len(),
        log.path().display()
    );
    section(&header);
    for entry in &entries {
        let line = match entry {
            LogEntry::Morphism {
                seq, morphism, ops, ..
            } => format!(" #{:02} morph {} ({} ops)", seq, morphism, ops.len()),
            LogEntry::Seed {
                seq, entity, id, ..
            } => format!(" #{:02} seed {} {}", seq, entity, id),
        };
        println!("{}", line);
    }

    section("== replay verification (state) ==");
    let replayed = replay(&log).expect("replay");
    let verdict = if store == replayed {
        " ok: replayed store byte-equal to live store"
    } else {
        " MISMATCH: replay diverges from live"
    };
    println!("{}", verdict);

    section("== determinism verification (ops) ==");
    match verify_log(&log, &exec) {
        Err(e) => println!(" nondeterminism detected: {}", e),
        Ok(()) => println!(" ok: every logged morphism reproduced its ops on re-execution"),
    }

    if std::env::var_os("NAKUI_DEMO_KEEP").is_some() {
        println!("\n(NAKUI_DEMO_KEEP set — keeping log at {})", log_path.display());
    } else {
        // Best-effort cleanup: a failed delete of the temp log is ignored.
        let _ = std::fs::remove_file(&log_path);
    }
}
/// Runs one morphism through `execute_and_log` and prints a one-line
/// outcome: the op count and log sequence number on success, or which
/// stage failed (`PreLog`, `LogAppend`, `PostLogStore`) otherwise.
fn run_and_report(
    exec: &Executor,
    store: &mut MemoryStore,
    log: &mut EventLog,
    morphism: &str,
    inputs: &[(&str, Uuid)],
    params: serde_json::Value,
) {
    let outcome = execute_and_log(exec, store, log, morphism, inputs, params);
    let message = match outcome {
        Err(ExecuteError::PostLogStore(e)) => format!(
            " POST-LOG STORE FAILED (log is canonical, store stale): {}",
            e
        ),
        Err(ExecuteError::LogAppend(e)) => format!(" LOG APPEND FAILED: {}", e),
        Err(ExecuteError::PreLog(e)) => format!(" rejected: {}", e),
        // The entry just appended sits one below the log's next sequence number.
        Ok(ops) => format!(" ok ({} ops, logged at #{})", ops.len(), log.next_seq() - 1),
    };
    println!("{}", message);
}
/// Prints the `saldo` and `currency` of the `Caja` record `id`, prefixed by
/// `label` and the id. Missing or mistyped fields print as `0` / `?`.
///
/// # Panics
/// Panics if the record cannot be loaded from `store`.
fn print_caja(store: &MemoryStore, label: &str, id: Uuid) {
    let record = store.load("Caja", id).expect("caja exists");
    let saldo = match record.get("saldo").and_then(|field| field.as_i64()) {
        Some(amount) => amount,
        None => 0,
    };
    let currency = match record.get("currency").and_then(|field| field.as_str()) {
        Some(code) => code,
        None => "?",
    };
    println!(" {} {}: saldo={} {}", label, id, saldo, currency);
}
/// Prints `title` on its own line, preceded by a blank line.
fn section(title: &str) {
    println!();
    println!("{}", title);
}
@@ -0,0 +1,170 @@
use nakui_core::event_log::{
EventLog, ExecuteError, execute_and_log, replay, seed_and_log, verify_log,
};
use nakui_core::executor::Executor;
use nakui_core::store::{MemoryStore, Store};
use serde_json::json;
use uuid::Uuid;
/// Inventory demo: seeds three `Stock` records, drives accepted and rejected
/// stock morphisms through the event log, dumps the log, and then checks
/// replay equality and op determinism.
///
/// The module directory comes from `NAKUI_MODULE` (default
/// `modules/inventory`). The temporary log file is always removed on exit.
fn main() {
    use nakui_core::event_log::LogEntry;

    let module_dir = match std::env::var("NAKUI_MODULE") {
        Ok(dir) => dir,
        Err(_) => String::from("modules/inventory"),
    };
    let exec = Executor::load_module(&module_dir).expect("load module");

    let log_file = format!("nakui_inv_{}.jsonl", Uuid::new_v4());
    let log_path = std::env::temp_dir().join(log_file);
    let mut log = EventLog::open(&log_path).expect("open log");
    let mut store = MemoryStore::new();

    // Two stocks of SKU "kg-cafe-honduras-2026" at warehouses A and B,
    // plus a third stock of SKU "lt-aceite-girasol" at warehouse C.
    let stock_a = Uuid::new_v4();
    let stock_b = Uuid::new_v4();
    let stock_c = Uuid::new_v4();

    // (id, sku_id, ubicacion, cantidad, panic message if the seed fails)
    let seeds = [
        (stock_a, "kg-cafe-honduras-2026", "almacen-norte", 500_i64, "seed A"),
        (stock_b, "kg-cafe-honduras-2026", "almacen-sur", 100_i64, "seed B"),
        (stock_c, "lt-aceite-girasol", "almacen-sur", 200_i64, "seed C"),
    ];
    for &(id, sku, ubicacion, cantidad, failure) in &seeds {
        let record = json!({
            "id": id.to_string(),
            "sku_id": sku,
            "ubicacion": ubicacion,
            "cantidad": cantidad,
        });
        seed_and_log(&exec, &mut store, &mut log, "Stock", id, record).expect(failure);
    }

    let a_to_b = [("source", stock_a), ("dest", stock_b)];

    section("== seed ==");
    print_stock(&store, "A (cafe norte)", stock_a);
    print_stock(&store, "B (cafe sur)", stock_b);
    print_stock(&store, "C (aceite sur)", stock_c);

    section("== recibir 250 kg cafe en A ==");
    let receipt = json!({
        "cantidad": 250_i64,
        "timestamp": "2026-05-04T08:00:00Z",
        "movimiento_id": Uuid::new_v4().to_string(),
    });
    run_and_report(
        &exec,
        &mut store,
        &mut log,
        "recibir_stock",
        &[("stock", stock_a)],
        receipt,
    );
    print_stock(&store, "A", stock_a);

    section("== transferir 200 kg cafe A -> B (conserva por sku_id) ==");
    let transfer = json!({
        "cantidad": 200_i64,
        "timestamp": "2026-05-04T09:00:00Z",
        "transfer_id": Uuid::new_v4().to_string(),
    });
    run_and_report(&exec, &mut store, &mut log, "transferir_stock", &a_to_b, transfer);
    print_stock(&store, "A", stock_a);
    print_stock(&store, "B", stock_b);

    section("== transferir 999_999 kg cafe A -> B (reject: stock <= 0) ==");
    let oversized = json!({
        "cantidad": 999_999_i64,
        "timestamp": "2026-05-04T10:00:00Z",
        "transfer_id": Uuid::new_v4().to_string(),
    });
    run_and_report(&exec, &mut store, &mut log, "transferir_stock", &a_to_b, oversized);

    section("== transferir 50 cafe(A) -> aceite(C) (reject: rhai SKU mismatch) ==");
    let cross_sku = json!({
        "cantidad": 50_i64,
        "timestamp": "2026-05-04T11:00:00Z",
        "transfer_id": Uuid::new_v4().to_string(),
    });
    run_and_report(
        &exec,
        &mut store,
        &mut log,
        "transferir_stock",
        &[("source", stock_a), ("dest", stock_c)],
        cross_sku,
    );

    section("== final live state ==");
    for &(label, id) in &[("A", stock_a), ("B", stock_b), ("C", stock_c)] {
        print_stock(&store, label, id);
    }

    let entries = log.entries().expect("read log");
    let header = format!(
        "== log: {} entries at {} ==",
        entries.len(),
        log.path().display()
    );
    section(&header);
    for entry in &entries {
        let line = match entry {
            LogEntry::Morphism {
                seq, morphism, ops, ..
            } => format!(" #{:02} morph {} ({} ops)", seq, morphism, ops.len()),
            LogEntry::Seed {
                seq, entity, id, ..
            } => format!(" #{:02} seed {} {}", seq, entity, id),
        };
        println!("{}", line);
    }

    section("== replay verification (state) ==");
    let replayed = replay(&log).expect("replay");
    let verdict = if store == replayed {
        " ok: replayed store byte-equal to live store"
    } else {
        " MISMATCH"
    };
    println!("{}", verdict);

    section("== determinism verification (ops) ==");
    match verify_log(&log, &exec) {
        Err(e) => println!(" nondeterminism detected: {}", e),
        Ok(()) => println!(" ok: every logged morphism reproduced its ops on re-execution"),
    }

    // Best-effort cleanup: a failed delete of the temp log is ignored.
    let _ = std::fs::remove_file(&log_path);
}
/// Runs one morphism through `execute_and_log` and prints a one-line
/// outcome: the op count and log sequence number on success, or which
/// stage failed (`PreLog`, `LogAppend`, `PostLogStore`) otherwise.
fn run_and_report(
    exec: &Executor,
    store: &mut MemoryStore,
    log: &mut EventLog,
    morphism: &str,
    inputs: &[(&str, Uuid)],
    params: serde_json::Value,
) {
    let outcome = execute_and_log(exec, store, log, morphism, inputs, params);
    let message = match outcome {
        Err(ExecuteError::PostLogStore(e)) => {
            format!(" POST-LOG STORE FAILED (log canonical, store stale): {}", e)
        }
        Err(ExecuteError::LogAppend(e)) => format!(" LOG APPEND FAILED: {}", e),
        Err(ExecuteError::PreLog(e)) => format!(" rejected: {}", e),
        // The entry just appended sits one below the log's next sequence number.
        Ok(ops) => format!(" ok ({} ops, logged at #{})", ops.len(), log.next_seq() - 1),
    };
    println!("{}", message);
}
/// Prints `cantidad`, `sku_id` and `ubicacion` of the `Stock` record `id`,
/// prefixed by `label`. Missing or mistyped fields print as `0` / `?`.
///
/// # Panics
/// Panics if the record cannot be loaded from `store`.
fn print_stock(store: &MemoryStore, label: &str, id: Uuid) {
    let record = store.load("Stock", id).expect("stock exists");
    let cantidad = match record.get("cantidad").and_then(|field| field.as_i64()) {
        Some(units) => units,
        None => 0,
    };
    let sku = match record.get("sku_id").and_then(|field| field.as_str()) {
        Some(text) => text,
        None => "?",
    };
    let loc = match record.get("ubicacion").and_then(|field| field.as_str()) {
        Some(text) => text,
        None => "?",
    };
    println!(" {}: cantidad={} sku={} ubic={}", label, cantidad, sku, loc);
}
/// Prints `title` on its own line, preceded by a blank line.
fn section(title: &str) {
    println!();
    println!("{}", title);
}
+455
View File
@@ -0,0 +1,455 @@
//! `nakui` — operator CLI for running the kernel server and for inspecting,
//! replaying, verifying, and maintaining an event log produced by the
//! kernel. Subcommands:
//!
//! - `inspect` — what's in the log? (audit trail)
//! - `replay` — what state does the log produce? (recovery dry-run)
//! - `verify-log` — does every morphism still reproduce its ops?
//! (determinism contract — the regression alarm)
//! - `run` — load a module and start the server on a socket
//! - `drift` — compare the log against a running server's socket
//! - `snapshot` — replay the log and write a snapshot file
//! - `compact` — compact the log through a snapshot's seq
//!
//! Exit codes: 0 on success, 1 on operational error, 2 on bad arguments,
//! 3 when `drift` ran successfully and detected drift.
use std::collections::BTreeMap;
use std::path::PathBuf;
use std::process::ExitCode;
use nakui_core::drift::{DriftDiff, check_against_socket};
use nakui_core::event_log::{
EventLog, LogEntry, Snapshot, replay_with_snapshot_into, verify_log,
};
use nakui_core::executor::Executor;
use nakui_core::run::run_server;
use nakui_core::store::MemoryStore;
fn main() -> ExitCode {
let args: Vec<String> = std::env::args().collect();
let prog = args.first().cloned().unwrap_or_else(|| "nakui".into());
let sub = match args.get(1).map(String::as_str) {
Some(s) => s,
None => {
print_usage(&prog);
return ExitCode::from(2);
}
};
let rest = &args[2..];
let result = match sub {
"inspect" => cmd_inspect(rest),
"replay" => cmd_replay(rest),
"verify-log" => cmd_verify_log(rest),
"run" => cmd_run(rest),
"drift" => cmd_drift(rest),
"snapshot" => cmd_snapshot(rest),
"compact" => cmd_compact(rest),
"-h" | "--help" | "help" => {
print_usage(&prog);
return ExitCode::SUCCESS;
}
other => {
eprintln!("nakui: unknown subcommand `{}`", other);
print_usage(&prog);
return ExitCode::from(2);
}
};
match result {
Ok(()) => ExitCode::SUCCESS,
Err(CliError::BadArgs(msg)) => {
eprintln!("nakui: {}", msg);
print_usage(&prog);
ExitCode::from(2)
}
Err(CliError::Op(msg)) => {
eprintln!("nakui: {}", msg);
ExitCode::from(1)
}
// Drift uses its own exit code so callers can distinguish "the
// tool failed" (1) from "the tool worked and detected drift" (3).
Err(CliError::DriftDetected) => ExitCode::from(3),
}
}
/// Failure categories a subcommand can return; `main` maps each variant to
/// a distinct process exit code.
enum CliError {
/// Invalid or missing command-line arguments. `main` prints the message
/// and the usage text, then exits with code 2.
BadArgs(String),
/// Operational failure (opening/reading the log, loading a module or
/// snapshot, ...). `main` prints the message and exits with code 1.
Op(String),
/// The `drift` subcommand ran and found differences. `main` exits with
/// code 3 without printing anything further.
DriftDetected,
}
/// Writes the usage summary for every subcommand to stderr, substituting
/// `prog` for the program name at the start of each command line.
fn print_usage(prog: &str) {
eprintln!(
"usage:
{p} inspect --log <path>
{p} replay --log <path> [--snapshot <path>]
{p} verify-log --log <path> --module <dir>
{p} run --log <path> --module <dir> --socket <path>
[--snapshot <path>] [--store-path <dir>]
{p} drift --log <path> --against <socket>
{p} snapshot --log <path> --module <dir> --out <path>
{p} compact --log <path> --snapshot <path>
--store-path activates persistent SurrealStore (kv-surrealkv);
requires the binary to be built with `--features persistent`.",
p = prog
);
}
/// Parses a flat list of `--name value` pairs into a name -> value map.
///
/// Only the space-separated form is understood (no `--name=value`, no
/// clustering). Every argument in flag position must start with `--` and be
/// listed in `allowed`; anything else is a `CliError::BadArgs`, so typos
/// surface immediately instead of being ignored. A flag without a
/// following value is also a `BadArgs` error. If a flag is repeated, the
/// last value wins.
fn parse_flags(args: &[String], allowed: &[&str]) -> Result<BTreeMap<String, String>, CliError> {
    let mut parsed = BTreeMap::new();
    let mut remaining = args.iter();
    while let Some(flag) = remaining.next() {
        let name = match flag.strip_prefix("--") {
            Some(stripped) => stripped,
            None => {
                return Err(CliError::BadArgs(format!(
                    "expected --flag, got `{}`",
                    flag
                )));
            }
        };
        if !allowed.iter().any(|candidate| *candidate == name) {
            return Err(CliError::BadArgs(format!("unknown flag `--{}`", name)));
        }
        // The argument right after a flag is always consumed as its value.
        let value = match remaining.next() {
            Some(v) => v,
            None => {
                return Err(CliError::BadArgs(format!(
                    "flag `--{}` requires a value",
                    name
                )));
            }
        };
        parsed.insert(name.to_string(), value.clone());
    }
    Ok(parsed)
}
/// Looks up a mandatory flag in the parsed map, returning a
/// `CliError::BadArgs` naming the flag when it is absent.
fn require<'a>(
    flags: &'a BTreeMap<String, String>,
    name: &str,
) -> Result<&'a String, CliError> {
    match flags.get(name) {
        Some(value) => Ok(value),
        None => Err(CliError::BadArgs(format!(
            "missing required flag `--{}`",
            name
        ))),
    }
}
/// `inspect --log <path>`: prints the log path, the entry count, the seq
/// range, and one line per entry (seeds and morphisms, the latter with
/// their op count and inputs). An empty log prints only path and count.
fn cmd_inspect(args: &[String]) -> Result<(), CliError> {
    let flags = parse_flags(args, &["log"])?;
    let log_path = PathBuf::from(require(&flags, "log")?);
    let log = match EventLog::open(&log_path) {
        Ok(opened) => opened,
        Err(e) => return Err(CliError::Op(format!("open log: {}", e))),
    };
    let entries = match log.entries() {
        Ok(all) => all,
        Err(e) => return Err(CliError::Op(format!("read log: {}", e))),
    };

    println!("log: {}", log.path().display());
    println!("entries: {}", entries.len());

    let (first, last) = match (entries.first(), entries.last()) {
        (Some(first), Some(last)) => (first, last),
        _ => return Ok(()),
    };
    println!("seq range: {}..={}", first.seq(), last.seq());
    println!();

    for entry in &entries {
        match entry {
            LogEntry::Morphism {
                seq,
                morphism,
                ops,
                inputs,
                ..
            } => {
                let rendered: Vec<String> = inputs
                    .iter()
                    .map(|(k, v)| format!("{}={}", k, v))
                    .collect();
                println!(
                    " #{:04} morph {} ({} ops) [{}]",
                    seq,
                    morphism,
                    ops.len(),
                    rendered.join(", ")
                );
            }
            LogEntry::Seed {
                seq, entity, id, ..
            } => println!(" #{:04} seed {} {}", seq, entity, id),
        }
    }
    Ok(())
}
/// `replay --log <path> [--snapshot <path>]`: replays the log (optionally
/// starting from a snapshot) into a fresh in-memory store and prints the
/// last seq plus a per-entity record count, sorted by entity name.
fn cmd_replay(args: &[String]) -> Result<(), CliError> {
    let flags = parse_flags(args, &["log", "snapshot"])?;
    let log_path = PathBuf::from(require(&flags, "log")?);
    let log = EventLog::open(&log_path).map_err(|e| CliError::Op(format!("open log: {}", e)))?;

    // A snapshot path that was given but does not resolve to a snapshot is
    // an error rather than a silent fallback to a full replay.
    let snapshot: Option<Snapshot> = match flags.get("snapshot") {
        None => None,
        Some(p) => {
            let path = PathBuf::from(p);
            let loaded = Snapshot::load(&path)
                .map_err(|e| CliError::Op(format!("load snapshot: {}", e)))?;
            match loaded {
                Some(snap) => Some(snap),
                None => {
                    return Err(CliError::Op(format!(
                        "snapshot not found: {}",
                        path.display()
                    )));
                }
            }
        }
    };

    let mut store = MemoryStore::new();
    replay_with_snapshot_into(&log, snapshot.as_ref(), &mut store)
        .map_err(|e| CliError::Op(format!("replay: {}", e)))?;

    let entries = log
        .entries()
        .map_err(|e| CliError::Op(format!("read log: {}", e)))?;
    let last_seq = match entries.last() {
        Some(entry) => entry.seq().to_string(),
        None => String::from("<empty>"),
    };

    println!("replayed log: {}", log.path().display());
    if let Some(snap) = &snapshot {
        println!("snapshot: seq {} (covers seq <= {})", snap.seq, snap.seq);
    }
    println!("last seq: {}", last_seq);
    println!("entities:");

    let mut by_entity: Vec<(&String, usize)> = store
        .records()
        .iter()
        .map(|(k, v)| (k, v.len()))
        .collect();
    by_entity.sort_by(|(left, _), (right, _)| left.cmp(right));

    if by_entity.is_empty() {
        println!(" (none)");
    }
    for (entity, count) in by_entity {
        println!(" {:<20} {}", entity, count);
    }
    Ok(())
}
/// `drift --log <path> --against <socket>`: runs `check_against_socket` and
/// prints either a one-line "in sync" confirmation or both hashes, both
/// record counts, and every diff. Returns `CliError::DriftDetected` when
/// the report is not in sync.
fn cmd_drift(args: &[String]) -> Result<(), CliError> {
    let flags = parse_flags(args, &["log", "against"])?;
    let log_path = PathBuf::from(require(&flags, "log")?);
    let socket_path = PathBuf::from(require(&flags, "against")?);

    let report = match check_against_socket(&log_path, &socket_path) {
        Ok(report) => report,
        Err(e) => return Err(CliError::Op(format!("drift check: {}", e))),
    };
    let log_hex = hex_encode(&report.log_hash);
    let server_hex = hex_encode(&report.server_hash);

    if !report.in_sync() {
        println!("DRIFT detected");
        println!(
            " log replay: hash {} ({} records)",
            log_hex, report.log_records
        );
        println!(
            " server state: hash {} ({} records)",
            server_hex, report.server_records
        );
        println!();
        println!("diffs:");
        report.diffs.iter().for_each(|diff| match diff {
            DriftDiff::Tampered {
                entity,
                id,
                log_value,
                server_value,
            } => {
                println!(
                    " ~ {} {} (tampered)\n log: {}\n server: {}",
                    entity, id, log_value, server_value
                );
            }
            DriftDiff::OnlyInLog { entity, id, .. } => {
                println!(" - {} {} (only in log replay)", entity, id);
            }
            DriftDiff::OnlyOnServer { entity, id, .. } => {
                println!(" + {} {} (only on server)", entity, id);
            }
        });
        return Err(CliError::DriftDetected);
    }

    println!(
        "ok: in sync (hash {}, {} records)",
        short_hash(&log_hex),
        report.log_records
    );
    Ok(())
}
/// Renders `bytes` as lowercase hexadecimal, two digits per byte, high
/// nibble first. An empty slice yields an empty string.
fn hex_encode(bytes: &[u8]) -> String {
    bytes
        .iter()
        .flat_map(|&byte| [byte >> 4, byte & 0x0f])
        .map(|nibble| char::from_digit(u32::from(nibble), 16).expect("nibble is below 16"))
        .collect()
}
/// Abbreviates a hex digest for display: strings of up to 12 bytes are
/// returned unchanged; longer ones become the first 6 and last 4
/// characters joined by an ellipsis (`abcdef…1234`).
///
/// The ellipsis marks the output as elided; without it the head and tail
/// run together and read like a genuine 10-character hash prefix.
///
/// If the cut points do not fall on character boundaries (not possible for
/// real hex input), the string is returned unabbreviated instead of
/// panicking.
fn short_hash(hex: &str) -> String {
    if hex.len() <= 12 {
        return hex.to_string();
    }
    match (hex.get(..6), hex.get(hex.len() - 4..)) {
        (Some(head), Some(tail)) => format!("{}…{}", head, tail),
        _ => hex.to_string(),
    }
}
/// `run --log <path> --module <dir> --socket <path> [--snapshot <path>]
/// [--store-path <dir>]`: logs the effective configuration to stderr,
/// loads the module, opens the log (and the snapshot when given), then
/// hands over to `run_server` — with a `MemoryStore`, or via
/// `run_persistent` when `--store-path` is present.
fn cmd_run(args: &[String]) -> Result<(), CliError> {
    let flags = parse_flags(args, &["log", "module", "socket", "snapshot", "store-path"])?;
    let log_path = PathBuf::from(require(&flags, "log")?);
    let module_dir = PathBuf::from(require(&flags, "module")?);
    let socket_path = PathBuf::from(require(&flags, "socket")?);
    let snapshot_path = flags.get("snapshot").map(PathBuf::from);
    let store_path = flags.get("store-path").map(PathBuf::from);

    let snapshot_desc = match &snapshot_path {
        Some(p) => p.display().to_string(),
        None => String::from("<none>"),
    };
    let store_desc = match &store_path {
        Some(p) => p.display().to_string(),
        None => String::from("<memory>"),
    };
    eprintln!(
        "nakui run: module={} log={} socket={} snapshot={} store={}",
        module_dir.display(),
        log_path.display(),
        socket_path.display(),
        snapshot_desc,
        store_desc,
    );

    let executor = Executor::load_module(&module_dir)
        .map_err(|e| CliError::Op(format!("load module {}: {}", module_dir.display(), e)))?;
    let log = EventLog::open(&log_path).map_err(|e| CliError::Op(format!("open log: {}", e)))?;

    // A snapshot path that was given must resolve to an actual snapshot.
    let snapshot = if let Some(p) = &snapshot_path {
        let loaded =
            Snapshot::load(p).map_err(|e| CliError::Op(format!("load snapshot: {}", e)))?;
        match loaded {
            Some(snap) => Some(snap),
            None => {
                return Err(CliError::Op(format!(
                    "snapshot file does not exist: {}",
                    p.display()
                )));
            }
        }
    } else {
        None
    };

    match store_path {
        Some(p) => run_persistent(executor, log, snapshot, &socket_path, &p),
        None => {
            let store = MemoryStore::new();
            run_server(executor, log, store, snapshot, &socket_path)
                .map_err(|e| CliError::Op(format!("run: {}", e)))
        }
    }
}
/// Persistent variant of the server start-up (built only with the
/// `persistent` feature): opens a `SurrealStore` at `store_path` and runs
/// the server on top of it.
#[cfg(feature = "persistent")]
fn run_persistent(
    executor: Executor,
    log: EventLog,
    snapshot: Option<Snapshot>,
    socket_path: &std::path::Path,
    store_path: &std::path::Path,
) -> Result<(), CliError> {
    use nakui_core::surreal_store::SurrealStore;

    let store = match SurrealStore::new_persistent(store_path) {
        Ok(opened) => opened,
        Err(e) => {
            return Err(CliError::Op(format!(
                "open persistent store at {}: {}",
                store_path.display(),
                e
            )));
        }
    };
    match run_server(executor, log, store, snapshot, socket_path) {
        Ok(()) => Ok(()),
        Err(e) => Err(CliError::Op(format!("run: {}", e))),
    }
}
/// Stand-in used when the binary is built without the `persistent`
/// feature: always fails with an operational error telling the operator
/// how to rebuild. All arguments are ignored.
#[cfg(not(feature = "persistent"))]
fn run_persistent(
    _executor: Executor,
    _log: EventLog,
    _snapshot: Option<Snapshot>,
    _socket_path: &std::path::Path,
    _store_path: &std::path::Path,
) -> Result<(), CliError> {
    let reason = String::from("--store-path requires building with `--features persistent`");
    Err(CliError::Op(reason))
}
/// `snapshot --log <path> --module <dir> --out <path>`: replays the whole
/// log into an in-memory store, captures a snapshot at the log's last seq,
/// writes it to `--out`, and prints a summary. Fails with an operational
/// error when the log has no entries.
fn cmd_snapshot(args: &[String]) -> Result<(), CliError> {
    let flags = parse_flags(args, &["log", "module", "out"])?;
    let log_path = PathBuf::from(require(&flags, "log")?);
    let module_dir = PathBuf::from(require(&flags, "module")?);
    let out_path = PathBuf::from(require(&flags, "out")?);

    let exec = match Executor::load_module(&module_dir) {
        Ok(loaded) => loaded,
        Err(e) => {
            return Err(CliError::Op(format!(
                "load module {}: {}",
                module_dir.display(),
                e
            )));
        }
    };
    let log = match EventLog::open(&log_path) {
        Ok(opened) => opened,
        Err(e) => return Err(CliError::Op(format!("open log: {}", e))),
    };

    let mut store = MemoryStore::new();
    if let Err(e) = replay_with_snapshot_into(&log, None, &mut store) {
        return Err(CliError::Op(format!("replay: {}", e)));
    }

    let entries = match log.entries() {
        Ok(all) => all,
        Err(e) => return Err(CliError::Op(format!("read log: {}", e))),
    };
    let last_seq = match entries.last() {
        Some(entry) => entry.seq(),
        None => return Err(CliError::Op("log is empty; nothing to snapshot".into())),
    };

    let snap = Snapshot::capture(&store, last_seq, &exec);
    if let Err(e) = snap.write(&out_path) {
        return Err(CliError::Op(format!("write snapshot: {}", e)));
    }

    let entity_count: usize = store.records().values().map(|m| m.len()).sum();
    let schema = short_hash(&hex_encode(&exec.module_schema_hash()));
    println!(
        "snapshot written to {} (seq {}, {} records, schema {})",
        out_path.display(),
        last_seq,
        entity_count,
        schema,
    );
    Ok(())
}
/// `compact --log <path> --snapshot <path>`: loads the snapshot, compacts
/// the log through the snapshot's seq via `EventLog::compact_through`, and
/// reports the entry count before and after plus how many were dropped.
///
/// Every failure (missing snapshot, unreadable log, failed compaction) is
/// returned as `CliError::Op`.
fn cmd_compact(args: &[String]) -> Result<(), CliError> {
    let flags = parse_flags(args, &["log", "snapshot"])?;
    let log_path = PathBuf::from(require(&flags, "log")?);
    let snap_path = PathBuf::from(require(&flags, "snapshot")?);
    let snap = Snapshot::load(&snap_path)
        .map_err(|e| CliError::Op(format!("load snapshot: {}", e)))?
        .ok_or_else(|| CliError::Op(format!("snapshot not found: {}", snap_path.display())))?;
    let mut log =
        EventLog::open(&log_path).map_err(|e| CliError::Op(format!("open log: {}", e)))?;
    // Count entries on both sides of the compaction so the summary can
    // show what was removed.
    let before = log
        .entries()
        .map(|es| es.len())
        .map_err(|e| CliError::Op(format!("read log: {}", e)))?;
    log.compact_through(snap.seq)
        .map_err(|e| CliError::Op(format!("compact: {}", e)))?;
    let after = log
        .entries()
        .map(|es| es.len())
        .map_err(|e| CliError::Op(format!("read log: {}", e)))?;
    // The two counts need a separator: without it 10 and 5 print as "105".
    println!(
        "compacted {} through seq {} ({} -> {} entries; {} dropped)",
        log_path.display(),
        snap.seq,
        before,
        after,
        before.saturating_sub(after),
    );
    Ok(())
}
/// `verify-log --log <path> --module <dir>`: loads the module, runs
/// `verify_log` over the log, and prints the entry count on success. A
/// verification failure is returned as an operational error.
fn cmd_verify_log(args: &[String]) -> Result<(), CliError> {
    let flags = parse_flags(args, &["log", "module"])?;
    let log_path = PathBuf::from(require(&flags, "log")?);
    let module_dir = PathBuf::from(require(&flags, "module")?);

    let exec = match Executor::load_module(&module_dir) {
        Ok(loaded) => loaded,
        Err(e) => {
            return Err(CliError::Op(format!(
                "load module {}: {}",
                module_dir.display(),
                e
            )));
        }
    };
    let log = match EventLog::open(&log_path) {
        Ok(opened) => opened,
        Err(e) => return Err(CliError::Op(format!("open log: {}", e))),
    };

    if let Err(e) = verify_log(&log, &exec) {
        return Err(CliError::Op(format!("verify failed: {}", e)));
    }

    let n = log
        .entries()
        .map_err(|e| CliError::Op(format!("read log: {}", e)))?
        .len();
    println!("ok: {} entries; every morphism reproduced its ops", n);
    Ok(())
}
@@ -0,0 +1,176 @@
//! Cross-module demo: a `vender` morphism that touches a Stock entity
//! (defined in inventory's schema) and a Caja entity (defined in
//! treasury's schema). The sales module's `nsmc.json` lists three schema
//! files; the executor concatenates them at load time so KCL validates
//! against all three.
use nakui_core::event_log::{
EventLog, ExecuteError, execute_and_log, replay, seed_and_log, verify_log,
};
use nakui_core::executor::Executor;
use nakui_core::store::{MemoryStore, Store};
use serde_json::json;
use uuid::Uuid;
fn main() {
let module_dir = std::env::var("NAKUI_MODULE")
.unwrap_or_else(|_| "modules/sales".into());
let exec = Executor::load_module(&module_dir).expect("load module");
let log_path =
std::env::temp_dir().join(format!("nakui_sales_{}.jsonl", Uuid::new_v4()));
let mut log = EventLog::open(&log_path).expect("open log");
let mut store = MemoryStore::new();
let stock_id = Uuid::new_v4();
let caja_id = Uuid::new_v4();
seed_and_log(
&exec,
&mut store, &mut log, "Stock", stock_id,
json!({
"id": stock_id.to_string(),
"sku_id": "kg-cafe-honduras-2026",
"ubicacion": "almacen-norte",
"cantidad": 500_i64,
}),
).expect("seed stock");
seed_and_log(
&exec,
&mut store, &mut log, "Caja", caja_id,
json!({
"id": caja_id.to_string(),
"name": "Caja Principal",
"saldo": 1_000_000_i64, // $10_000.00 in cents
"currency": "USD",
}),
).expect("seed caja");
section("== seed ==");
print_stock(&store, "stock", stock_id);
print_caja(&store, "caja", caja_id);
// 1. Sell 100 kg cafe at $50.00 / kg = $5000.00 total.
section("== vender 100 kg @ $50.00 c/u ==");
run_and_report(&exec, &mut store, &mut log, "vender",
&[("stock", stock_id), ("caja", caja_id)],
json!({
"cantidad": 100_i64,
"precio_unitario": 5_000_i64, // $50.00 in cents
"timestamp": "2026-05-04T10:00:00Z",
"venta_id": Uuid::new_v4().to_string(),
}),
);
print_stock(&store, "stock", stock_id);
print_caja(&store, "caja", caja_id);
// 2. Try selling more than available stock — should fail Stock post-check.
section("== vender 9999 kg (reject: stock <= 0) ==");
run_and_report(&exec, &mut store, &mut log, "vender",
&[("stock", stock_id), ("caja", caja_id)],
json!({
"cantidad": 9999_i64,
"precio_unitario": 1_000_i64,
"timestamp": "2026-05-04T11:00:00Z",
"venta_id": Uuid::new_v4().to_string(),
}),
);
// 3. Negative price — caught by Rhai.
section("== vender con precio negativo (reject: rhai throw) ==");
run_and_report(&exec, &mut store, &mut log, "vender",
&[("stock", stock_id), ("caja", caja_id)],
json!({
"cantidad": 10_i64,
"precio_unitario": -100_i64,
"timestamp": "2026-05-04T11:30:00Z",
"venta_id": Uuid::new_v4().to_string(),
}),
);
// 4. Another good sale.
section("== vender 50 kg @ $60.00 c/u ==");
run_and_report(&exec, &mut store, &mut log, "vender",
&[("stock", stock_id), ("caja", caja_id)],
json!({
"cantidad": 50_i64,
"precio_unitario": 6_000_i64,
"timestamp": "2026-05-04T12:00:00Z",
"venta_id": Uuid::new_v4().to_string(),
}),
);
print_stock(&store, "stock", stock_id);
print_caja(&store, "caja", caja_id);
section("== final live state ==");
print_stock(&store, "stock", stock_id);
print_caja(&store, "caja", caja_id);
let entries = log.entries().expect("read log");
section(&format!(
"== log: {} entries at {} ==",
entries.len(),
log.path().display()
));
for e in &entries {
match e {
nakui_core::event_log::LogEntry::Seed { seq, entity, id, .. } =>
println!(" #{:02} seed {} {}", seq, entity, id),
nakui_core::event_log::LogEntry::Morphism { seq, morphism, ops, .. } =>
println!(" #{:02} morph {} ({} ops)", seq, morphism, ops.len()),
}
}
section("== replay verification (state) ==");
let replayed = replay(&log).expect("replay");
if store == replayed {
println!(" ok: replayed store byte-equal to live store");
} else {
println!(" MISMATCH");
}
section("== determinism verification (ops) ==");
match verify_log(&log, &exec) {
Ok(()) => println!(
" ok: every logged morphism reproduced its ops on re-execution"
),
Err(e) => println!(" nondeterminism detected: {}", e),
}
let _ = std::fs::remove_file(&log_path);
}
/// Runs one morphism through `execute_and_log` and prints a one-line
/// outcome: the op count and log sequence number on success, or which
/// stage failed (`PreLog`, `LogAppend`, `PostLogStore`) otherwise.
fn run_and_report(
    exec: &Executor,
    store: &mut MemoryStore,
    log: &mut EventLog,
    morphism: &str,
    inputs: &[(&str, Uuid)],
    params: serde_json::Value,
) {
    let outcome = execute_and_log(exec, store, log, morphism, inputs, params);
    let message = match outcome {
        Err(ExecuteError::PostLogStore(e)) => {
            format!(" POST-LOG STORE FAILED (log canonical, store stale): {}", e)
        }
        Err(ExecuteError::LogAppend(e)) => format!(" LOG APPEND FAILED: {}", e),
        Err(ExecuteError::PreLog(e)) => format!(" rejected: {}", e),
        // The entry just appended sits one below the log's next sequence number.
        Ok(ops) => format!(" ok ({} ops, logged at #{})", ops.len(), log.next_seq() - 1),
    };
    println!("{}", message);
}
/// Prints `cantidad` and `sku_id` of the `Stock` record `id`, prefixed by
/// `label`. Missing or mistyped fields print as `0` / `?`.
///
/// # Panics
/// Panics if the record cannot be loaded from `store`.
fn print_stock(store: &MemoryStore, label: &str, id: Uuid) {
    let record = store.load("Stock", id).expect("stock exists");
    let cantidad = match record.get("cantidad").and_then(|field| field.as_i64()) {
        Some(units) => units,
        None => 0,
    };
    let sku = match record.get("sku_id").and_then(|field| field.as_str()) {
        Some(text) => text,
        None => "?",
    };
    println!(" {} cantidad={} sku={}", label, cantidad, sku);
}
/// Prints `saldo` and `currency` of the `Caja` record `id`, prefixed by
/// `label`. Missing or mistyped fields print as `0` / `?`.
///
/// # Panics
/// Panics if the record cannot be loaded from `store`.
fn print_caja(store: &MemoryStore, label: &str, id: Uuid) {
    let record = store.load("Caja", id).expect("caja exists");
    let saldo = match record.get("saldo").and_then(|field| field.as_i64()) {
        Some(amount) => amount,
        None => 0,
    };
    let cur = match record.get("currency").and_then(|field| field.as_str()) {
        Some(code) => code,
        None => "?",
    };
    println!(" {} saldo={} {} (en centavos)", label, saldo, cur);
}
/// Prints `title` on its own line, preceded by a blank line.
fn section(title: &str) {
    println!();
    println!("{}", title);
}
+72
View File
@@ -0,0 +1,72 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
use uuid::Uuid;
/// Address of one field on one record: the entity name, the record id, and
/// the field name. Used as the target of `FieldOp::Set`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct FieldPath {
/// Entity (record type) name, e.g. the `Caja` in the token `Caja.saldo`.
pub entity: String,
/// Id of the record within that entity.
pub id: Uuid,
/// Name of the field on the record.
pub field: String,
}
/// A single state change targeting one record. Serialized with an `op` tag
/// holding the snake_case variant name.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
pub enum FieldOp {
/// Assign `value` to the field addressed by `path`.
Set {
path: FieldPath,
value: Value,
},
/// Whole-record op: record `(entity, id)` takes `data` as its value.
Create {
entity: String,
id: Uuid,
data: Value,
},
/// Whole-record op: record `(entity, id)` is removed.
Delete {
entity: String,
id: Uuid,
},
}
impl FieldOp {
    /// Returns the token that a manifest's `writes` list is matched
    /// against: `Entity.field` (e.g. "Caja.saldo") for a `Set`, and the
    /// bare entity name (e.g. "Movimiento") for `Create` and `Delete`.
    pub fn capability_token(&self) -> String {
        match self {
            FieldOp::Create { entity, .. } | FieldOp::Delete { entity, .. } => entity.clone(),
            FieldOp::Set { path, .. } => [path.entity.as_str(), path.field.as_str()].join("."),
        }
    }
}
/// Simulates `ops` against a copy of `state`, considering only the ops that
/// address `(entity, id)`, and returns the resulting value.
///
/// Ops are applied in order: a matching `Set` inserts into the current
/// value when it is a JSON object (and is a no-op otherwise, including
/// after a delete), a matching `Create` replaces the value with its data,
/// and a matching `Delete` clears it. The result is `None` when the target
/// ends up deleted — callers should skip post-checks against a deleted
/// entity rather than running them against the stale prior state.
pub fn simulate_on(state: &Value, entity: &str, id: Uuid, ops: &[FieldOp]) -> Option<Value> {
    let mut current = Some(state.clone());
    for op in ops {
        match op {
            FieldOp::Set { path, value } => {
                if path.entity != entity || path.id != id {
                    continue;
                }
                if let Some(Value::Object(fields)) = current.as_mut() {
                    fields.insert(path.field.clone(), value.clone());
                }
            }
            FieldOp::Create {
                entity: target,
                id: target_id,
                data,
            } => {
                if target == entity && *target_id == id {
                    current = Some(data.clone());
                }
            }
            FieldOp::Delete {
                entity: target,
                id: target_id,
            } => {
                if target == entity && *target_id == id {
                    current = None;
                }
            }
        }
    }
    current
}
+496
View File
@@ -0,0 +1,496 @@
//! Drift detection: compare two snapshots of store state and surface
//! the records that differ.
//!
//! "Drift" here means the live store has departed from what the log can
//! reproduce. The `Store::hash_state` contract makes the binary check
//! cheap (32 bytes); when those disagree, `compare_states` walks both
//! enumerations and produces a diff list the operator can act on.
//!
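//! The comparison itself (`compare_states`) does no IO; it is the pure
//! comparison used by both the CLI and any future automated drift-watcher.
//! NOTE(review): this module imports `UnixStream`, and the CLI imports
//! `check_against_socket` from it, so the wire bits (asking a `nakui run`
//! server for its hash and records) appear to live here rather than in the
//! CLI — confirm and keep this paragraph in sync.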
use serde::Serialize;
use serde_json::Value;
use std::collections::HashMap;
use std::io::{BufRead, BufReader, Write};
use std::os::unix::net::UnixStream;
use std::path::Path;
use thiserror::Error;
use uuid::Uuid;
use crate::event_log::{EventLog, replay};
use crate::store::Store;
/// A single record-level difference between two snapshots. Variants are
/// labeled from the perspective of the operator running the check: the
/// "log" side is the canonical state (what the log replays to), the
/// "server" side is the live state being audited.
///
/// Serialized with a `kind` tag holding the snake_case variant name.
#[derive(Debug, Clone, PartialEq, Serialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum DriftDiff {
/// Server has a record the log doesn't know about. Phantom data —
/// either an out-of-band write, or a successful op that never
/// reached the WAL (which would itself be a kernel bug).
OnlyOnServer {
entity: String,
id: Uuid,
// The record's value as found on the server side.
value: Value,
},
/// Log expects a record the server lost. Either the server's apply
/// rolled back without a reconcile, or someone deleted a record
/// out-of-band.
OnlyInLog {
entity: String,
id: Uuid,
// The record's value as found on the log side.
value: Value,
},
/// Same (entity, id) on both sides but the values differ — the most
/// dangerous case, because it means a logged event was overwritten
/// or a field was tampered with.
Tampered {
entity: String,
id: Uuid,
// Both versions are kept so the operator can see what changed.
log_value: Value,
server_value: Value,
},
}
/// Outcome of comparing the log-derived snapshot with the server's
/// snapshot: both hashes, both record counts, and the record-level diffs.
#[derive(Debug, Clone, Serialize)]
pub struct DriftReport {
/// 32-byte state hash of the log-derived (canonical) snapshot.
pub log_hash: [u8; 32],
/// 32-byte state hash of the server-side snapshot.
pub server_hash: [u8; 32],
/// Number of records supplied for the log side.
pub log_records: usize,
/// Number of records supplied (or reported) for the server side.
pub server_records: usize,
/// Empty iff the two snapshots are byte-identical. Sorted by
/// (entity, id_bytes) so two runs against the same drift produce
/// the same report.
pub diffs: Vec<DriftDiff>,
}
impl DriftReport {
    /// Returns `true` only when there are no record-level diffs *and* the
    /// two state hashes are equal; either signal alone is not enough.
    pub fn in_sync(&self) -> bool {
        if !self.diffs.is_empty() {
            return false;
        }
        self.log_hash == self.server_hash
    }
}
/// Pure comparison of two record enumerations (for instance the output of
/// `Store::iter`) together with their state hashes.
///
/// Both inputs are re-keyed by `(entity, id)`, so they do not have to be
/// sorted. Every key is classified as present only on the server, present
/// only in the log, or present on both sides with different values. The
/// resulting `diffs` are sorted by `(entity, id bytes, variant kind)`, which
/// makes the report deterministic regardless of input order.
///
/// The hashes are copied into the report unchanged; they do not influence
/// the diff computation. The record counts are the lengths of the inputs.
pub fn compare_states(
    log_records: Vec<(String, Uuid, Value)>,
    log_hash: [u8; 32],
    server_records: Vec<(String, Uuid, Value)>,
    server_hash: [u8; 32],
) -> DriftReport {
    let log_count = log_records.len();
    let server_count = server_records.len();

    let mut expected: HashMap<(String, Uuid), Value> = HashMap::new();
    for (entity, id, value) in log_records {
        expected.insert((entity, id), value);
    }
    let mut live: HashMap<(String, Uuid), Value> = HashMap::new();
    for (entity, id, value) in server_records {
        live.insert((entity, id), value);
    }

    let mut diffs: Vec<DriftDiff> = Vec::new();

    // Walk the server side, consuming matching log entries as we go.
    for (record_key, server_value) in live {
        match expected.remove(&record_key) {
            Some(log_value) if log_value == server_value => {}
            Some(log_value) => {
                let (entity, id) = record_key;
                diffs.push(DriftDiff::Tampered {
                    entity,
                    id,
                    log_value,
                    server_value,
                });
            }
            None => {
                let (entity, id) = record_key;
                diffs.push(DriftDiff::OnlyOnServer {
                    entity,
                    id,
                    value: server_value,
                });
            }
        }
    }

    // Anything the server walk did not consume is missing on the server.
    diffs.extend(
        expected
            .into_iter()
            .map(|((entity, id), value)| DriftDiff::OnlyInLog { entity, id, value }),
    );

    // Canonical order: entity, then raw id bytes, then variant kind. The
    // last component cannot actually break a tie here (keys are unique),
    // it is kept as a defensive total order.
    diffs.sort_by(|left, right| {
        let (left_entity, left_id) = key(left);
        let (right_entity, right_id) = key(right);
        (left_entity, left_id.as_bytes(), variant_order(left)).cmp(&(
            right_entity,
            right_id.as_bytes(),
            variant_order(right),
        ))
    });

    DriftReport {
        log_hash,
        server_hash,
        log_records: log_count,
        server_records: server_count,
        diffs,
    }
}
/// Borrows the `(entity, id)` pair that identifies the record a diff is about.
fn key(d: &DriftDiff) -> (&str, &Uuid) {
    let (entity, id) = match d {
        DriftDiff::OnlyOnServer { entity, id, .. } => (entity, id),
        DriftDiff::OnlyInLog { entity, id, .. } => (entity, id),
        DriftDiff::Tampered { entity, id, .. } => (entity, id),
    };
    (entity.as_str(), id)
}
/// Rank of a diff's variant, used as the last tie-breaker when sorting:
/// log-only first, then tampered, then server-only.
fn variant_order(d: &DriftDiff) -> u8 {
    const LOG_ONLY: u8 = 0;
    const TAMPERED: u8 = 1;
    const SERVER_ONLY: u8 = 2;

    match d {
        DriftDiff::OnlyOnServer { .. } => SERVER_ONLY,
        DriftDiff::Tampered { .. } => TAMPERED,
        DriftDiff::OnlyInLog { .. } => LOG_ONLY,
    }
}
/// Everything that can go wrong while auditing a server against a log
/// (see `check_against_socket`).
#[derive(Debug, Error)]
pub enum DriftError {
/// Opening or reading the event log failed.
#[error("open log: {0}")]
Log(#[from] crate::event_log::LogError),
/// Replaying the log into the local store failed.
#[error("replay log: {0}")]
Replay(#[from] crate::event_log::ReplayError),
/// Enumerating or hashing the local store failed.
#[error("store: {0}")]
Store(#[from] crate::store::StoreError),
/// The Unix socket could not be connected. Built explicitly (no
/// `#[from]`) so it stays distinct from later IO failures on the
/// established connection.
#[error("connect to server socket: {0}")]
Connect(#[source] std::io::Error),
/// IO failure on the established connection, including the server
/// closing it without sending a response.
#[error("server io: {0}")]
Io(#[from] std::io::Error),
/// The response line was not valid JSON.
#[error("server response not json: {0}")]
Parse(#[from] serde_json::Error),
/// The response did not carry `"ok": true`; `msg` is the server's
/// `error` string, or a placeholder when there is none.
#[error("server returned error for `{op}`: {msg}")]
Server { op: String, msg: String },
/// A required field was absent or had the wrong JSON type. Also used
/// when a record id is not a valid UUID.
#[error("server response missing field `{field}` for op `{op}`")]
MissingField { op: String, field: String },
/// The `hash` string was not exactly 64 hexadecimal characters.
#[error("server hash `{0}` is not 32 hex bytes")]
BadHash(String),
}
/// Audit a live `nakui run` server against the canonical state obtained by
/// replaying the log at `log_path`.
///
/// Steps:
/// 1. Replay the log into a fresh in-memory store, then enumerate and hash it.
/// 2. Connect to the Unix socket at `socket_path` (one connection is reused
///    for every request) and ask for `hash_state`.
/// 3. If the server hash equals the local hash, return a report with an
///    empty diff list — the large `dump_records` round-trip is skipped.
/// 4. Otherwise request `dump_records` and let `compare_states` build the
///    structured diff.
///
/// # Errors
/// Returns a `DriftError` when the log cannot be opened or replayed, when
/// the socket cannot be connected or fails mid-exchange, or when a response
/// is not JSON, is not `ok`, or lacks a required field.
pub fn check_against_socket(
    log_path: &Path,
    socket_path: &Path,
) -> Result<DriftReport, DriftError> {
    // Canonical side: log → MemoryStore → (records, hash).
    let log = EventLog::open(log_path)?;
    let canonical = replay(&log)?;
    let log_records: Vec<(String, Uuid, Value)> = canonical.iter()?.collect();
    let log_hash = canonical.hash_state()?;

    // Server side: a single connection serves both requests.
    let mut conn = match UnixStream::connect(socket_path) {
        Ok(stream) => SocketClient::new(stream)?,
        Err(io_err) => return Err(DriftError::Connect(io_err)),
    };

    let hash_resp = conn.exchange(serde_json::json!({"op": "hash_state"}))?;
    require_ok(&hash_resp, "hash_state")?;
    let server_hash = parse_hash(&hash_resp, "hash_state")?;
    let Some(server_count) = hash_resp.get("records").and_then(Value::as_u64) else {
        return Err(DriftError::MissingField {
            op: "hash_state".into(),
            field: "records".into(),
        });
    };

    if server_hash != log_hash {
        // Expensive path: hashes disagree, so fetch every server record.
        let dump_resp = conn.exchange(serde_json::json!({"op": "dump_records"}))?;
        require_ok(&dump_resp, "dump_records")?;
        let server_records = parse_records(&dump_resp)?;
        return Ok(compare_states(
            log_records,
            log_hash,
            server_records,
            server_hash,
        ));
    }

    // Cheap path: identical hashes, nothing to diff.
    Ok(DriftReport {
        log_hash,
        server_hash,
        log_records: log_records.len(),
        server_records: server_count as usize,
        diffs: Vec::new(),
    })
}
/// Minimal request/response client over one Unix-socket connection.
/// `writer` and `reader` are two handles to the same stream (`new` clones
/// it); requests and responses are newline-terminated JSON.
struct SocketClient {
// Handle used to send requests.
writer: UnixStream,
// Buffered handle used to read one response line at a time.
reader: BufReader<UnixStream>,
}
impl SocketClient {
fn new(stream: UnixStream) -> Result<Self, DriftError> {
let reader_stream = stream.try_clone()?;
Ok(Self {
writer: stream,
reader: BufReader::new(reader_stream),
})
}
fn exchange(&mut self, req: Value) -> Result<Value, DriftError> {
let mut bytes = serde_json::to_vec(&req).expect("request serializes");
bytes.push(b'\n');
self.writer.write_all(&bytes)?;
let mut line = String::new();
let n = self.reader.read_line(&mut line)?;
if n == 0 {
return Err(DriftError::Io(std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
"server closed connection without responding",
)));
}
Ok(serde_json::from_str(line.trim())?)
}
}
/// Succeeds only when the response carries the boolean `"ok": true`.
///
/// # Errors
/// Any other shape yields `DriftError::Server` for `op`, using the
/// response's `error` string when present and a placeholder otherwise.
fn require_ok(resp: &Value, op: &str) -> Result<(), DriftError> {
    if let Some(true) = resp.get("ok").and_then(Value::as_bool) {
        return Ok(());
    }
    let msg = resp
        .get("error")
        .and_then(Value::as_str)
        .unwrap_or("(no error message)")
        .to_string();
    Err(DriftError::Server {
        op: op.into(),
        msg,
    })
}
/// Decodes the response's `hash` field — 64 hexadecimal characters, either
/// case — into 32 raw bytes.
///
/// # Errors
/// `DriftError::MissingField` when `hash` is absent or not a string;
/// `DriftError::BadHash` when it is not exactly 64 bytes long or contains a
/// non-hex character.
fn parse_hash(resp: &Value, op: &str) -> Result<[u8; 32], DriftError> {
    let Some(text) = resp.get("hash").and_then(Value::as_str) else {
        return Err(DriftError::MissingField {
            op: op.into(),
            field: "hash".into(),
        });
    };

    let raw = text.as_bytes();
    if raw.len() != 64 {
        return Err(DriftError::BadHash(text.into()));
    }

    // Each output byte is built from one pair of hex digits: high nibble
    // first, low nibble second.
    let mut out = [0u8; 32];
    for (slot, pair) in out.iter_mut().zip(raw.chunks_exact(2)) {
        match (hex_nibble(pair[0]), hex_nibble(pair[1])) {
            (Some(hi), Some(lo)) => *slot = (hi << 4) | lo,
            _ => return Err(DriftError::BadHash(text.into())),
        }
    }
    Ok(out)
}
/// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
/// Returns `None` for every other byte.
fn hex_nibble(c: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly the ASCII hex digits in either case and
    // yields a value below 16, so the narrowing conversion cannot fail.
    char::from(c)
        .to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
/// Extracts the `(entity, id, value)` triples from a `dump_records`
/// response, whose `records` field must be an array of objects with a
/// string `entity`, a string `id` holding a UUID, and a `value`.
///
/// # Errors
/// Stops at the first malformed item and returns
/// `DriftError::MissingField` naming the offending field. An `id` that is
/// present but not a UUID is reported through the same variant.
fn parse_records(resp: &Value) -> Result<Vec<(String, Uuid, Value)>, DriftError> {
    // Every failure in here is reported against the same op.
    fn missing(field: impl Into<String>) -> DriftError {
        DriftError::MissingField {
            op: "dump_records".into(),
            field: field.into(),
        }
    }

    let items = resp
        .get("records")
        .and_then(Value::as_array)
        .ok_or_else(|| missing("records"))?;

    items
        .iter()
        .map(|item| -> Result<(String, Uuid, Value), DriftError> {
            let entity = item
                .get("entity")
                .and_then(Value::as_str)
                .ok_or_else(|| missing("records[].entity"))?
                .to_string();
            let id_text = item
                .get("id")
                .and_then(Value::as_str)
                .ok_or_else(|| missing("records[].id"))?;
            let id = Uuid::parse_str(id_text)
                .map_err(|_| missing(format!("records[].id (not uuid: {})", id_text)))?;
            let value = item
                .get("value")
                .cloned()
                .ok_or_else(|| missing("records[].value"))?;
            Ok((entity, id, value))
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A 32-byte hash whose every byte is `byte`.
    fn h(byte: u8) -> [u8; 32] {
        [byte; 32]
    }

    /// Shorthand for one `(entity, id, value)` record tuple.
    fn record(entity: &str, id: Uuid, value: Value) -> (String, Uuid, Value) {
        (entity.to_string(), id, value)
    }

    #[test]
    fn empty_inputs_yield_no_diffs() {
        let report = compare_states(Vec::new(), h(0), Vec::new(), h(0));
        assert!(report.in_sync());
        assert!(report.diffs.is_empty());
    }

    #[test]
    fn equal_records_yield_no_diffs_even_if_hashes_were_lied_to() {
        // Records are what gets compared; the hashes are only carried
        // through, so mismatching hashes must not invent diffs.
        let id = Uuid::new_v4();
        let log = vec![record("Caja", id, json!({"saldo": 100}))];
        let server = vec![record("Caja", id, json!({"saldo": 100}))];

        let report = compare_states(log, h(1), server, h(2));

        assert!(report.diffs.is_empty(), "records equal → no diffs");
    }

    #[test]
    fn detects_only_on_server() {
        let shared = Uuid::new_v4();
        let phantom = Uuid::new_v4();
        let log = vec![record("Caja", shared, json!({"saldo": 100}))];
        let server = vec![
            record("Caja", shared, json!({"saldo": 100})),
            record("Caja", phantom, json!({"saldo": 999})),
        ];

        let report = compare_states(log, h(0), server, h(1));

        assert_eq!(report.diffs.len(), 1);
        let DriftDiff::OnlyOnServer { entity, id, .. } = &report.diffs[0] else {
            panic!("expected OnlyOnServer, got {:?}", report.diffs[0]);
        };
        assert_eq!(entity, "Caja");
        assert_eq!(*id, phantom);
    }

    #[test]
    fn detects_only_in_log() {
        let lost = Uuid::new_v4();
        let log = vec![record("Caja", lost, json!({"saldo": 100}))];

        let report = compare_states(log, h(0), Vec::new(), h(1));

        assert_eq!(report.diffs.len(), 1);
        let DriftDiff::OnlyInLog { id, .. } = &report.diffs[0] else {
            panic!("expected OnlyInLog, got {:?}", report.diffs[0]);
        };
        assert_eq!(*id, lost);
    }

    #[test]
    fn detects_tampered() {
        let target = Uuid::new_v4();
        let log = vec![record("Caja", target, json!({"saldo": 100}))];
        let server = vec![record("Caja", target, json!({"saldo": 999}))];

        let report = compare_states(log, h(0), server, h(1));

        assert_eq!(report.diffs.len(), 1);
        let DriftDiff::Tampered {
            id,
            log_value,
            server_value,
            ..
        } = &report.diffs[0]
        else {
            panic!("expected Tampered, got {:?}", report.diffs[0]);
        };
        assert_eq!(*id, target);
        assert_eq!(log_value["saldo"], json!(100));
        assert_eq!(server_value["saldo"], json!(999));
    }

    #[test]
    fn diffs_emerge_in_canonical_order() {
        // Two entities with different drift kinds: the report must come out
        // sorted by entity name, so "Caja" precedes "Movimiento".
        let id_caja = Uuid::nil();
        let id_mov = Uuid::from_u128(u128::MAX);
        let log = vec![record("Movimiento", id_mov, json!({"x": 1}))];
        let server = vec![record("Caja", id_caja, json!({"saldo": 0}))];

        let report = compare_states(log, h(0), server, h(1));

        assert_eq!(report.diffs.len(), 2);
        let observed: Vec<(&str, &str)> = report
            .diffs
            .iter()
            .map(|diff| match diff {
                DriftDiff::OnlyOnServer { entity, .. } => ("only_on_server", entity.as_str()),
                DriftDiff::OnlyInLog { entity, .. } => ("only_in_log", entity.as_str()),
                DriftDiff::Tampered { entity, .. } => ("tampered", entity.as_str()),
            })
            .collect();
        assert_eq!(
            observed,
            vec![("only_on_server", "Caja"), ("only_in_log", "Movimiento")],
            "unexpected order: {:?}",
            report.diffs
        );
    }

    #[test]
    fn in_sync_requires_both_hashes_and_no_diffs() {
        // Matching hashes alone are not enough: a non-empty diff list must
        // still make `in_sync` answer no.
        let stray = DriftDiff::Tampered {
            entity: "x".into(),
            id: Uuid::nil(),
            log_value: json!(1),
            server_value: json!(2),
        };
        let report = DriftReport {
            log_hash: h(0),
            server_hash: h(0),
            log_records: 1,
            server_records: 1,
            diffs: vec![stray],
        };
        assert!(!report.in_sync());
    }
}
+687
View File
@@ -0,0 +1,687 @@
//! Append-only event log for deterministic replay.
//!
//! Two entry kinds:
//! - `Seed`: an externally-provided initial record (the system boundary).
//! - `Morphism`: a successful kernel-validated morphism call, with the
//! produced ops attached.
//!
//! `replay()` reconstructs a store by reading the log and applying ops
//! directly — fast, no script execution. `verify_log()` re-runs every
//! morphism through the kernel and asserts the recomputed ops match the
//! logged ones, which is the operational definition of determinism.
//!
//! Failures are never logged: a rejected morphism produces no event.
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::{BTreeMap, HashMap};
use std::fs::OpenOptions;
use std::io::{BufRead, BufReader, Write};
use std::path::{Path, PathBuf};
use thiserror::Error;
use uuid::Uuid;
use crate::delta::FieldOp;
use crate::executor::{ExecError, Executor};
use crate::store::{MemoryStore, Store, StoreError};
/// One line of the event log. Serialized as an internally tagged JSON
/// object whose `kind` field is the snake_case variant name (`seed` or
/// `morphism`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum LogEntry {
/// An externally provided record placed into the store as-is.
Seed {
// Position of this entry in the log.
seq: u64,
entity: String,
id: Uuid,
data: Value,
/// Bundle hash (just the KCL schemas) at the moment this seed
/// was logged. `None` for pre-versioning entries — `verify_log`
/// skips the schema check on those. New writes always populate
/// it via `seed_and_log`.
#[serde(default, skip_serializing_if = "Option::is_none")]
schema_hash: Option<[u8; 32]>,
},
/// A successful morphism call together with the ops it produced.
Morphism {
// Position of this entry in the log.
seq: u64,
// Name of the morphism that was executed.
morphism: String,
// Input bindings: role name → record id.
inputs: BTreeMap<String, Uuid>,
params: Value,
// The ops the morphism produced; replay applies these directly.
ops: Vec<FieldOp>,
/// Hash of (kcl bundle | manifest spec | rhai script bytes) at
/// the moment this event was logged. `None` for pre-versioning
/// entries — `verify_log` skips the schema check on those (they
/// predate the contract). New writes always populate it.
#[serde(default, skip_serializing_if = "Option::is_none")]
schema_hash: Option<[u8; 32]>,
},
}
impl LogEntry {
    /// Sequence number of this entry, whichever variant it is.
    pub fn seq(&self) -> u64 {
        let (LogEntry::Seed { seq, .. } | LogEntry::Morphism { seq, .. }) = self;
        *seq
    }
}
/// Failures while reading from or appending to the log file. Snapshot
/// reads and writes report their failures through this type as well.
#[derive(Debug, Error)]
pub enum LogError {
/// Underlying filesystem failure.
#[error("io: {0}")]
Io(#[from] std::io::Error),
/// A line could not be deserialized. `line` is 1-based for log files;
/// `Snapshot::load` reports `0` because a snapshot is parsed as a
/// whole rather than line by line.
#[error("parse at line {line}: {source}")]
Parse {
line: usize,
#[source]
source: serde_json::Error,
},
/// An entry's seq is not the one that must come next — either while
/// validating an existing file in `open`, or on `append`.
#[error("non-monotonic seq: got {got}, expected {expected}")]
NonMonotonic { got: u64, expected: u64 },
}
/// Errors from `execute_and_log`. The variants distinguish *when in the
/// pipeline* the failure occurred — which determines whether the log was
/// updated and whether the live store is still consistent.
#[derive(Debug, Error)]
pub enum ExecuteError {
/// Failure before the log was written. Store untouched, log untouched.
/// Safe to retry with the same inputs.
#[error("pre-log validation failed: {0}")]
PreLog(#[from] ExecError),
/// Log append failed (typically IO). Store untouched, log untouched.
/// Safe to retry once the log backend recovers.
#[error("log append failed: {0}")]
LogAppend(#[from] LogError),
/// Apply to the store failed AFTER the event was logged. The log is
/// canonical; the live store is now stale and should be rebuilt by
/// replaying the log. Retrying the same morphism is incorrect — the
/// event is already on disk.
///
/// Has no `#[from]`: `execute_and_log` builds it explicitly with
/// `map_err`, so a bare `?` on a `StoreError` cannot produce it.
#[error("store apply failed after log was committed (log is canonical, store stale): {0}")]
PostLogStore(crate::store::StoreError),
}
/// Errors from the replay functions (`replay`, `replay_into`,
/// `replay_with_snapshot_into`).
#[derive(Debug, Error)]
pub enum ReplayError {
/// The log entries could not be read.
#[error("log: {0}")]
Log(#[from] LogError),
/// Applying a logged morphism's ops to the target store failed.
#[error("store: {0}")]
Store(#[from] StoreError),
}
/// A reconcile rebuilds a stale store from the log. Either the wipe step
/// or the replay step can fail.
#[derive(Debug, Error)]
pub enum ReconcileError {
/// `Store::clear` failed, so the replay was never started.
#[error("clearing store before replay failed: {0}")]
Clear(#[source] StoreError),
/// The store was cleared but replaying the log into it failed.
#[error("replay into cleared store failed: {0}")]
Replay(#[from] ReplayError),
}
/// Outcome of `execute_and_log_with_recovery`. PreLog/LogAppend mirror the
/// pre-WAL-fence variants of `ExecuteError` — the store is untouched and
/// the caller can retry. `Unrecoverable` means the WAL fence was crossed
/// (event is canonical on disk) but reconcile *also* failed: the operator
/// must intervene before any further writes.
#[derive(Debug, Error)]
pub enum RecoverableExecuteError {
/// Validation/computation failed before anything was logged.
#[error("pre-log validation failed: {0}")]
PreLog(#[from] ExecError),
/// The event could not be appended to the log.
#[error("log append failed: {0}")]
LogAppend(#[from] LogError),
/// Both the store apply and the follow-up reconcile failed. Only
/// `post_log` is exposed as the error `source`; the reconcile failure
/// is reported through the display message and the `reconcile` field.
#[error(
"store apply failed AND reconcile failed — log is canonical, store is in an unknown state. apply: {post_log}; reconcile: {reconcile}"
)]
Unrecoverable {
#[source]
post_log: StoreError,
reconcile: ReconcileError,
},
}
/// Reasons `verify_log` can reject a log. Verification stops at the first
/// failing entry; every variant except `Log` carries that entry's `seq`.
#[derive(Debug, Error)]
pub enum VerifyError {
/// The log entries could not be read.
#[error("log: {0}")]
Log(#[from] LogError),
/// Re-running the logged morphism returned an error instead of ops.
#[error("morphism replay failed at seq {seq}: {source}")]
Exec {
seq: u64,
#[source]
source: ExecError,
},
/// Re-running the morphism succeeded but produced different ops.
/// `expected` holds the logged ops, `actual` the recomputed ones.
#[error(
"non-determinism at seq {seq} morphism `{morphism}`: recomputed ops differ from logged ops"
)]
OpsMismatch {
seq: u64,
morphism: String,
expected: Vec<FieldOp>,
actual: Vec<FieldOp>,
},
/// The morphism was logged under a different schema/script bundle
/// than the one currently loaded. Re-executing it would (likely)
/// produce different ops, but the more specific signal is "the
/// rules changed since this was logged" — actionable: migrate the
/// log, or pin the executor to a compatible version.
#[error(
"schema mismatch at seq {seq} morphism `{morphism}`: logged schema_hash differs from current executor"
)]
SchemaMismatch {
seq: u64,
morphism: String,
logged: [u8; 32],
current: [u8; 32],
},
/// A `Seed` entry was logged under a different KCL bundle than the
/// one currently loaded. The seed's data may no longer fit the
/// entity definition. Coarser than `SchemaMismatch` (any change
/// to any schema file flips it, even one that doesn't affect the
/// seeded entity) but the operator still wants to know.
#[error(
"seed schema mismatch at seq {seq} entity `{entity}` id {id}: logged bundle hash differs from current executor"
)]
SeedSchemaMismatch {
seq: u64,
entity: String,
id: Uuid,
logged: [u8; 32],
current: [u8; 32],
},
}
/// Handle to an append-only log file with one JSON entry per line. It
/// keeps only the file's path and the seq the next entry must carry; no
/// file descriptor is held between calls.
pub struct EventLog {
// Location of the log file.
path: PathBuf,
// Seq that the next appended entry is required to have.
next_seq: u64,
}
impl EventLog {
    /// Open the log at `path`, or prepare a handle for a file that does not
    /// exist yet (it is created by the first `append`).
    ///
    /// Existing entries are read to validate their numbering and to compute
    /// `next_seq`. The first entry may carry any seq (a compacted log is
    /// rooted above 0); every following entry must be exactly one greater
    /// than its predecessor.
    ///
    /// # Errors
    /// `LogError::Io` / `LogError::Parse` if the file cannot be read, and
    /// `LogError::NonMonotonic` at the first gap or repetition.
    pub fn open(path: impl Into<PathBuf>) -> Result<Self, LogError> {
        let path = path.into();
        let next_seq = if path.exists() {
            // `None` until the first entry has been seen; afterwards the seq
            // the following entry is required to have.
            let mut expected: Option<u64> = None;
            for entry in read_entries(&path)? {
                let got = entry.seq();
                if let Some(want) = expected {
                    if got != want {
                        return Err(LogError::NonMonotonic {
                            got,
                            expected: want,
                        });
                    }
                }
                expected = Some(got + 1);
            }
            expected.unwrap_or(0)
        } else {
            0
        };
        Ok(Self { path, next_seq })
    }

    /// Seq the next appended entry must carry.
    pub fn next_seq(&self) -> u64 {
        self.next_seq
    }

    /// Filesystem location of the log.
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// Append one entry and make it durable.
    ///
    /// The entry is written as a single JSON line and `sync_all()` is called
    /// before returning `Ok`. This is the WAL fence: once the caller goes on
    /// to mutate the store, the event can be recovered after a power loss.
    ///
    /// # Errors
    /// `LogError::NonMonotonic` when `entry.seq()` is not `next_seq()`
    /// (nothing is written), or `LogError::Io` on a filesystem failure.
    pub fn append(&mut self, entry: LogEntry) -> Result<(), LogError> {
        let got = entry.seq();
        if got != self.next_seq {
            return Err(LogError::NonMonotonic {
                got,
                expected: self.next_seq,
            });
        }

        let mut file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&self.path)?;
        let json = serde_json::to_string(&entry).expect("LogEntry serializes");
        for chunk in [json.as_bytes(), b"\n".as_slice()] {
            file.write_all(chunk)?;
        }
        file.sync_all()?;

        self.next_seq += 1;
        Ok(())
    }

    /// Read every entry currently in the file; an absent file yields an
    /// empty list.
    pub fn entries(&self) -> Result<Vec<LogEntry>, LogError> {
        if self.path.exists() {
            read_entries(&self.path)
        } else {
            Ok(Vec::new())
        }
    }

    /// Truncate the log by dropping every entry with `seq <= through_seq`.
    ///
    /// IRREVERSIBLE: the caller must first make sure a `Snapshot` covering
    /// `through_seq` is on durable storage — once the entries are gone,
    /// replay can only start from that snapshot.
    ///
    /// The surviving entries are written to a sibling temp file (same path,
    /// extension replaced by `compacting`), fsynced, and renamed over the
    /// original, after which the parent directory is fsynced.
    pub fn compact_through(&mut self, through_seq: u64) -> Result<(), LogError> {
        let mut survivors = self.entries()?;
        survivors.retain(|entry| entry.seq() > through_seq);

        let tmp = self.path.with_extension("compacting");
        {
            // Scoped so the temp file is closed before it is renamed.
            let mut out = std::fs::File::create(&tmp)?;
            for entry in &survivors {
                let json = serde_json::to_string(entry).expect("LogEntry serializes");
                out.write_all(json.as_bytes())?;
                out.write_all(b"\n")?;
            }
            out.sync_all()?;
        }

        std::fs::rename(&tmp, &self.path)?;
        sync_parent_dir(&self.path)?;
        Ok(())
    }
}
/// Open and fsync the directory that contains `target`.
///
/// After an atomic rename, the directory entry change isn't durable until
/// the directory itself is fsynced — without this, a kernel/power crash
/// between the rename and the next disk flush could leave the directory in
/// a state where the rename never happened (depending on filesystem journal
/// mode). With it, the rename survives.
///
/// A bare relative file name such as `log.jsonl` has an *empty* parent:
/// `Path::parent` returns `Some("")` for it, not `None`, and an empty path
/// cannot be opened. Both the empty and the absent parent are therefore
/// mapped to `.`, the directory such a file actually lives in.
///
/// # Errors
/// Any failure to open or sync the directory is returned as an IO error so
/// the caller can surface it; for durability code "loud" is preferred over
/// "silent". The syscalls involved are POSIX-portable across Linux, macOS
/// and the BSDs (the OSes Nakui targets).
fn sync_parent_dir(target: &Path) -> std::io::Result<()> {
    let parent = match target.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => dir,
        // No parent component at all, or an empty one (bare file name).
        _ => Path::new("."),
    };
    std::fs::File::open(parent)?.sync_all()
}
/// A snapshot of a `Store`'s state at a particular log seq. Lets us short-
/// circuit replay: load the snapshot, then apply only the events with
/// `seq > snapshot.seq`. MemoryStore-specific for V1 — backends that
/// already persist (SurrealStore + RocksDB) don't need this layer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Snapshot {
/// The last log seq this snapshot subsumes. `replay` resumes at seq+1.
pub seq: u64,
/// Full state at that seq, in MemoryStore's native shape: outer key is
/// the entity name, inner key is the record id.
pub records: HashMap<String, HashMap<Uuid, Value>>,
/// Module schema hash at capture time. `Some` for snapshots taken
/// via `capture(_, _, executor)`; `None` for those taken via the
/// hash-unaware `from_memory_store`. Loaders use this to refuse a
/// snapshot produced under a different bundle.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub schema_hash: Option<[u8; 32]>,
}
/// Returned by `Snapshot::ensure_compatible_with` when a snapshot must not
/// be loaded under the current executor.
#[derive(Debug, Error)]
pub enum SnapshotMismatchError {
/// The hash stamped on the snapshot differs from the executor's module
/// schema hash. Both hashes are carried for diagnostics.
#[error(
"snapshot schema_hash differs from current executor; refusing to load (snapshot was taken under a different module bundle)"
)]
SchemaMismatch {
snapshot: [u8; 32],
current: [u8; 32],
},
}
impl Snapshot {
    /// Copy the in-memory store's records into a snapshot that is *not*
    /// bound to a schema bundle (`schema_hash` is `None`). Intended for test
    /// fixtures and ad-hoc tooling; production code should use `capture` so
    /// the snapshot can be checked against the executor when loaded.
    pub fn from_memory_store(store: &MemoryStore, seq: u64) -> Self {
        let records = store.records().clone();
        Self {
            seq,
            records,
            schema_hash: None,
        }
    }

    /// Production capture: like `from_memory_store`, but stamps the snapshot
    /// with the executor's `module_schema_hash` so a later load can refuse a
    /// mismatching bundle.
    pub fn capture(store: &MemoryStore, seq: u64, executor: &Executor) -> Self {
        let records = store.records().clone();
        let schema_hash = Some(executor.module_schema_hash());
        Self {
            seq,
            records,
            schema_hash,
        }
    }

    /// Check that this snapshot was taken under the bundle `executor` has
    /// loaded. A snapshot without a hash (legacy, or from
    /// `from_memory_store`) always passes — the check was opted out of at
    /// capture time.
    ///
    /// # Errors
    /// `SnapshotMismatchError::SchemaMismatch` when the hashes differ.
    pub fn ensure_compatible_with(
        &self,
        executor: &Executor,
    ) -> Result<(), SnapshotMismatchError> {
        match self.schema_hash {
            None => Ok(()),
            Some(snapshot) => {
                let current = executor.module_schema_hash();
                if snapshot == current {
                    Ok(())
                } else {
                    Err(SnapshotMismatchError::SchemaMismatch { snapshot, current })
                }
            }
        }
    }

    /// Atomically write the snapshot to `path`.
    ///
    /// The bytes go to a sibling temp file (`path` with its extension
    /// replaced by `writing`), which is fsynced and then renamed over the
    /// target; finally the parent directory is fsynced so the rename
    /// survives a crash. A crash mid-write therefore leaves either the
    /// previous snapshot or the complete new one at `path`, never a
    /// truncated file. A temp file left behind by an earlier crash is simply
    /// overwritten by `File::create` on the next attempt.
    pub fn write(&self, path: &Path) -> Result<(), LogError> {
        let bytes = serde_json::to_vec_pretty(self).expect("snapshot serializes");
        let tmp = path.with_extension("writing");

        let mut out = std::fs::File::create(&tmp)?;
        out.write_all(&bytes)?;
        out.sync_all()?;
        // Close the temp file before renaming it into place.
        drop(out);

        std::fs::rename(&tmp, path)?;
        sync_parent_dir(path)?;
        Ok(())
    }

    /// Load a snapshot from `path`. Returns `Ok(None)` when the file does
    /// not exist.
    ///
    /// # Errors
    /// `LogError::Io` if the file cannot be read, `LogError::Parse` (with
    /// `line: 0`) if it is not a valid snapshot.
    pub fn load(path: &Path) -> Result<Option<Self>, LogError> {
        if !path.exists() {
            return Ok(None);
        }
        let text = std::fs::read_to_string(path)?;
        match serde_json::from_str::<Snapshot>(&text) {
            Ok(snapshot) => Ok(Some(snapshot)),
            Err(source) => Err(LogError::Parse { line: 0, source }),
        }
    }
}
/// Parse the log file at `path`, one JSON entry per line. Lines that are
/// empty or whitespace-only are skipped.
///
/// # Errors
/// `LogError::Io` if the file cannot be opened or read; `LogError::Parse`
/// with the 1-based line number of the first line that fails to
/// deserialize.
fn read_entries(path: &Path) -> Result<Vec<LogEntry>, LogError> {
    let reader = BufReader::new(std::fs::File::open(path)?);
    let mut entries = Vec::new();
    // Pair each line with its 1-based number for error reporting.
    for (line_no, raw) in (1..).zip(reader.lines()) {
        let raw = raw?;
        if raw.trim().is_empty() {
            continue;
        }
        match serde_json::from_str::<LogEntry>(&raw) {
            Ok(entry) => entries.push(entry),
            Err(source) => {
                return Err(LogError::Parse {
                    line: line_no,
                    source,
                });
            }
        }
    }
    Ok(entries)
}
/// Record a seed in the log, then place it in the store.
///
/// WAL order: the `Seed` entry (stamped with the executor's
/// `schema_bundle_hash`) is appended *first*; only after that succeeds is
/// the store mutated. A failed append therefore leaves the store untouched
/// and the call can safely be retried.
///
/// `Store::seed` is infallible by trait contract, so once the entry is
/// durable the store update is guaranteed to land for in-memory backends.
/// Backends with fallible writes (network/disk) surface failures as a panic
/// inside `seed()`; callers needing a fallible seed path should wrap their
/// own retry/reconcile loop.
///
/// # Errors
/// Only the log append can fail; its `LogError` is returned unchanged.
pub fn seed_and_log<S: Store>(
    executor: &Executor,
    store: &mut S,
    log: &mut EventLog,
    entity: &str,
    id: Uuid,
    data: Value,
) -> Result<(), LogError> {
    let seq = log.next_seq();
    let entry = LogEntry::Seed {
        seq,
        entity: entity.to_string(),
        id,
        data: data.clone(),
        schema_hash: Some(executor.schema_bundle_hash),
    };
    log.append(entry)?;

    store.seed(entity, id, data);

    // Best-effort marker update: if it fails, the next startup merely does
    // an extra full replay — never a correctness problem.
    let _ = store.set_last_applied_seq(seq);
    Ok(())
}
/// Run a morphism and persist the event in WAL order:
/// 1. compute() — pure, no mutation; full kernel validation incl. dry-run.
/// 2. log.append() — event hits disk *before* the store changes.
/// 3. store.apply() — materialize the change. By WAL semantics the log
/// is now the source of truth: if (3) fails, the stale store can be
/// rebuilt by replaying the log.
///
/// The error variants tell the caller exactly which stage failed so they
/// know whether to retry, recover, or rebuild.
pub fn execute_and_log<S: Store>(
executor: &Executor,
store: &mut S,
log: &mut EventLog,
morphism: &str,
inputs: &[(&str, Uuid)],
params: Value,
) -> Result<Vec<FieldOp>, ExecuteError> {
let ops = executor.compute(store, morphism, inputs, params.clone())?;
let seq = log.next_seq();
let entry = LogEntry::Morphism {
seq,
morphism: morphism.to_string(),
inputs: inputs
.iter()
.map(|(r, id)| (r.to_string(), *id))
.collect(),
params,
ops: ops.clone(),
schema_hash: executor.schema_hash(morphism),
};
log.append(entry)?;
store.apply(&ops).map_err(ExecuteError::PostLogStore)?;
let _ = store.set_last_applied_seq(seq);
Ok(ops)
}
/// Rebuild a (possibly stale) store from the log: clear it, then replay
/// every event into it.
///
/// Use this after a `PostLogStore` failure — the WAL fence guarantees the
/// log is the source of truth, so a clean replay brings the store back into
/// agreement with it. `execute_and_log_with_recovery` automates this for
/// the common case; call `reconcile` directly when an operator/CLI drives
/// the recovery or when drift was detected out-of-band.
///
/// # Errors
/// `ReconcileError::Clear` if the wipe fails (no replay is attempted),
/// `ReconcileError::Replay` if the replay fails.
pub fn reconcile<S: Store>(store: &mut S, log: &EventLog) -> Result<(), ReconcileError> {
    if let Err(clear_err) = store.clear() {
        return Err(ReconcileError::Clear(clear_err));
    }
    replay_into(log, store).map_err(ReconcileError::from)
}
/// Like `execute_and_log`, but on `PostLogStore` automatically rebuilds
/// the store from the log and returns the ops as if the apply had
/// succeeded. The caller sees a consistent post-state — either the event
/// landed cleanly, or it landed via reconcile, or `Unrecoverable` (which
/// means even the rebuild failed and the store must not be trusted).
///
/// PreLog and LogAppend are forwarded verbatim: the WAL fence wasn't
/// crossed, so there's nothing to reconcile.
pub fn execute_and_log_with_recovery<S: Store>(
executor: &Executor,
store: &mut S,
log: &mut EventLog,
morphism: &str,
inputs: &[(&str, Uuid)],
params: Value,
) -> Result<Vec<FieldOp>, RecoverableExecuteError> {
let ops = executor.compute(store, morphism, inputs, params.clone())?;
let seq = log.next_seq();
let entry = LogEntry::Morphism {
seq,
morphism: morphism.to_string(),
inputs: inputs
.iter()
.map(|(r, id)| (r.to_string(), *id))
.collect(),
params,
ops: ops.clone(),
schema_hash: executor.schema_hash(morphism),
};
log.append(entry)?;
if let Err(post_log) = store.apply(&ops) {
if let Err(reconcile) = reconcile(store, log) {
return Err(RecoverableExecuteError::Unrecoverable {
post_log,
reconcile,
});
}
// After reconcile the store reflects log state up to log.next_seq()-1
// (which equals our seq). The reconcile path itself updated the
// marker; nothing more to do here.
} else {
let _ = store.set_last_applied_seq(seq);
}
Ok(ops)
}
/// Replay the log into a caller-provided `Store`. The store should be
/// empty on entry; existing records are not erased. Use this with any
/// `Store` impl (MemoryStore, SurrealStore, future backends).
pub fn replay_into<S: Store>(log: &EventLog, store: &mut S) -> Result<(), ReplayError> {
replay_with_snapshot_into(log, None, store)
}
/// Replay into `store`, optionally starting from a snapshot.
///
/// With `Some(snapshot)`, every record in it is seeded into `store` first
/// and only log entries with `seq > snapshot.seq` are applied afterwards;
/// with `None`, the whole log is applied. This shrinks replay cost from
/// O(events) to O(events after the snapshot), which matters once the log
/// has grown large.
///
/// When at least one entry was applied, or a snapshot was supplied, the
/// store's last-applied marker is updated on a best-effort basis.
///
/// # Errors
/// `ReplayError::Log` if the entries cannot be read, `ReplayError::Store`
/// if applying a morphism's ops fails.
pub fn replay_with_snapshot_into<S: Store>(
    log: &EventLog,
    snapshot: Option<&Snapshot>,
    store: &mut S,
) -> Result<(), ReplayError> {
    // Phase 1: load the snapshot's records, if there is a snapshot.
    if let Some(snap) = snapshot {
        for (entity, records) in &snap.records {
            for (id, data) in records {
                store.seed(entity, *id, data.clone());
            }
        }
    }
    let start_seq = snapshot.map_or(0, |snap| snap.seq + 1);
    let mut last_applied: Option<u64> = snapshot.map(|snap| snap.seq);

    // Phase 2: apply every entry the snapshot does not already cover.
    let pending = log
        .entries()?
        .into_iter()
        .filter(|entry| entry.seq() >= start_seq);
    for entry in pending {
        let seq = entry.seq();
        match entry {
            LogEntry::Seed {
                entity, id, data, ..
            } => {
                store.seed(&entity, id, data);
            }
            LogEntry::Morphism { ops, .. } => {
                store.apply(&ops)?;
            }
        }
        last_applied = Some(seq);
    }

    // Best-effort marker update; its result is intentionally ignored.
    if let Some(seq) = last_applied {
        let _ = store.set_last_applied_seq(seq);
    }
    Ok(())
}
/// Convenience wrapper: replay the log into a brand-new `MemoryStore` and
/// return it. This is the fast path — O(events), with no Rhai execution.
pub fn replay(log: &EventLog) -> Result<MemoryStore, ReplayError> {
    let mut fresh = MemoryStore::new();
    match replay_into(log, &mut fresh) {
        Ok(()) => Ok(fresh),
        Err(err) => Err(err),
    }
}
/// Re-execute every logged morphism through the kernel, starting from an
/// empty in-memory store, and check that the recomputed ops equal the
/// logged ops. This is the determinism contract: a failure means a morphism
/// became impure.
///
/// Entries that carry a `schema_hash` are first checked against the current
/// executor. A seed is compared with `schema_bundle_hash`, a morphism with
/// `schema_hash(morphism)`. Entries without a hash predate that contract
/// and skip the check.
///
/// # Errors
/// Stops at the first problem: unreadable log, schema mismatch on a seed or
/// morphism, a morphism that fails to execute, or recomputed ops that
/// differ from the logged ones.
pub fn verify_log(log: &EventLog, executor: &Executor) -> Result<(), VerifyError> {
    let mut store = MemoryStore::new();

    for entry in log.entries()? {
        match entry {
            LogEntry::Seed {
                seq,
                entity,
                id,
                data,
                schema_hash,
            } => {
                let current = executor.schema_bundle_hash;
                match schema_hash {
                    Some(logged) if logged != current => {
                        return Err(VerifyError::SeedSchemaMismatch {
                            seq,
                            entity,
                            id,
                            logged,
                            current,
                        });
                    }
                    // Matching hash, or a legacy entry with none.
                    _ => store.seed(&entity, id, data),
                }
            }
            LogEntry::Morphism {
                seq,
                morphism,
                inputs,
                params,
                ops: logged_ops,
                schema_hash,
            } => {
                // Schema check first: if the rules changed, re-execution is
                // meaningless and would only surface as a less actionable
                // OpsMismatch. The check needs both a logged hash and a
                // current one; if either is absent it is skipped.
                let mismatch = schema_hash
                    .and_then(|logged| {
                        executor
                            .schema_hash(&morphism)
                            .map(|current| (logged, current))
                    })
                    .filter(|(logged, current)| logged != current);
                if let Some((logged, current)) = mismatch {
                    return Err(VerifyError::SchemaMismatch {
                        seq,
                        morphism,
                        logged,
                        current,
                    });
                }

                // The executor wants `(&str, Uuid)` pairs; keep the owned
                // role names alive while it borrows them.
                let bindings: Vec<(String, Uuid)> = inputs.into_iter().collect();
                let roles: Vec<(&str, Uuid)> = bindings
                    .iter()
                    .map(|(role, id)| (role.as_str(), *id))
                    .collect();

                let recomputed = match executor.run(&mut store, &morphism, &roles, params) {
                    Ok(ops) => ops,
                    Err(source) => return Err(VerifyError::Exec { seq, source }),
                };
                if recomputed != logged_ops {
                    return Err(VerifyError::OpsMismatch {
                        seq,
                        morphism,
                        expected: logged_ops,
                        actual: recomputed,
                    });
                }
            }
        }
    }
    Ok(())
}
+667
View File
@@ -0,0 +1,667 @@
use serde_json::{Value, json};
use sha2::{Digest, Sha256};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::path::{Path, PathBuf};
use thiserror::Error;
use uuid::Uuid;
use crate::delta::{FieldOp, simulate_on};
use crate::graph::{GraphError, ManifestGraph};
use crate::kcl_wrapper::{self, KclError};
use crate::manifest::{ConserveRule, Manifest, ManifestError, MorphismSpec, ValidationError};
use crate::rhai_executor::{RhaiError, RhaiExecutor};
use crate::store::{Store, StoreError};
/// Everything that can go wrong while loading a module or computing and
/// applying a morphism through `Executor`.
#[derive(Debug, Error)]
pub enum ExecError {
/// The requested morphism name has no spec in the loaded manifest.
#[error("morphism `{0}` not in manifest")]
UnknownMorphism(String),
/// The caller did not bind an id to a role the morphism spec declares.
#[error("missing input role `{role}` for morphism `{morphism}`")]
MissingInput { morphism: String, role: String },
/// The same id was bound to two different roles of one morphism.
#[error("duplicate input id {id} bound to roles `{role_a}` and `{role_b}`")]
DuplicateInputId {
id: Uuid,
role_a: String,
role_b: String,
},
/// `Store::load` returned nothing for an input `(entity, id)`.
#[error("entity `{0}` id `{1}` not found in store")]
EntityMissing(String, Uuid),
/// The script produced an op whose token is not listed in the spec's
/// `writes`, or a Set that targets an untracked id / mismatching entity.
#[error(
"capability violation: morphism `{morphism}` produced op on `{token}` not in writes={declared:?}"
)]
CapabilityViolation {
morphism: String,
token: String,
declared: Vec<String>,
},
/// The summed delta of a conserved field within one group is non-zero.
#[error(
"conservation violated: Σ Δ {entity}.{field} where {group_by} = {group:?} = {total} (expected 0)"
)]
ConservationViolation {
entity: String,
field: String,
group_by: String,
group: String,
total: i128,
},
/// A conservation rule could not be evaluated (e.g. a value is not an
/// i64, or a Set cannot be tied back to a tracked input).
#[error("conservation rule {entity}.{field}: {message}")]
ConservationMalformed {
entity: String,
field: String,
message: String,
},
/// KCL validation of an input's state failed before the script ran.
#[error("kcl pre-check failed on `{role}` ({entity}): {source}")]
KclPre {
role: String,
entity: String,
#[source]
source: KclError,
},
/// KCL validation of an input's simulated post-state failed.
#[error("kcl post-check failed on `{role}` ({entity}): {source}")]
KclPost {
role: String,
entity: String,
#[source]
source: KclError,
},
/// KCL validation of a record produced by a Create op failed.
#[error("kcl post-check failed on created {entity} {id}: {source}")]
KclPostCreate {
entity: String,
id: Uuid,
#[source]
source: KclError,
},
/// Error surfaced by the Rhai script runner.
#[error("rhai: {0}")]
Rhai(#[from] RhaiError),
/// Error surfaced by the backing store (dry-run or apply).
#[error("store: {0}")]
Store(#[from] StoreError),
/// The manifest file could not be read or parsed.
#[error("manifest: {0}")]
Manifest(#[from] ManifestError),
/// The manifest parsed but failed semantic validation.
#[error("manifest validation: {0}")]
ManifestValidation(#[from] ValidationError),
/// The dependency graph could not be built from the manifest.
#[error("manifest graph: {0}")]
Graph(#[from] GraphError),
/// Filesystem error while building or hashing the schema bundle/scripts.
#[error("io: {0}")]
Io(#[from] std::io::Error),
}
/// A loaded module: its manifest, dependency graph, schema bundle, script
/// runner and the schema hashes computed at load time.
pub struct Executor {
/// Parsed manifest; morphism specs are looked up here by name.
pub manifest: Manifest,
/// Dependency graph built from `manifest`.
pub graph: ManifestGraph,
/// Module directory; morphism script paths are resolved against it.
pub module_dir: PathBuf,
/// Schema file handed to `kcl_wrapper::vet` for every KCL check.
pub schema_path: PathBuf,
/// Script runner used to execute morphism scripts.
pub rhai: RhaiExecutor,
/// `true` when `schema_path` is a tempfile bundle created by
/// `load_module`; Drop removes it. `false` for inline-built executors
/// that point at a real schema file owned by the caller (tests).
pub owned_bundle: bool,
/// Per-morphism `schema_hash`: SHA-256 of (kcl bundle + manifest spec
/// + normalized rhai script), computed once at load. The hash is the
/// determinism contract for KCL evolution — `verify_log` uses it to
/// reject logs whose entries were produced under different rules.
pub schema_hashes: HashMap<String, [u8; 32]>,
/// Module-wide bundle hash: SHA-256 of just the KCL bundle bytes.
/// Stamped onto every `LogEntry::Seed` via `seed_and_log` so
/// `verify_log` can flag seeds whose entity schemas have evolved
/// since they were logged. Coarser than `schema_hashes` (any
/// schema.k edit moves it, even one that doesn't affect the seeded
/// entity) but cheap and conservative — false positives over false
/// negatives, like the morphism hash.
pub schema_bundle_hash: [u8; 32],
}
impl Drop for Executor {
    /// Delete the schema bundle when this executor owns it. Removal is
    /// best-effort: a failure is ignored because `drop` cannot report it.
    fn drop(&mut self) {
        if !self.owned_bundle {
            // The schema file belongs to the caller; leave it alone.
            return;
        }
        let _ = std::fs::remove_file(&self.schema_path);
    }
}
/// One row of the bound-inputs map. Holds both `role` and `entity` so the
/// capability check can verify a Set's `path.entity` matches the role's
/// declared entity (catches uuid-collision and lazy scripts).
#[derive(Debug, Clone)]
struct InputBinding {
// Role name from the morphism spec that the id was bound to.
role: String,
// Entity the spec declares for that role.
entity: String,
}
impl Executor {
pub fn load_module(module_dir: impl Into<PathBuf>) -> Result<Self, ExecError> {
let module_dir = module_dir.into();
let manifest = Manifest::load(&module_dir.join("nsmc.json"))?;
manifest.validate(&module_dir)?;
let graph = ManifestGraph::build(&manifest)?;
let schema_path = build_schema_bundle(&module_dir, &manifest.effective_schemas())?;
let schema_bundle_bytes = std::fs::read(&schema_path)?;
let schema_bundle_hash = compute_schema_bundle_hash(&schema_bundle_bytes);
let mut schema_hashes = HashMap::with_capacity(manifest.morphisms.len());
for spec in &manifest.morphisms {
let script_path = module_dir.join(&spec.script);
let hash = compute_morphism_schema_hash(&schema_bundle_bytes, spec, &script_path)?;
schema_hashes.insert(spec.name.clone(), hash);
}
Ok(Self {
manifest,
graph,
module_dir,
schema_path,
rhai: RhaiExecutor::new_sandboxed(),
owned_bundle: true,
schema_hashes,
schema_bundle_hash,
})
}
/// Hash for the named morphism in the currently loaded module. `None`
/// if no such morphism is declared. Used by `verify_log` to enforce
/// the schema-version contract.
pub fn schema_hash(&self, morphism: &str) -> Option<[u8; 32]> {
self.schema_hashes.get(morphism).copied()
}
/// Single 32-byte hash representing the entire module's schema:
/// every morphism's hash, in canonical name order, framed and
/// chained. Snapshots pin this so a snapshot taken under bundle A
/// can be detected when later loaded against bundle B.
pub fn module_schema_hash(&self) -> [u8; 32] {
let mut entries: Vec<(&String, &[u8; 32])> = self.schema_hashes.iter().collect();
entries.sort_by_key(|(name, _)| name.as_str().to_owned());
let mut hasher = Sha256::new();
hasher.update(b"nakui-module-v1\0");
for (name, hash) in entries {
hasher.update((name.len() as u64).to_le_bytes());
hasher.update(name.as_bytes());
hasher.update(hash);
}
hasher.finalize().into()
}
/// Compute the ops for a morphism without mutating the store.
///
/// Pipeline:
/// 1. Resolve manifest spec; bind caller's role->id to spec inputs.
/// 2. Reject duplicate ids across roles.
/// 3. Load every input entity; KCL pre-check each.
/// 4. Run the Rhai script with `{ states, ids, params }`.
/// 5. Capability check: every Set targets a tracked id whose entity
/// matches the role's declared entity, and produces a `<role>.<field>`
/// token in `writes`; Create/Delete produce `<entity>` tokens.
/// 6. Delta-level invariants (conservation rules).
/// 7. Per-input KCL post-check (skipped for inputs that the ops Delete).
/// 8. KCL-validate every Created record against its entity schema.
/// 9. Pre-apply check: store.apply_dry_run guarantees apply will land.
///
/// On `Ok`, the returned ops are *contractually applicable* — caller can
/// log first and then apply, knowing apply will succeed barring transient
/// backend faults.
pub fn compute<S: Store>(
&self,
store: &S,
morphism_name: &str,
inputs: &[(&str, Uuid)],
params: Value,
) -> Result<Vec<FieldOp>, ExecError> {
let spec: &MorphismSpec = self
.manifest
.morphism(morphism_name)
.ok_or_else(|| ExecError::UnknownMorphism(morphism_name.to_string()))?;
// 1. Bind inputs.
let inputs_map: BTreeMap<String, Uuid> = inputs
.iter()
.map(|(role, id)| (role.to_string(), *id))
.collect();
for spec_in in &spec.inputs {
if !inputs_map.contains_key(&spec_in.role) {
return Err(ExecError::MissingInput {
morphism: morphism_name.to_string(),
role: spec_in.role.clone(),
});
}
}
// 2. Build id -> binding (role + entity), rejecting duplicates.
let mut id_to_input: HashMap<Uuid, InputBinding> = HashMap::new();
for spec_in in &spec.inputs {
let id = inputs_map[&spec_in.role];
if let Some(other) = id_to_input.get(&id) {
return Err(ExecError::DuplicateInputId {
id,
role_a: other.role.clone(),
role_b: spec_in.role.clone(),
});
}
id_to_input.insert(
id,
InputBinding {
role: spec_in.role.clone(),
entity: spec_in.entity.clone(),
},
);
}
// 3. Load + pre-check every input.
let mut loaded: BTreeMap<String, Value> = BTreeMap::new();
let mut id_strings: BTreeMap<String, String> = BTreeMap::new();
for spec_in in &spec.inputs {
let id = inputs_map[&spec_in.role];
let state = store
.load(&spec_in.entity, id)
.ok_or_else(|| ExecError::EntityMissing(spec_in.entity.clone(), id))?;
self.kcl_check(&spec_in.entity, &state)
.map_err(|e| ExecError::KclPre {
role: spec_in.role.clone(),
entity: spec_in.entity.clone(),
source: e,
})?;
loaded.insert(spec_in.role.clone(), state);
id_strings.insert(spec_in.role.clone(), id.to_string());
}
// 4. Rhai.
let script_path = self.module_dir.join(&spec.script);
let input = json!({
"states": loaded,
"ids": id_strings,
"params": params,
});
let ops = self.rhai.run(&script_path, input)?;
// 5. Capability check.
let declared: HashSet<&str> = spec.writes.iter().map(String::as_str).collect();
for op in &ops {
let token = match op {
FieldOp::Set { path, .. } => match id_to_input.get(&path.id) {
Some(binding) if binding.entity == path.entity => {
format!("{}.{}", binding.role, path.field)
}
Some(_) => {
return Err(ExecError::CapabilityViolation {
morphism: morphism_name.to_string(),
token: format!(
"<entity-mismatch>.{}.{}",
path.entity, path.field
),
declared: spec.writes.clone(),
});
}
None => {
return Err(ExecError::CapabilityViolation {
morphism: morphism_name.to_string(),
token: format!("<untracked id>.{}.{}", path.entity, path.field),
declared: spec.writes.clone(),
});
}
},
FieldOp::Create { entity, .. } => entity.clone(),
FieldOp::Delete { entity, .. } => entity.clone(),
};
if !declared.contains(token.as_str()) {
return Err(ExecError::CapabilityViolation {
morphism: morphism_name.to_string(),
token,
declared: spec.writes.clone(),
});
}
}
// 6. Conservation invariants.
for rule in &spec.invariants.conserve {
check_conservation(rule, &loaded, &id_to_input, &ops)?;
}
// 7. Per-input KCL post-check; skip Deleted inputs.
for spec_in in &spec.inputs {
let id = inputs_map[&spec_in.role];
if let Some(new_state) =
simulate_on(&loaded[&spec_in.role], &spec_in.entity, id, &ops)
{
self.kcl_check(&spec_in.entity, &new_state)
.map_err(|e| ExecError::KclPost {
role: spec_in.role.clone(),
entity: spec_in.entity.clone(),
source: e,
})?;
}
}
// 8. Validate every Created record against its entity schema.
for op in &ops {
if let FieldOp::Create { entity, id, data } = op {
self.kcl_check(entity, data)
.map_err(|e| ExecError::KclPostCreate {
entity: entity.clone(),
id: *id,
source: e,
})?;
}
}
// 9. Pre-apply check: structural compatibility with current store state.
store.apply_dry_run(&ops)?;
Ok(ops)
}
/// compute + apply, for callers that don't need event logging.
pub fn run<S: Store>(
&self,
store: &mut S,
morphism_name: &str,
inputs: &[(&str, Uuid)],
params: Value,
) -> Result<Vec<FieldOp>, ExecError> {
let ops = self.compute(store, morphism_name, inputs, params)?;
store.apply(&ops)?;
Ok(ops)
}
fn kcl_check(&self, entity: &str, state: &Value) -> Result<(), KclError> {
let tmp = std::env::temp_dir().join(format!("nakui_{}_{}.json", entity, Uuid::new_v4()));
std::fs::write(&tmp, serde_json::to_vec(state).expect("state serializes"))
.map_err(KclError::Io)?;
let result = kcl_wrapper::vet(&self.schema_path, &tmp, entity);
let _ = std::fs::remove_file(&tmp);
result
}
}
// NOTE: the paragraph below describes `build_schema_bundle` (defined
// further down in this file), not the hash function that follows it. It
// is kept as a plain comment so it no longer shows up in this function's
// rustdoc.
//
// Concatenate every declared `.k` file into a single bundle on disk.
// `kcl vet` only takes one schema arg, so cross-module modules (e.g. sales
// referencing both treasury and inventory entities) bundle their imports
// at load time. The bundle lives in `temp_dir` for the lifetime of the
// executor; one file per Executor instance.

/// Module-wide hash of just the KCL bundle bytes. Stamped on
/// `LogEntry::Seed` entries (which don't run through any morphism, so
/// `compute_morphism_schema_hash` doesn't apply). Bumped by any byte
/// change in any schema file the manifest exposes — coarser than a
/// per-entity hash would be, but doesn't require KCL parsing.
///
/// Layout fed to SHA-256: the tag `nakui-bundle-v1\0`, the bundle length
/// as a little-endian `u64`, then the bundle bytes.
fn compute_schema_bundle_hash(schema_bundle_bytes: &[u8]) -> [u8; 32] {
    let length_prefix = (schema_bundle_bytes.len() as u64).to_le_bytes();
    let mut hasher = Sha256::new();
    for part in [
        &b"nakui-bundle-v1\0"[..],
        &length_prefix[..],
        schema_bundle_bytes,
    ] {
        hasher.update(part);
    }
    hasher.finalize().into()
}
/// Per-morphism schema hash. SHA-256 with length-prefixed framing over
/// three inputs that together determine the morphism's deterministic
/// behaviour: the KCL schema bundle (entity shapes + invariants), the
/// manifest spec (writes, conserve, depends_on, etc.), and a
/// **normalized** form of the Rhai script — comments stripped and
/// whitespace runs collapsed, with string literals preserved exactly.
///
/// The normalization makes the hash invariant to cosmetic edits (a
/// developer adding a `// TODO` doesn't invalidate the log) without
/// missing real behavioural changes. The framing tag is `nakui-schema-v2`
/// so logs hashed under v1 (raw bytes) cleanly fail SchemaMismatch on
/// upgrade rather than silently diverging.
///
/// # Errors
/// Fails if the script cannot be read, or with `InvalidData` if it is not
/// valid UTF-8.
fn compute_morphism_schema_hash(
    schema_bundle_bytes: &[u8],
    spec: &MorphismSpec,
    script_path: &Path,
) -> std::io::Result<[u8; 32]> {
    let raw_script = std::fs::read(script_path)?;
    let script_text = match std::str::from_utf8(&raw_script) {
        Ok(text) => text,
        Err(e) => {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!("script {} is not valid UTF-8: {}", script_path.display(), e),
            ));
        }
    };
    let normalized_script = normalize_rhai_source(script_text);
    let spec_json = serde_json::to_vec(spec).expect("MorphismSpec serializes");

    let mut hasher = Sha256::new();
    hasher.update(b"nakui-schema-v2\0");
    // Each section is framed as: label, little-endian u64 length, payload.
    let sections: [(&[u8], &[u8]); 3] = [
        (b"schema:", schema_bundle_bytes),
        (b"spec:", spec_json.as_slice()),
        (b"script:", normalized_script.as_bytes()),
    ];
    for (label, payload) in sections {
        hasher.update(label);
        hasher.update((payload.len() as u64).to_le_bytes());
        hasher.update(payload);
    }
    Ok(hasher.finalize().into())
}
/// Produce a canonical form of a Rhai script for hashing: line and block
/// comments are removed, every run of whitespace becomes a single space,
/// and leading/trailing whitespace is dropped. Double-quoted string
/// literals (including their escape sequences) are copied through
/// untouched, so the result is insensitive to cosmetic edits only.
///
/// Limitations:
/// - Backtick template literals (Rhai 1.x interpolated strings) are not
///   recognised; extend the normalizer if module scripts start using them.
/// - Block comments do not nest: the first `*/` closes the comment.
/// - A comment contributes no separator of its own, so `a/**/b`
///   normalizes to `ab`.
pub fn normalize_rhai_source(src: &str) -> String {
    let mut normalized = String::with_capacity(src.len());
    let mut stream = src.chars().peekable();
    // Starts `true` so that leading whitespace is dropped.
    let mut last_was_blank = true;

    while let Some(ch) = stream.next() {
        match (ch, stream.peek().copied()) {
            // Line comment: skip up to, but not including, the newline.
            ('/', Some('/')) => {
                stream.next();
                while stream.next_if(|&n| n != '\n').is_some() {}
            }
            // Block comment: skip through the first `*/`, or to end of
            // input when it is never closed.
            ('/', Some('*')) => {
                stream.next();
                let mut before = '\0';
                for n in stream.by_ref() {
                    if before == '*' && n == '/' {
                        break;
                    }
                    before = n;
                }
            }
            // String literal: copied verbatim; a backslash always takes
            // the following character with it.
            ('"', _) => {
                normalized.push('"');
                while let Some(n) = stream.next() {
                    normalized.push(n);
                    match n {
                        '\\' => {
                            if let Some(escaped) = stream.next() {
                                normalized.push(escaped);
                            }
                        }
                        '"' => break,
                        _ => {}
                    }
                }
                last_was_blank = false;
            }
            // Whitespace run: emit one space for the whole run.
            (blank, _) if blank.is_whitespace() => {
                if !last_was_blank {
                    normalized.push(' ');
                    last_was_blank = true;
                }
            }
            // Anything else is significant and copied as-is.
            (other, _) => {
                normalized.push(other);
                last_was_blank = false;
            }
        }
    }

    // At most one trailing space can be left over from the last run.
    if normalized.ends_with(' ') {
        normalized.pop();
    }
    normalized
}
/// Concatenate every schema file in `schemas` (paths relative to
/// `module_dir`) into one bundle file under the system temp directory and
/// return its path. Each file's content is preceded by a `# --- <path> ---`
/// header line and followed by a blank line. The bundle gets a fresh
/// UUID-based name on every call.
///
/// # Errors
/// Any I/O error from reading a schema file or writing the bundle.
fn build_schema_bundle(module_dir: &std::path::Path, schemas: &[String]) -> std::io::Result<PathBuf> {
    let mut bundle_text = String::new();
    for relative in schemas {
        let source_path = module_dir.join(relative);
        let body = std::fs::read_to_string(&source_path)?;
        bundle_text += &format!(
            "# --- {} ---\n{}\n\n",
            source_path.to_string_lossy(),
            body
        );
    }
    let bundle_path = std::env::temp_dir().join(format!("nakui_schema_{}.k", Uuid::new_v4()));
    std::fs::write(&bundle_path, bundle_text).map(|()| bundle_path)
}
/// Enforce one sum-conservation rule over the ops a morphism produced.
///
/// For every `Set` on `rule.entity`/`rule.field`, the delta between the new
/// value and the value loaded before the script ran is added to the bucket
/// selected by `rule.group_by` (read from the pre-morphism state; a single
/// unnamed bucket when `group_by` is `None`, and the empty-string bucket
/// when the field is missing or not a string). Every bucket must sum to
/// zero.
///
/// Buckets are kept in a `BTreeMap`, so when several groups violate the
/// rule the reported one is always the smallest group key — the error is
/// the same on every run instead of depending on hash-map iteration order.
///
/// # Errors
/// - `ExecError::ConservationMalformed` when a Set cannot be tied to a
///   tracked input or either value is not an `i64`.
/// - `ExecError::ConservationViolation` when a bucket's total is non-zero.
fn check_conservation(
    rule: &ConserveRule,
    loaded: &BTreeMap<String, Value>,
    id_to_input: &HashMap<Uuid, InputBinding>,
    ops: &[FieldOp],
) -> Result<(), ExecError> {
    let mut delta_by_group: BTreeMap<String, i128> = BTreeMap::new();
    for op in ops {
        if let FieldOp::Set { path, value } = op {
            if path.entity != rule.entity || path.field != rule.field {
                continue;
            }
            let binding = id_to_input
                .get(&path.id)
                .filter(|b| b.entity == path.entity)
                .ok_or_else(|| ExecError::ConservationMalformed {
                    entity: rule.entity.clone(),
                    field: rule.field.clone(),
                    message: format!(
                        "Set on id {} with entity {} cannot be reconciled to a tracked input",
                        path.id, path.entity
                    ),
                })?;
            // NOTE(review): every Set is measured against the pre-morphism
            // value, so two Sets on the same record and field are both
            // counted — confirm scripts emit at most one Set per field.
            let old_state = &loaded[&binding.role];
            let old_val = old_state
                .get(&rule.field)
                .and_then(Value::as_i64)
                .ok_or_else(|| ExecError::ConservationMalformed {
                    entity: rule.entity.clone(),
                    field: rule.field.clone(),
                    message: format!("old value at role `{}` is not i64", binding.role),
                })?;
            let new_val = value
                .as_i64()
                .ok_or_else(|| ExecError::ConservationMalformed {
                    entity: rule.entity.clone(),
                    field: rule.field.clone(),
                    message: format!("Set value at role `{}` is not i64", binding.role),
                })?;
            let group_key = match &rule.group_by {
                Some(g) => old_state
                    .get(g)
                    .and_then(Value::as_str)
                    .unwrap_or("")
                    .to_string(),
                None => String::new(),
            };
            // i128 accumulation: the difference of two i64 values cannot
            // overflow it.
            *delta_by_group.entry(group_key).or_insert(0) +=
                (new_val as i128) - (old_val as i128);
        }
    }
    for (group, total) in &delta_by_group {
        if *total != 0 {
            return Err(ExecError::ConservationViolation {
                entity: rule.entity.clone(),
                field: rule.field.clone(),
                group_by: rule.group_by.clone().unwrap_or_else(|| "(global)".into()),
                group: group.clone(),
                total: *total,
            });
        }
    }
    Ok(())
}
// Unit tests for `normalize_rhai_source`: comment stripping, whitespace
// collapsing, string-literal preservation and malformed-input handling.
#[cfg(test)]
mod tests {
use super::*;
// Line comments, trailing comments and multi-line block comments all
// vanish; the statements around them are joined by single spaces.
#[test]
fn normalize_strips_line_and_block_comments() {
let src = r#"
// header comment
let x = 1; // trailing
/* block
spans lines */
let y = 2;
"#;
let normalized = normalize_rhai_source(src);
assert_eq!(normalized, "let x = 1; let y = 2;");
}
// Tabs and repeated newlines collapse to one space each.
#[test]
fn normalize_collapses_whitespace_runs() {
let src = "let a =\t\t1;\n\n\n\nlet b = 2;";
let normalized = normalize_rhai_source(src);
assert_eq!(normalized, "let a = 1; let b = 2;");
}
// NOTE(review): the test name and the comment below mention a double
// space, but the literal as seen here contains none — confirm the
// fixture still exercises that case.
#[test]
fn normalize_preserves_strings_verbatim_including_double_spaces() {
// The double space, the // inside, and the escape are preserved
// exactly because they're inside a string literal — semantic
// content, not cosmetic.
let src = r#"let s = "hello // not a comment \"world\"";"#;
let normalized = normalize_rhai_source(src);
assert_eq!(normalized, r#"let s = "hello // not a comment \"world\"";"#);
}
// Normalizing already-normalized output must change nothing.
#[test]
fn normalize_is_idempotent() {
let src = "// a\nlet x = 1;\n";
let n1 = normalize_rhai_source(src);
let n2 = normalize_rhai_source(&n1);
assert_eq!(n1, n2);
}
// Behavioural edits must survive normalization so the hash moves.
#[test]
fn normalize_distinguishes_real_changes() {
// Adding a new statement is a non-cosmetic change — the
// normalized output must reflect it.
let a = "let x = 1;";
let b = "let x = 1; let y = 2;";
assert_ne!(normalize_rhai_source(a), normalize_rhai_source(b));
// Same for changing a literal value.
let c = "let x = 1;";
let d = "let x = 2;";
assert_ne!(normalize_rhai_source(c), normalize_rhai_source(d));
}
// A line comment that runs to end of input (no newline) is still removed.
#[test]
fn normalize_handles_comment_at_end_without_newline() {
let src = "let x = 1; // no trailing newline";
let normalized = normalize_rhai_source(src);
assert_eq!(normalized, "let x = 1;");
}
#[test]
fn normalize_handles_unterminated_block_comment() {
// Defensive: if someone writes `/* ...` and forgets to close,
// we don't infinite-loop or panic. The trailing content is
// discarded, which is fine — Rhai won't parse this either.
let src = "let x = 1; /* never ends";
let normalized = normalize_rhai_source(src);
assert_eq!(normalized, "let x = 1;");
}
}
+277
View File
@@ -0,0 +1,277 @@
//! Static dependency graph derived from a `Manifest`.
//!
//! Two graphs in one structure:
//! - **Explicit graph** (`depends_on`): morphism-to-morphism edges declared
//! by the manifest author. Cycles here are an error — the graph is built
//! with cycle detection.
//! - **Data-flow indexes** (`reads`/`writes`): inverted indexes from
//! canonical entity tokens (`"Caja.saldo"` or `"Movimiento"`) to the
//! morphisms that read or write them. Self-loops in data flow are
//! legal (a morphism that reads a field and updates it is normal).
//!
//! Tokens are normalized at build time: a manifest's role-prefixed tokens
//! (`"caja.saldo"`) become entity-prefixed (`"Caja.saldo"`) so cross-module
//! queries work uniformly.
use petgraph::algo::tarjan_scc;
use petgraph::graph::{DiGraph, NodeIndex};
use petgraph::visit::Topo;
use std::collections::{HashMap, HashSet};
use thiserror::Error;
use crate::manifest::Manifest;
/// Failures while building a `ManifestGraph` from a manifest.
#[derive(Debug, Error)]
pub enum GraphError {
/// The explicit `depends_on` graph contains a cycle; the payload lists
/// the morphisms of one offending cycle (or a single self-dependent one).
#[error("dependency cycle in `depends_on` involving morphisms {0:?}")]
Cycle(Vec<String>),
/// A `depends_on` entry names a morphism the manifest does not declare.
#[error("morphism `{0}` referenced in depends_on but not declared in this manifest")]
UnknownMorphism(String),
}
/// Explicit dependency graph plus inverted data-flow indexes for one
/// manifest. Built once by `ManifestGraph::build`; all fields are private
/// and read through the accessor methods.
#[derive(Debug)]
pub struct ManifestGraph {
/// Explicit `depends_on` graph. Edge `a -> b` means: morphism `b`
/// depends on `a`, so `a` must be available before `b`.
explicit: DiGraph<String, ()>,
/// Data-flow indexes. Token form: "Entity.field" or "Entity".
/// Each maps a canonical token to the morphisms that read/write it.
readers_of_token: HashMap<String, Vec<String>>,
writers_of_token: HashMap<String, Vec<String>>,
/// Per-morphism canonicalized token sets.
/// Keyed by morphism name; duplicates within a morphism are removed.
morphism_reads: HashMap<String, Vec<String>>,
morphism_writes: HashMap<String, Vec<String>>,
}
impl ManifestGraph {
pub fn build(manifest: &Manifest) -> Result<Self, GraphError> {
let explicit = build_explicit(manifest)?;
if let Some(cycle) = find_cycle(&explicit) {
return Err(GraphError::Cycle(cycle));
}
let (readers_of_token, writers_of_token, morphism_reads, morphism_writes) =
build_data_flow(manifest);
Ok(Self {
explicit,
readers_of_token,
writers_of_token,
morphism_reads,
morphism_writes,
})
}
/// Morphisms that read `token`. Token form: "Entity.field" or "Entity".
pub fn readers_of(&self, token: &str) -> &[String] {
self.readers_of_token
.get(token)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
/// Morphisms that write `token`.
pub fn writers_of(&self, token: &str) -> &[String] {
self.writers_of_token
.get(token)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
pub fn morphism_reads(&self, name: &str) -> &[String] {
self.morphism_reads
.get(name)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
pub fn morphism_writes(&self, name: &str) -> &[String] {
self.morphism_writes
.get(name)
.map(|v| v.as_slice())
.unwrap_or(&[])
}
/// Morphisms whose `reads` overlap any of `name`'s `writes`. The
/// dirty-marking primitive: after `name` runs successfully, these are
/// the candidates whose derived state would be invalidated. The result
/// excludes `name` itself even if it reads what it writes.
pub fn affected_by(&self, name: &str) -> Vec<String> {
let writes = match self.morphism_writes.get(name) {
Some(w) => w,
None => return Vec::new(),
};
let mut affected: HashSet<String> = HashSet::new();
for token in writes {
if let Some(readers) = self.readers_of_token.get(token) {
for r in readers {
if r != name {
affected.insert(r.clone());
}
}
}
}
let mut out: Vec<_> = affected.into_iter().collect();
out.sort();
out
}
/// Topological order of the explicit dependency graph. If `a` is in
/// `b.depends_on`, `a` precedes `b` in the result.
pub fn topological_order(&self) -> Vec<String> {
let mut topo = Topo::new(&self.explicit);
let mut out = Vec::new();
while let Some(idx) = topo.next(&self.explicit) {
out.push(self.explicit[idx].clone());
}
out
}
}
/// Build the explicit `depends_on` graph: one node per declared morphism
/// and, for each dependency `dep` of morphism `m`, an edge `dep -> m`.
///
/// # Errors
/// `GraphError::UnknownMorphism` when a `depends_on` entry does not name a
/// morphism declared in `manifest`.
fn build_explicit(manifest: &Manifest) -> Result<DiGraph<String, ()>, GraphError> {
    let mut graph = DiGraph::new();

    // First pass: register every morphism so edges can refer to any of them.
    let nodes: HashMap<String, NodeIndex> = manifest
        .morphisms
        .iter()
        .map(|m| (m.name.clone(), graph.add_node(m.name.clone())))
        .collect();

    // Second pass: wire prerequisite -> dependent edges.
    for m in &manifest.morphisms {
        let dependent = nodes[&m.name];
        for dep in &m.depends_on {
            match nodes.get(dep) {
                Some(&prerequisite) => {
                    graph.add_edge(prerequisite, dependent, ());
                }
                None => return Err(GraphError::UnknownMorphism(dep.clone())),
            }
        }
    }
    Ok(graph)
}
/// Report the first cycle found among the graph's strongly connected
/// components, or `None` when the graph is acyclic. A component with
/// several nodes is returned as its node names, sorted; a single node
/// counts only when it has an edge to itself and is returned as `[name]`.
fn find_cycle(graph: &DiGraph<String, ()>) -> Option<Vec<String>> {
    tarjan_scc(graph)
        .into_iter()
        .find_map(|component| match component.as_slice() {
            [] => None,
            [only] => graph
                .find_edge(*only, *only)
                .map(|_| vec![graph[*only].clone()]),
            members => {
                let mut names: Vec<String> =
                    members.iter().map(|&node| graph[node].clone()).collect();
                names.sort();
                Some(names)
            }
        })
}
/// Build the four data-flow indexes for `manifest`, returned in the order
/// `(readers_of_token, writers_of_token, morphism_reads, morphism_writes)`.
///
/// Tokens are canonicalized through each morphism's role -> entity map;
/// tokens whose role is not declared are dropped. Within one morphism a
/// canonical token is recorded once, in first-seen order.
fn build_data_flow(
    manifest: &Manifest,
) -> (
    HashMap<String, Vec<String>>,
    HashMap<String, Vec<String>>,
    HashMap<String, Vec<String>>,
    HashMap<String, Vec<String>>,
) {
    /// Index one token list (reads or writes) of one morphism.
    fn index_tokens(
        morphism: &str,
        raw_tokens: &[String],
        roles: &HashMap<&str, &str>,
        by_token: &mut HashMap<String, Vec<String>>,
        by_morphism: &mut HashMap<String, Vec<String>>,
    ) {
        // Dedupe per-morphism: `source.saldo` and `dest.saldo` both
        // canonicalize to `Caja.saldo` — the morphism counts once.
        let mut seen: HashSet<String> = HashSet::new();
        let canonical = raw_tokens
            .iter()
            .filter_map(|raw| canonicalize_token(raw, roles));
        for token in canonical {
            if !seen.insert(token.clone()) {
                continue;
            }
            by_token
                .entry(token.clone())
                .or_default()
                .push(morphism.to_owned());
            by_morphism
                .entry(morphism.to_owned())
                .or_default()
                .push(token);
        }
    }

    let mut readers: HashMap<String, Vec<String>> = HashMap::new();
    let mut writers: HashMap<String, Vec<String>> = HashMap::new();
    let mut m_reads: HashMap<String, Vec<String>> = HashMap::new();
    let mut m_writes: HashMap<String, Vec<String>> = HashMap::new();

    for m in &manifest.morphisms {
        let role_to_entity: HashMap<&str, &str> = m
            .inputs
            .iter()
            .map(|input| (input.role.as_str(), input.entity.as_str()))
            .collect();
        index_tokens(&m.name, &m.reads, &role_to_entity, &mut readers, &mut m_reads);
        index_tokens(&m.name, &m.writes, &role_to_entity, &mut writers, &mut m_writes);
    }

    (readers, writers, m_reads, m_writes)
}
/// Rewrite a manifest token into its entity-prefixed form.
///
/// - `"role.field"` becomes `"Entity.field"` using `roles` (split at the
///   first `.`); `None` when the role is not in the map.
/// - A token without a `.` is taken to be an entity name and returned
///   unchanged.
fn canonicalize_token(t: &str, roles: &HashMap<&str, &str>) -> Option<String> {
    match t.split_once('.') {
        None => Some(t.to_string()),
        Some((role, field)) => {
            let entity = roles.get(role)?;
            Some(format!("{}.{}", entity, field))
        }
    }
}
/// Tracks which morphisms have stale derived state because some morphism
/// they read from was applied. Wire it next to your `execute_and_log`
/// loop: after a successful run, call `mark_dirty_after(morphism, &graph)`;
/// then any consumer (cached view, derived report, downstream pipeline)
/// queries `is_dirty(name)` before using its cached output.
///
/// The tracker holds names only — it doesn't know what "recompute" means
/// for any particular morphism. That's deliberate: the kernel exposes the
/// invalidation primitive; what to do with the dirty set is the caller's.
#[derive(Debug, Default, Clone)]
pub struct DirtyTracker {
// Names of the morphisms currently flagged as dirty.
dirty: HashSet<String>,
}
impl DirtyTracker {
pub fn new() -> Self {
Self::default()
}
/// After `morphism_name` runs successfully, mark every morphism in
/// `graph.affected_by(morphism_name)` as dirty.
pub fn mark_dirty_after(&mut self, morphism_name: &str, graph: &ManifestGraph) {
for affected in graph.affected_by(morphism_name) {
self.dirty.insert(affected);
}
}
pub fn is_dirty(&self, morphism: &str) -> bool {
self.dirty.contains(morphism)
}
/// Sorted list of dirty morphisms. Stable order for UI/telemetry.
pub fn dirty(&self) -> Vec<String> {
let mut out: Vec<String> = self.dirty.iter().cloned().collect();
out.sort();
out
}
pub fn len(&self) -> usize {
self.dirty.len()
}
pub fn is_empty(&self) -> bool {
self.dirty.is_empty()
}
/// Clear the dirty flag for a specific morphism (call after the
/// caller has recomputed it).
pub fn clear(&mut self, morphism: &str) {
self.dirty.remove(morphism);
}
pub fn clear_all(&mut self) {
self.dirty.clear();
}
}
@@ -0,0 +1,43 @@
use std::path::Path;
use std::process::Command;
use thiserror::Error;
/// Failures from invoking the external `kcl` command-line tool.
#[derive(Debug, Error)]
pub enum KclError {
/// Spawning `kcl` failed with `NotFound`: the binary is not on PATH.
#[error("kcl binary not found on PATH (install: https://kcl-lang.io)")]
BinaryMissing,
/// `kcl vet` exited unsuccessfully; the payload is its stderr, or its
/// stdout when stderr is blank.
#[error("kcl validation failed:\n{0}")]
ValidationFailed(String),
/// Any other I/O error while spawning `kcl` or preparing its input.
#[error("io invoking kcl: {0}")]
Io(#[from] std::io::Error),
}
/// Run `kcl vet <state_path> <schema_path> -s <schema_name>` to validate
/// a JSON state file against the named schema in a `.k` file.
///
/// # Errors
/// - `KclError::BinaryMissing` when the `kcl` executable cannot be found.
/// - `KclError::Io` for any other failure to spawn the process.
/// - `KclError::ValidationFailed` when `kcl` exits unsuccessfully; the
///   message is its stderr, or its stdout when stderr is blank.
pub fn vet(schema_path: &Path, state_path: &Path, schema_name: &str) -> Result<(), KclError> {
    let mut command = Command::new("kcl");
    command
        .arg("vet")
        .arg(state_path)
        .arg(schema_path)
        .arg("-s")
        .arg(schema_name);

    let out = command.output().map_err(|e| {
        if e.kind() == std::io::ErrorKind::NotFound {
            KclError::BinaryMissing
        } else {
            KclError::Io(e)
        }
    })?;

    if out.status.success() {
        return Ok(());
    }

    // Prefer stderr; fall back to stdout when stderr is blank.
    let stderr = String::from_utf8_lossy(&out.stderr).into_owned();
    let msg = if stderr.trim().is_empty() {
        String::from_utf8_lossy(&out.stdout).into_owned()
    } else {
        stderr
    };
    Err(KclError::ValidationFailed(msg))
}
+11
View File
@@ -0,0 +1,11 @@
pub mod delta;
pub mod drift;
pub mod event_log;
pub mod executor;
pub mod graph;
pub mod kcl_wrapper;
pub mod manifest;
pub mod rhai_executor;
pub mod run;
pub mod store;
pub mod surreal_store;
+306
View File
@@ -0,0 +1,306 @@
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::path::Path;
use thiserror::Error;
/// Deserialized module manifest (the `nsmc.json` file of a module).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
/// Module identifier as written in the manifest.
pub module: String,
/// Schema files that compose this module's KCL surface. Paths are
/// resolved relative to the module directory; cross-module references
/// use `"../other_module/schema.k"`. Defaults to `["schema.k"]` when
/// the field is absent — the single-file case.
#[serde(default)]
pub schemas: Vec<String>,
/// Morphisms declared by this module.
pub morphisms: Vec<MorphismSpec>,
}
/// Declaration of one morphism: its inputs, declared reads/writes,
/// invariants, explicit dependencies and the script that implements it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MorphismSpec {
/// Name used to look the morphism up in the manifest.
pub name: String,
/// Input roles and the entity each role is bound to.
pub inputs: Vec<MorphismInput>,
/// Tokens the morphism reads: `role.field` or a bare entity name.
pub reads: Vec<String>,
/// Tokens the morphism may write: `role.field` or a bare entity name.
pub writes: Vec<String>,
/// Invariants checked against the produced ops; empty when absent.
#[serde(default)]
pub invariants: Invariants,
/// Names of morphisms this one depends on; empty when absent.
#[serde(default)]
pub depends_on: Vec<String>,
/// Script path, joined onto the module directory when resolved.
pub script: String,
}
/// One input slot of a morphism: a role name and the entity it holds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MorphismInput {
/// Role name; the prefix used in `role.field` tokens.
pub role: String,
/// Entity (schema) name of the record bound to the role.
pub entity: String,
}
/// Delta-level invariants attached to a morphism. Defaults to no rules.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Invariants {
/// Sum-conservation rules. The total Δ of (entity, field) across the ops
/// produced by the morphism must be zero — optionally bucketed by another
/// field on the entity (e.g. group_by="currency" so USD and EUR are
/// independent ledgers).
#[serde(default)]
pub conserve: Vec<ConserveRule>,
}
/// One sum-conservation rule: the deltas of `entity.field` must total zero.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConserveRule {
/// Entity whose field is conserved.
pub entity: String,
/// Field whose summed delta must be zero.
pub field: String,
/// Optional field used to bucket the sum; `None` means a single bucket.
#[serde(default)]
pub group_by: Option<String>,
}
/// Failures while reading and deserializing a manifest file.
#[derive(Debug, Error)]
pub enum ManifestError {
/// The manifest file could not be read.
#[error("io reading manifest: {0}")]
Io(#[from] std::io::Error),
/// The file was read but is not valid manifest JSON.
#[error("parsing manifest json: {0}")]
Parse(#[from] serde_json::Error),
}
/// Errors raised by `Manifest::validate`. Each variant flags a specific
/// semantic issue caught before the kernel ever runs the module — these
/// are the contract between manifest authors (humans or AI) and Nakui.
#[derive(Debug, Error)]
pub enum ValidationError {
/// Two morphisms share the same name.
#[error("morphism name `{0}` declared more than once")]
DuplicateMorphism(String),
/// One morphism declares the same input role twice.
#[error("morphism `{morphism}`: input role `{role}` declared more than once")]
DuplicateRole { morphism: String, role: String },
/// An input's entity is not declared in any schema file; `known` lists
/// the entity names that were found.
#[error(
"morphism `{morphism}`: input entity `{entity}` is not declared in any schema file (known: {known:?})"
)]
InputUnknownEntity {
morphism: String,
entity: String,
known: Vec<String>,
},
/// A `writes` token uses a role the morphism does not declare.
#[error(
"morphism `{morphism}`: writes token `{token}` references unknown role `{role}` (declared roles: {roles:?})"
)]
WritesUnknownRole {
morphism: String,
token: String,
role: String,
roles: Vec<String>,
},
/// A `writes` token is neither a declared `role.field` nor a known entity.
#[error(
"morphism `{morphism}`: writes token `{token}` is not a declared role.field nor a known entity name"
)]
WritesUnknownEntity { morphism: String, token: String },
/// A conserve rule names an entity that is not known.
#[error("morphism `{morphism}`: conserve rule references unknown entity `{entity}`")]
ConserveUnknownEntity { morphism: String, entity: String },
/// A `depends_on` entry does not name a morphism in this manifest.
#[error("morphism `{morphism}`: depends_on `{dep}` does not name a morphism in this manifest")]
DependsOnUnknown { morphism: String, dep: String },
/// The morphism's script file does not exist at the resolved path.
#[error("morphism `{morphism}`: script file `{script}` not found at {resolved}")]
ScriptMissing {
morphism: String,
script: String,
resolved: String,
},
/// A schema file listed in the manifest does not exist.
#[error("schema file `{path}` declared in manifest does not exist at {resolved}")]
SchemaFileMissing { path: String, resolved: String },
/// The same schema name is declared by more than one schema file.
#[error("schema name `{name}` is declared in multiple files: {files:?}")]
DuplicateSchema { name: String, files: Vec<String> },
/// A schema file could not be read.
#[error("io reading schema `{path}`: {source}")]
Io {
path: String,
#[source]
source: std::io::Error,
},
}
impl Manifest {
pub fn load(path: &Path) -> Result<Self, ManifestError> {
let text = std::fs::read_to_string(path)?;
let m: Self = serde_json::from_str(&text)?;
Ok(m)
}
pub fn morphism(&self, name: &str) -> Option<&MorphismSpec> {
self.morphisms.iter().find(|m| m.name == name)
}
/// Schema files this module exposes. Defaults to `["schema.k"]` when
/// the manifest doesn't declare any explicitly.
pub fn effective_schemas(&self) -> Vec<String> {
if self.schemas.is_empty() {
vec!["schema.k".to_string()]
} else {
self.schemas.clone()
}
}
/// Run all semantic checks. Catches author errors that would otherwise
/// surface as opaque runtime failures — misspelled entity names that
/// silently make conservation a no-op, role typos in writes that allow
/// any op through, unresolvable script paths, etc.
pub fn validate(&self, module_dir: &Path) -> Result<(), ValidationError> {
// 1. Resolve schemas: read each file, parse schema names, detect
// cross-file duplicates. Build the set of known entity names.
let mut entity_to_files: HashMap<String, Vec<String>> = HashMap::new();
for s in self.effective_schemas() {
let resolved = module_dir.join(&s);
if !resolved.exists() {
return Err(ValidationError::SchemaFileMissing {
path: s.clone(),
resolved: resolved.display().to_string(),
});
}
let content = std::fs::read_to_string(&resolved).map_err(|e| {
ValidationError::Io {
path: s.clone(),
source: e,
}
})?;
for name in extract_schema_names(&content) {
entity_to_files.entry(name).or_default().push(s.clone());
}
}
for (name, files) in &entity_to_files {
if files.len() > 1 {
return Err(ValidationError::DuplicateSchema {
name: name.clone(),
files: files.clone(),
});
}
}
let known_entities: HashSet<&str> =
entity_to_files.keys().map(String::as_str).collect();
// 2. Manifest-level: morphism names must be unique.
let mut seen: HashSet<&str> = HashSet::new();
for m in &self.morphisms {
if !seen.insert(m.name.as_str()) {
return Err(ValidationError::DuplicateMorphism(m.name.clone()));
}
}
let known_morphisms: HashSet<&str> =
self.morphisms.iter().map(|m| m.name.as_str()).collect();
// 3. Per-morphism checks.
for m in &self.morphisms {
let mut roles: HashSet<&str> = HashSet::new();
for inp in &m.inputs {
if !roles.insert(inp.role.as_str()) {
return Err(ValidationError::DuplicateRole {
morphism: m.name.clone(),
role: inp.role.clone(),
});
}
if !known_entities.contains(inp.entity.as_str()) {
return Err(ValidationError::InputUnknownEntity {
morphism: m.name.clone(),
entity: inp.entity.clone(),
known: sorted(&known_entities),
});
}
}
for token in &m.writes {
if let Some((role, _field)) = token.split_once('.') {
if !roles.contains(role) {
return Err(ValidationError::WritesUnknownRole {
morphism: m.name.clone(),
token: token.clone(),
role: role.to_string(),
roles: m.inputs.iter().map(|i| i.role.clone()).collect(),
});
}
} else if !known_entities.contains(token.as_str()) {
return Err(ValidationError::WritesUnknownEntity {
morphism: m.name.clone(),
token: token.clone(),
});
}
}
for rule in &m.invariants.conserve {
if !known_entities.contains(rule.entity.as_str()) {
return Err(ValidationError::ConserveUnknownEntity {
morphism: m.name.clone(),
entity: rule.entity.clone(),
});
}
}
for dep in &m.depends_on {
if !known_morphisms.contains(dep.as_str()) {
return Err(ValidationError::DependsOnUnknown {
morphism: m.name.clone(),
dep: dep.clone(),
});
}
}
let script_resolved = module_dir.join(&m.script);
if !script_resolved.exists() {
return Err(ValidationError::ScriptMissing {
morphism: m.name.clone(),
script: m.script.clone(),
resolved: script_resolved.display().to_string(),
});
}
}
Ok(())
}
}
/// Cheap line-scan over a `.k` file to extract every `schema NAME` declared
/// at column 0 (top-level). Tolerates inheritance (`schema X(Y):`), generic
/// params (`schema X[T]:`), and any run of spaces or tabs between the
/// `schema` keyword and the name.
///
/// Lines that are indented, commented out (`# schema …`), or where `schema`
/// is merely a prefix of a longer identifier (`schemaFoo`) are ignored.
/// This is a textual scan, not a parser: a line inside a multi-line string
/// that happens to start with `schema ` at column 0 would also be picked up.
///
/// Returns the names in file order; duplicates are preserved so the caller
/// can detect them.
fn extract_schema_names(content: &str) -> Vec<String> {
    fn is_gap(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    content
        .lines()
        .filter_map(|line| {
            // Top-level declarations are not indented in idiomatic KCL.
            let rest = line.strip_prefix("schema")?;
            // Require a separator so identifiers like `schemaFoo` don't match.
            if !rest.starts_with(is_gap) {
                return None;
            }
            let rest = rest.trim_start_matches(is_gap);
            // The name ends at the first char that can't be part of an
            // identifier: `:`, `(`, `[`, whitespace, …
            let end = rest
                .find(|c: char| !(c.is_alphanumeric() || c == '_'))
                .unwrap_or(rest.len());
            (end > 0).then(|| rest[..end].to_string())
        })
        .collect()
}
/// Copy the set's entries into owned strings and return them in ascending
/// lexicographic order, for stable error messages.
fn sorted(set: &HashSet<&str>) -> Vec<String> {
    let mut names = Vec::with_capacity(set.len());
    names.extend(set.iter().map(|name| String::from(*name)));
    names.sort();
    names
}
// Unit tests for the schema-name line scanner.
#[cfg(test)]
mod tests {
use super::*;
// Covers a plain declaration, inheritance `(Base)`, generics `[T]`, a
// leading-underscore name, and a commented-out declaration that must be
// skipped.
#[test]
fn extract_schema_names_handles_basic_forms() {
let content = r#"
schema Caja:
saldo: int
schema Movimiento(Base):
monto: int
# schema Comentario:
schema Generic[T]:
inner: T
schema _Underscore:
x: int
"#;
let names = extract_schema_names(content);
// Names come back in file order.
assert_eq!(
names,
vec!["Caja", "Movimiento", "Generic", "_Underscore"]
);
}
}
@@ -0,0 +1,103 @@
use rhai::packages::{
ArithmeticPackage, BasicArrayPackage, BasicIteratorPackage, BasicMapPackage,
BasicStringPackage, CorePackage, LogicPackage, Package,
};
use rhai::{AST, Dynamic, Engine, Scope};
use serde_json::Value;
use std::cell::RefCell;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use thiserror::Error;
use crate::delta::FieldOp;
/// Errors produced while loading or running a morphism script.
#[derive(Debug, Error)]
pub enum RhaiError {
/// The script source failed to compile.
#[error("compile error: {0}")]
Compile(String),
/// Converting the input to a Rhai value, or evaluating the script, failed.
#[error("runtime error: {0}")]
Runtime(String),
/// The script's result was not an array.
#[error("morphism returned non-array")]
BadDelta,
/// An element of the returned array could not be decoded as a `FieldOp`.
#[error("delta op malformed: {0}")]
BadOp(String),
/// The script file could not be read.
#[error("io reading script: {0}")]
Io(#[from] std::io::Error),
}
/// Runs morphism scripts on a restricted Rhai engine and caches their
/// compiled ASTs.
pub struct RhaiExecutor {
/// Engine built by `new_sandboxed`.
engine: Engine,
/// Compiled-AST cache keyed by the script path exactly as passed to
/// `run`. Avoids reading + reparsing on every call (verify_log re-runs
/// every morphism in the log; without the cache that becomes an
/// O(events × parse) blowup).
/// NOTE(review): the key is not canonicalized in this file, so two
/// spellings of the same file would be cached separately, and edits to a
/// script on disk are not picked up once cached.
asts: RefCell<HashMap<PathBuf, Arc<AST>>>,
}
impl RhaiExecutor {
/// Build a deterministic engine. Time, random, IO, debug/print are all
/// excluded by construction (we register packages by name, not the
/// StandardPackage bundle which would pull in BasicTimePackage).
pub fn new_sandboxed() -> Self {
let mut engine = Engine::new_raw();
// Deliberately omitted: BasicTimePackage, EvalPackage, DebugPackage.
CorePackage::new().register_into_engine(&mut engine);
LogicPackage::new().register_into_engine(&mut engine);
ArithmeticPackage::new().register_into_engine(&mut engine);
BasicArrayPackage::new().register_into_engine(&mut engine);
BasicMapPackage::new().register_into_engine(&mut engine);
BasicStringPackage::new().register_into_engine(&mut engine);
BasicIteratorPackage::new().register_into_engine(&mut engine);
engine.set_max_call_levels(64);
engine.set_max_expr_depths(64, 32);
Self {
engine,
asts: RefCell::new(HashMap::new()),
}
}
pub fn run(&self, script_path: &Path, input: Value) -> Result<Vec<FieldOp>, RhaiError> {
let ast = self.ast_for(script_path)?;
let dyn_input: Dynamic = rhai::serde::to_dynamic(input)
.map_err(|e| RhaiError::Runtime(format!("input -> dynamic: {}", e)))?;
let mut scope = Scope::new();
scope.push_dynamic("input", dyn_input);
let result: Dynamic = self
.engine
.eval_ast_with_scope(&mut scope, &ast)
.map_err(|e| RhaiError::Runtime(e.to_string()))?;
let arr = result.into_array().map_err(|_| RhaiError::BadDelta)?;
let mut ops = Vec::with_capacity(arr.len());
for item in arr {
let json: Value = rhai::serde::from_dynamic(&item)
.map_err(|e| RhaiError::BadOp(format!("dynamic -> json: {}", e)))?;
let op: FieldOp = serde_json::from_value(json)
.map_err(|e| RhaiError::BadOp(e.to_string()))?;
ops.push(op);
}
Ok(ops)
}
/// Returns a cached compiled AST for `script_path`, compiling it on the
/// first call. Cache hits avoid filesystem IO and parse cost entirely.
fn ast_for(&self, script_path: &Path) -> Result<Arc<AST>, RhaiError> {
if let Some(ast) = self.asts.borrow().get(script_path) {
return Ok(Arc::clone(ast));
}
let source = std::fs::read_to_string(script_path)?;
let compiled = self
.engine
.compile(&source)
.map_err(|e| RhaiError::Compile(e.to_string()))?;
let arc = Arc::new(compiled);
self.asts
.borrow_mut()
.insert(script_path.to_path_buf(), Arc::clone(&arc));
Ok(arc)
}
}
+352
View File
@@ -0,0 +1,352 @@
//! `nakui run` server: a long-lived process that holds an in-memory store
//! reconstructed from the log, exposes a Unix Domain Socket, and serves
//! line-delimited JSON requests to drive the kernel.
//!
//! Why UDS + line-JSON for V1:
//! - Multi-client without committing to a transport (HTTP/NATS later).
//! - Filesystem permissions gate access; no port exposure.
//! - Self-describing: `describe` returns the manifest's morphism specs
//! so an agent (human or LLM) can drive the server without external
//! docs.
//!
//! Concurrency: one connection at a time. Backed by `&mut Store`, the
//! kernel is single-writer by design. Multiple clients queue in
//! `accept()`. If/when we want concurrency, the right unit to parallelize
//! is reads, not writes — that's a future refactor with locks at the
//! right granularity.
//!
//! Recovery: every `execute` goes through `execute_and_log_with_recovery`
//! so a transient apply failure auto-rebuilds the in-memory store from
//! the log without taking the server down.
use std::io::{BufRead, BufReader, Write};
use std::os::unix::net::{UnixListener, UnixStream};
use std::path::Path;
use serde::Deserialize;
use serde_json::{Value, json};
use thiserror::Error;
use uuid::Uuid;
use crate::event_log::{
EventLog, RecoverableExecuteError, ReplayError, Snapshot, SnapshotMismatchError,
execute_and_log_with_recovery, replay_with_snapshot_into, verify_log,
};
use crate::executor::Executor;
use crate::store::Store;
/// Errors that abort `run_server`, either during startup reconstruction or
/// from the socket / accept loop.
#[derive(Debug, Error)]
pub enum RunError {
/// Socket bind/accept failure or any other IO error.
#[error("io: {0}")]
Io(#[from] std::io::Error),
/// The store could not be cleared before replay.
#[error("clear store on startup: {0}")]
Clear(#[source] crate::store::StoreError),
/// Replaying the log into the store failed.
#[error("replay on startup: {0}")]
Replay(#[from] ReplayError),
/// Reading the event log failed.
#[error("log: {0}")]
Log(#[from] crate::event_log::LogError),
/// The snapshot failed its compatibility check against the executor.
#[error("snapshot incompatible: {0}")]
SnapshotMismatch(#[from] SnapshotMismatchError),
/// The log's first entry starts after `snapshot.seq + 1`, i.e. events
/// between the snapshot and the log are missing.
#[error(
"snapshot/log gap: snapshot covers up to seq {snap_seq}, log's first remaining entry is seq {log_first_seq} (expected ≤ {expected})"
)]
SnapshotGap {
snap_seq: u64,
log_first_seq: u64,
expected: u64,
},
}
/// Run the server until a `shutdown` request is received or `accept`
/// returns an unrecoverable error. On exit, removes the socket file.
///
/// Startup reconstruction:
/// - With `Some(snapshot)`: validate its `schema_hash` against the
/// executor, seed the store from the snapshot, replay only the log
/// tail (entries with `seq > snapshot.seq`).
/// - With `None`: full replay from seq 0. Slower for long logs.
///
/// In both cases the store is wiped first, so the server never serves
/// requests against a state the log can't reproduce. This is true for
/// `MemoryStore` and for persistent backends like `SurrealStore` —
/// persistence is a durability property of the runtime cache, not a
/// way to skip replay. (A future "skip replay if last_applied_seq
/// matches" optimization would change that.)
pub fn run_server<S: Store>(
executor: Executor,
mut log: EventLog,
mut store: S,
snapshot: Option<Snapshot>,
socket_path: &Path,
) -> Result<(), RunError> {
startup_replay(&executor, &log, &mut store, snapshot.as_ref())?;
// Best-effort cleanup of stale sockets from a prior crashed run.
// Bind itself will fail if a live process is already listening.
let _ = std::fs::remove_file(socket_path);
let listener = UnixListener::bind(socket_path)?;
let result = accept_loop(&listener, &executor, &mut store, &mut log);
let _ = std::fs::remove_file(socket_path);
result
}
/// Bring `store` in sync with `log` (and `snapshot`, if any) before the
/// server starts accepting requests.
///
/// Steps, in order:
/// 1. If a snapshot is given, check it is compatible with `executor`.
/// 2. Read the log once; if a snapshot is given and the log's first entry
///    starts after `snapshot.seq + 1`, fail with `SnapshotGap`.
/// 3. Fast path: if the store reports a `last_applied_seq` equal to the
///    log's last seq, return without touching the store.
/// 4. Otherwise clear the store and replay (snapshot + tail, or full log).
///
/// # Errors
/// `SnapshotMismatch`, `Log`, `SnapshotGap`, `Clear` or `Replay`, matching
/// the step that failed.
fn startup_replay<S: Store>(
    executor: &Executor,
    log: &EventLog,
    store: &mut S,
    snapshot: Option<&Snapshot>,
) -> Result<(), RunError> {
    // Snapshot validation runs first (cheap) so a bad snapshot is caught
    // even when we'd otherwise take the skip-replay fast path.
    if let Some(snap) = snapshot {
        snap.ensure_compatible_with(executor)?;
    }

    // Read the log a single time: both the gap check and the fast path
    // only need its first / last seq. The entries are released before
    // the replay below.
    let log_last_seq = {
        let entries = log.entries()?;
        if let (Some(snap), Some(first)) = (snapshot, entries.first()) {
            let expected = snap.seq.saturating_add(1);
            if first.seq() > expected {
                return Err(RunError::SnapshotGap {
                    snap_seq: snap.seq,
                    log_first_seq: first.seq(),
                    expected,
                });
            }
        }
        entries.last().map(|e| e.seq())
    };

    // Fast path: persistent stores carry a `last_applied_seq` marker;
    // when it matches the log's last seq, the store is verifiably in
    // sync and we can skip the clear+replay entirely. Failures here
    // (e.g. backend can't read meta) just fall through to full replay
    // — never a correctness issue.
    if let Ok(applied) = store.last_applied_seq() {
        if applied == log_last_seq && applied.is_some() {
            return Ok(());
        }
    }

    store.clear().map_err(RunError::Clear)?;
    replay_with_snapshot_into(log, snapshot, store)?;
    Ok(())
}
/// Serve connections one at a time until a client asks for shutdown.
///
/// Returns `Ok(())` after the connection that requested shutdown has been
/// handled; returns `RunError::Io` as soon as `accept` fails.
fn accept_loop<S: Store>(
    listener: &UnixListener,
    executor: &Executor,
    store: &mut S,
    log: &mut EventLog,
) -> Result<(), RunError> {
    let mut stop_requested = false;
    while !stop_requested {
        let (conn, _peer) = listener.accept()?;
        stop_requested = handle_connection(conn, executor, store, log);
    }
    Ok(())
}
/// One line-delimited JSON request. The variant is selected by the `op`
/// field, spelled in snake_case (e.g. `{"op": "hash_state"}`).
#[derive(Debug, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
enum Request {
/// Run a morphism through the kernel and append it to the log.
/// `inputs` maps role name to record id; both `inputs` and `params`
/// default to empty / null when omitted.
Execute {
morphism: String,
#[serde(default)]
inputs: std::collections::BTreeMap<String, Uuid>,
#[serde(default)]
params: Value,
},
/// Fetch a single record by entity name and id.
Load {
entity: String,
id: Uuid,
},
/// Return the module name, schema files, morphism specs and schema
/// hashes so a client can drive the server without external docs.
Describe,
/// Re-check the log against the executor via `verify_log`.
Verify,
/// Return the SHA-256 of the live store's full state plus a record
/// count. Used by the drift detector as the cheap fast-path check
/// before asking for the full record dump.
HashState,
/// Return every record on the server in canonical order. Used after
/// a hash mismatch to compute the per-record diff. Response can be
/// large — the operator opts into it.
DumpRecords,
/// Ask the server to stop after replying to this request.
Shutdown,
}
/// Serve a single client until it disconnects or asks for shutdown.
///
/// Each non-blank input line is dispatched as one request and answered
/// with one JSON line. The return value is `true` only when the client
/// requested shutdown (after its reply has been written), which tells the
/// caller to leave the accept loop.
///
/// Connection-level IO failures (clone, read, write) are reported on
/// stderr and end this connection only — they return `false`, so the
/// server keeps running.
fn handle_connection<S: Store>(
    stream: UnixStream,
    executor: &Executor,
    store: &mut S,
    log: &mut EventLog,
) -> bool {
    // Separate handle for writing; the original is consumed by the reader.
    let mut out = match stream.try_clone() {
        Err(e) => {
            eprintln!("nakui run: clone stream: {}", e);
            return false;
        }
        Ok(handle) => handle,
    };

    for incoming in BufReader::new(stream).lines() {
        let request = match incoming {
            Err(e) => {
                eprintln!("nakui run: read: {}", e);
                return false;
            }
            Ok(text) => text,
        };
        if request.trim().is_empty() {
            continue;
        }

        let (reply, stop) = dispatch(&request, executor, store, log);
        let payload = serde_json::to_vec(&reply).expect("response serializes");

        // Body first, then the newline terminator.
        let sent = out
            .write_all(&payload)
            .and_then(|()| out.write_all(b"\n"));
        if let Err(e) = sent {
            eprintln!("nakui run: write: {}", e);
            return false;
        }

        if stop {
            let _ = out.flush();
            return true;
        }
    }

    false
}
fn dispatch<S: Store>(
line: &str,
executor: &Executor,
store: &mut S,
log: &mut EventLog,
) -> (Value, bool) {
let req: Request = match serde_json::from_str(line) {
Ok(r) => r,
Err(e) => return (error_response(&format!("bad request: {}", e)), false),
};
match req {
Request::Execute {
morphism,
inputs,
params,
} => {
let inputs_vec: Vec<(&str, Uuid)> =
inputs.iter().map(|(k, v)| (k.as_str(), *v)).collect();
match execute_and_log_with_recovery(
executor,
store,
log,
&morphism,
&inputs_vec,
params,
) {
Ok(ops) => (
json!({
"ok": true,
"seq": log.next_seq().saturating_sub(1),
"ops": ops,
"schema_hash": executor.schema_hash(&morphism).map(|h| hex_encode(&h)),
}),
false,
),
Err(RecoverableExecuteError::PreLog(e)) => (
json!({"ok": false, "stage": "pre_log", "error": e.to_string()}),
false,
),
Err(RecoverableExecuteError::LogAppend(e)) => (
json!({"ok": false, "stage": "log_append", "error": e.to_string()}),
false,
),
Err(e @ RecoverableExecuteError::Unrecoverable { .. }) => (
json!({"ok": false, "stage": "unrecoverable", "error": e.to_string()}),
false,
),
}
}
Request::Load { entity, id } => {
let value = store.load(&entity, id);
(json!({"ok": true, "value": value}), false)
}
Request::Describe => {
let hashes: std::collections::BTreeMap<String, String> = executor
.schema_hashes
.iter()
.map(|(k, v)| (k.clone(), hex_encode(v)))
.collect();
(
json!({
"ok": true,
"protocol": 1,
"module": executor.manifest.module,
"schemas": executor.manifest.effective_schemas(),
"morphisms": executor.manifest.morphisms,
"schema_hashes": hashes,
}),
false,
)
}
Request::Verify => match verify_log(log, executor) {
Ok(()) => {
let entries = log
.entries()
.map(|es| es.len())
.unwrap_or(0);
(json!({"ok": true, "entries": entries}), false)
}
Err(e) => (
json!({"ok": false, "error": e.to_string()}),
false,
),
},
Request::HashState => {
let records: Vec<_> = match store.iter() {
Ok(it) => it.collect(),
Err(e) => return (json!({"ok": false, "error": e.to_string()}), false),
};
let count = records.len();
let hash = match store.hash_state() {
Ok(h) => h,
Err(e) => return (json!({"ok": false, "error": e.to_string()}), false),
};
(
json!({
"ok": true,
"hash": hex_encode(&hash),
"records": count,
}),
false,
)
}
Request::DumpRecords => match store.iter() {
Ok(it) => {
let records: Vec<Value> = it
.map(|(entity, id, value)| {
json!({"entity": entity, "id": id, "value": value})
})
.collect();
(json!({"ok": true, "records": records}), false)
}
Err(e) => (json!({"ok": false, "error": e.to_string()}), false),
},
Request::Shutdown => (json!({"ok": true, "shutdown": true}), true),
}
}
/// Build the generic failure reply: `{"ok": false, "error": msg}`.
fn error_response(msg: &str) -> Value {
json!({"ok": false, "error": msg})
}
/// Render `bytes` as lowercase hexadecimal, two characters per byte, most
/// significant nibble first. An empty slice yields an empty string.
fn hex_encode(bytes: &[u8]) -> String {
    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        for nibble in [byte >> 4, byte & 0x0f] {
            // `from_digit` yields '0'..='9' / 'a'..='f' for values below 16.
            hex.push(char::from_digit(u32::from(nibble), 16).expect("nibble is below 16"));
        }
    }
    hex
}
+593
View File
@@ -0,0 +1,593 @@
use serde_json::Value;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use thiserror::Error;
use uuid::Uuid;
use crate::delta::FieldOp;
/// Errors returned by `Store` operations.
#[derive(Debug, Clone, Error)]
pub enum StoreError {
/// The targeted record does not exist (entity name, record id).
#[error("entity {0} id {1} not found")]
NotFound(String, Uuid),
/// A create targeted a record that already exists.
#[error("entity {0} id {1} already exists")]
Conflict(String, Uuid),
/// A field set targeted a record whose value is not a JSON object.
#[error("set on non-object record at {0} {1}")]
NotAnObject(String, Uuid),
/// Backend-specific transient or systemic failure (network, disk,
/// driver). Distinct from the data-shape errors above.
#[error("backend error: {0}")]
Backend(String),
}
/// Storage backend for records addressed by (entity name, id).
pub trait Store {
/// Return a copy of the record, or `None` when it does not exist.
fn load(&self, entity: &str, id: Uuid) -> Option<Value>;
/// Insert or replace a record without going through the morphism
/// pipeline. Represents external/boundary input — the source of
/// records that didn't originate from a kernel-validated event.
fn seed(&mut self, entity: &str, id: Uuid, data: Value);
/// Read-only check: would `apply(ops)` succeed against current state?
/// Does NOT mutate. The kernel runs this as the last step of `compute`
/// so that, by the time we log an event, the apply is contractually
/// guaranteed to land.
fn apply_dry_run(&self, ops: &[FieldOp]) -> Result<(), StoreError>;
/// Apply `ops` to the store, in order.
fn apply(&mut self, ops: &[FieldOp]) -> Result<(), StoreError>;
/// Drop every record. Used by `reconcile` to wipe a stale store before
/// replaying the log. Must leave the store in the same state it would
/// be in immediately after construction. Implementors that override
/// `last_applied_seq` must reset that marker here too — a cleared
/// store has applied nothing.
fn clear(&mut self) -> Result<(), StoreError>;
/// The last log seq whose effects are reflected in this store, if
/// the store can persist that fact. Default `Ok(None)` covers
/// transient backends. The startup path uses this to skip the full
/// replay when the store is verifiably already in sync with the log.
fn last_applied_seq(&self) -> Result<Option<u64>, StoreError> {
Ok(None)
}
/// Persist the marker after a successful apply / seed / replay.
/// Best-effort: callers ignore failures here because a stale marker
/// only costs an extra full replay on next startup, never
/// correctness — full replay starts with `clear()`, so it tolerates
/// any prior state. Default impl is a no-op for transient backends.
fn set_last_applied_seq(&mut self, _seq: u64) -> Result<(), StoreError> {
Ok(())
}
/// Enumerate every record in canonical order: sorted first by entity
/// name, then by id bytes. The canonical order is what makes
/// `hash_state` reproducible — without it two stores with the same
/// records would hash differently depending on insertion order.
///
/// Returns owned `Value`s. For an in-memory backend this clones; for
/// a remote backend it materializes a snapshot. V1 chooses simplicity
/// over streaming — the hash and drift-comparison use cases need to
/// see all records anyway, and an iterator over a Vec keeps the
/// trait method object-safe and free of async lifetime concerns.
fn iter(&self) -> Result<Box<dyn Iterator<Item = (String, Uuid, Value)> + '_>, StoreError>;
/// Deterministic SHA-256 of the store's full state. Two stores with
/// the same records (regardless of how they got there or which
/// backend they live in) produce the same hash; any drift produces
/// a different one. The default impl is the contract — backends
/// should only override it for backend-native acceleration (e.g.
/// server-side table digests), and an override must produce the
/// same bytes as the default.
///
/// Framing per record, in `iter()` order:
/// entity_bytes | 0x00 | id_bytes | 0x00 | canonical_value_hash
/// The fields are delimited by single 0x00 separator bytes, not by
/// length prefixes. `id_bytes` comes from `Uuid::as_bytes`
/// (fixed-width), so the separator after the entity name is what
/// keeps (entity="ab", id=…) apart from (entity="a", id=…).
/// NOTE(review): this is unambiguous only while entity names contain
/// no NUL byte — confirm entity names are constrained accordingly.
/// The value bytes are produced by `hash_value`, which walks the JSON
/// tree with type-tagged framing — that decouples the hash from
/// `serde_json::to_vec`'s representation choices (especially
/// integer-valued floats vs ints) so cross-backend comparison
/// works.
fn hash_state(&self) -> Result<[u8; 32], StoreError> {
let mut hasher = Sha256::new();
for (entity, id, value) in self.iter()? {
hasher.update(entity.as_bytes());
hasher.update([0u8]);
hasher.update(id.as_bytes());
hasher.update([0u8]);
hash_value(&mut hasher, &value);
}
Ok(hasher.finalize().into())
}
}
/// Canonical hash of a `serde_json::Value`, fed into `hasher`.
///
/// Every value starts with a one-byte type tag so a string "true" can't
/// collide with the boolean `true`. Strings, arrays, objects and object
/// keys carry a little-endian `u64` length/count prefix so concatenation
/// can't shift bytes between fields. Numbers normalize: any integer-valued
/// number (i64, u64, or a finite f64 with no fractional part inside the
/// i128 bounds) is hashed as an i128 — that's what makes cross-backend
/// equality work, since SurrealDB may round-trip what the caller wrote as
/// `100_i64` back as the same numeric value without us needing to commit
/// to a wire-format-specific representation.
///
/// The exact byte sequence is the hash contract: do not reorder the
/// `update` calls.
pub fn hash_value(hasher: &mut Sha256, v: &Value) {
match v {
Value::Null => hasher.update([TAG_NULL]),
Value::Bool(b) => {
hasher.update([TAG_BOOL]);
hasher.update([*b as u8]);
}
Value::Number(n) => {
// Try the exact integer representations first, then fall back to f64.
if let Some(i) = n.as_i64() {
hash_int(hasher, i as i128);
} else if let Some(u) = n.as_u64() {
hash_int(hasher, u as i128);
} else if let Some(f) = n.as_f64() {
// Integer-valued floats canonicalize to int. Anything
// else (fractions, NaN, infinities) hashes as the raw
// f64 bit pattern — that's still deterministic, just
// not normalized.
// NOTE(review): the upper bound is inclusive; if the constant is
// exactly 2^127, `f as i128` saturates to i128::MAX for that one
// value rather than representing it exactly — confirm acceptable.
if f.is_finite()
&& f.fract() == 0.0
&& f >= I128_MIN_AS_F64
&& f <= I128_MAX_AS_F64
{
hash_int(hasher, f as i128);
} else {
hasher.update([TAG_FLOAT]);
hasher.update(f.to_bits().to_le_bytes());
}
} else {
// serde_json::Number guarantees one of the above; this
// branch only fires if a future variant appears.
hasher.update([TAG_FLOAT]);
hasher.update(f64::NAN.to_bits().to_le_bytes());
}
}
Value::String(s) => {
// Tag, byte length, then the UTF-8 bytes.
hasher.update([TAG_STRING]);
hasher.update((s.len() as u64).to_le_bytes());
hasher.update(s.as_bytes());
}
Value::Array(arr) => {
// Tag, element count, then each element in order.
hasher.update([TAG_ARRAY]);
hasher.update((arr.len() as u64).to_le_bytes());
for item in arr {
hash_value(hasher, item);
}
}
Value::Object(map) => {
hasher.update([TAG_OBJECT]);
hasher.update((map.len() as u64).to_le_bytes());
// serde_json::Map without `preserve_order` is BTreeMap
// (alphabetical). We sort defensively in case the build
// pulls in `preserve_order` transitively from a future dep.
let mut keys: Vec<&String> = map.keys().collect();
keys.sort();
for k in keys {
// Keys are length-prefixed but carry no type tag of their own.
hasher.update((k.len() as u64).to_le_bytes());
hasher.update(k.as_bytes());
hash_value(hasher, &map[k]);
}
}
}
}
/// Feed an integer into `hasher` as `TAG_INT` followed by the 16
/// little-endian bytes of `n`. All integer-valued JSON numbers go through
/// here so they hash identically regardless of their original width.
fn hash_int(hasher: &mut Sha256, n: i128) {
hasher.update([TAG_INT]);
hasher.update(n.to_le_bytes());
}
// One-byte type tags written by `hash_value` / `hash_int` ahead of each
// value. The numeric values are part of the hash format: changing one
// changes every hash that contains a value of that type.
const TAG_NULL: u8 = 0;
const TAG_BOOL: u8 = 1;
const TAG_INT: u8 = 2;
const TAG_FLOAT: u8 = 3;
const TAG_STRING: u8 = 4;
const TAG_ARRAY: u8 = 5;
const TAG_OBJECT: u8 = 6;
// f64 can't represent i128::MAX (2^127 - 1) exactly; the nearest
// representable value is 2^127, which is what these literals approximate
// (with the sign flipped for the minimum). `hash_value` uses them as the
// inclusive range inside which an integer-valued f64 is hashed as an i128.
// Rust's float-to-int `as` cast saturates, so `f as i128` is defined for
// every f64; the bounds exist to keep out-of-range floats on the raw-bits
// path instead of collapsing them onto i128::MIN / i128::MAX.
const I128_MIN_AS_F64: f64 = -1.7014118346046923e38;
const I128_MAX_AS_F64: f64 = 1.7014118346046923e38;
/// In-process `Store` backed by nested hash maps. Nothing is persisted.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct MemoryStore {
/// Records keyed by entity name, then by record id.
records: HashMap<String, HashMap<Uuid, Value>>,
/// Last log seq whose effects are reflected here. In-process only —
/// resets to `None` on construction or `clear`. The skip-replay
/// optimization in `nakui run` benefits the persistent backends;
/// for `MemoryStore` it's harmless bookkeeping (process restart =
/// new store = `None`, which forces full replay).
last_applied: Option<u64>,
}
impl MemoryStore {
    /// Create an empty store: no records and no applied-seq marker.
    pub fn new() -> Self {
        Self {
            records: HashMap::new(),
            last_applied: None,
        }
    }

    /// Read-only view of the underlying entity → id → record map. Used by
    /// `Snapshot::from_memory_store` to capture state for snapshot
    /// persistence.
    pub fn records(&self) -> &HashMap<String, HashMap<Uuid, Value>> {
        let Self { records, .. } = self;
        records
    }
}
impl Store for MemoryStore {
    /// Return a clone of the record, or `None` when the entity or id is
    /// unknown.
    fn load(&self, entity: &str, id: Uuid) -> Option<Value> {
        self.records.get(entity)?.get(&id).cloned()
    }

    /// Insert or overwrite a record unconditionally.
    fn seed(&mut self, entity: &str, id: Uuid, data: Value) {
        self.records
            .entry(entity.to_string())
            .or_default()
            .insert(id, data);
    }

    /// Check, without mutating, that applying `ops` in order would succeed.
    ///
    /// The batch is simulated sequentially: each op is validated against
    /// the stored state as modified by the ops before it. Validating every
    /// op against the initial state only would let a batch such as
    /// `[Delete X, Set X.f]` pass here and then panic halfway through
    /// `apply`, and would let `[Create X, Create X]` silently overwrite.
    ///
    /// # Errors
    /// - `NotFound` for a `Set` or `Delete` on a record that does not exist
    ///   at that point of the batch.
    /// - `NotAnObject` for a `Set` on a record whose value is not an object.
    /// - `Conflict` for a `Create` on a record that exists at that point.
    fn apply_dry_run(&self, ops: &[FieldOp]) -> Result<(), StoreError> {
        // Records touched by earlier ops of this batch: `Some(data)` means
        // created with `data`, `None` means deleted. Anything not listed is
        // looked up in the store itself. `Set` never changes whether a
        // record exists or is an object, so it needs no entry.
        let mut staged: HashMap<(&str, Uuid), Option<&Value>> = HashMap::new();

        for op in ops {
            match op {
                FieldOp::Set { path, .. } => {
                    let key = (path.entity.as_str(), path.id);
                    let current = match staged.get(&key) {
                        Some(pending) => *pending,
                        None => self.records.get(&path.entity).and_then(|m| m.get(&path.id)),
                    };
                    match current {
                        None => {
                            return Err(StoreError::NotFound(path.entity.clone(), path.id));
                        }
                        Some(Value::Object(_)) => {}
                        Some(_) => {
                            return Err(StoreError::NotAnObject(path.entity.clone(), path.id));
                        }
                    }
                }
                FieldOp::Create { entity, id, data } => {
                    let key = (entity.as_str(), *id);
                    let exists = match staged.get(&key) {
                        Some(pending) => pending.is_some(),
                        None => self.records.get(entity).and_then(|m| m.get(id)).is_some(),
                    };
                    if exists {
                        return Err(StoreError::Conflict(entity.clone(), *id));
                    }
                    staged.insert(key, Some(data));
                }
                FieldOp::Delete { entity, id } => {
                    let key = (entity.as_str(), *id);
                    let exists = match staged.get(&key) {
                        Some(pending) => pending.is_some(),
                        None => self.records.get(entity).and_then(|m| m.get(id)).is_some(),
                    };
                    if !exists {
                        return Err(StoreError::NotFound(entity.clone(), *id));
                    }
                    staged.insert(key, None);
                }
            }
        }
        Ok(())
    }

    /// Apply `ops` in order. The whole batch is validated first with
    /// `apply_dry_run`, so either every op lands or the store is left
    /// untouched and the validation error is returned.
    fn apply(&mut self, ops: &[FieldOp]) -> Result<(), StoreError> {
        self.apply_dry_run(ops)?;
        for op in ops {
            match op {
                FieldOp::Set { path, value } => {
                    let rec = self
                        .records
                        .get_mut(&path.entity)
                        .and_then(|m| m.get_mut(&path.id))
                        .expect("validated by dry_run");
                    let map = match rec {
                        Value::Object(m) => m,
                        _ => unreachable!("dry_run guards against non-object"),
                    };
                    map.insert(path.field.clone(), value.clone());
                }
                FieldOp::Create { entity, id, data } => {
                    self.records
                        .entry(entity.clone())
                        .or_default()
                        .insert(*id, data.clone());
                }
                FieldOp::Delete { entity, id } => {
                    self.records
                        .get_mut(entity)
                        .expect("validated by dry_run")
                        .remove(id);
                }
            }
        }
        Ok(())
    }

    /// Drop every record and reset the applied-seq marker.
    fn clear(&mut self) -> Result<(), StoreError> {
        self.records.clear();
        self.last_applied = None;
        Ok(())
    }

    /// In-process marker; `None` until `set_last_applied_seq` is called.
    fn last_applied_seq(&self) -> Result<Option<u64>, StoreError> {
        Ok(self.last_applied)
    }

    /// Record `seq` as the last applied log seq. Never fails.
    fn set_last_applied_seq(&mut self, seq: u64) -> Result<(), StoreError> {
        self.last_applied = Some(seq);
        Ok(())
    }

    /// Clone every record into a vector sorted by entity name, then by id
    /// bytes, and iterate over that vector. The sort is what makes the
    /// order independent of `HashMap` iteration order.
    fn iter(&self) -> Result<Box<dyn Iterator<Item = (String, Uuid, Value)> + '_>, StoreError> {
        let mut out: Vec<(String, Uuid, Value)> = self
            .records
            .iter()
            .flat_map(|(entity, m)| {
                m.iter()
                    .map(move |(id, v)| (entity.clone(), *id, v.clone()))
            })
            .collect();
        out.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.as_bytes().cmp(b.1.as_bytes())));
        Ok(Box::new(out.into_iter()))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::delta::{FieldOp, FieldPath};
use serde_json::json;
// A `Set` on a record whose stored value is not a JSON object must be
// rejected by both the dry run and the real apply, with the entity and id
// carried in the error.
#[test]
fn dry_run_rejects_set_on_non_object() {
let mut store = MemoryStore::new();
let id = Uuid::new_v4();
store.seed("Caja", id, json!(42)); // not an object
let op = FieldOp::Set {
path: FieldPath {
entity: "Caja".into(),
id,
field: "saldo".into(),
},
value: json!(100),
};
match store.apply_dry_run(&[op.clone()]) {
Err(StoreError::NotAnObject(e, i)) => {
assert_eq!(e, "Caja");
assert_eq!(i, id);
}
other => panic!("expected NotAnObject, got {:?}", other),
}
// apply must reject too without panicking.
assert!(matches!(
store.apply(&[op]),
Err(StoreError::NotAnObject(_, _))
));
}
// Creating a record whose (entity, id) is already present must fail the
// dry run with `Conflict`.
#[test]
fn dry_run_rejects_create_conflict() {
let mut store = MemoryStore::new();
let id = Uuid::new_v4();
store.seed("Caja", id, json!({"id": id.to_string()}));
let op = FieldOp::Create {
entity: "Caja".into(),
id,
data: json!({"id": id.to_string()}),
};
assert!(matches!(
store.apply_dry_run(&[op]),
Err(StoreError::Conflict(_, _))
));
}
// Happy path: a `Set` on an existing object record passes the dry run.
#[test]
fn dry_run_passes_for_valid_set() {
let mut store = MemoryStore::new();
let id = Uuid::new_v4();
store.seed("Caja", id, json!({"saldo": 100, "currency": "USD"}));
let op = FieldOp::Set {
path: FieldPath {
entity: "Caja".into(),
id,
field: "saldo".into(),
},
value: json!(150),
};
assert!(store.apply_dry_run(&[op]).is_ok());
}
// Two stores seeded with the same three records in different orders must
// enumerate them identically: entities ascending, then ids in byte order.
#[test]
fn iter_returns_canonical_order_regardless_of_insertion() {
let a = Uuid::new_v4();
let b = Uuid::new_v4();
let c = Uuid::new_v4();
let mut s1 = MemoryStore::new();
s1.seed("Caja", a, json!({"id": a.to_string(), "x": 1}));
s1.seed("Movimiento", c, json!({"id": c.to_string(), "y": 3}));
s1.seed("Caja", b, json!({"id": b.to_string(), "x": 2}));
// Same records, different insertion order.
let mut s2 = MemoryStore::new();
s2.seed("Movimiento", c, json!({"id": c.to_string(), "y": 3}));
s2.seed("Caja", b, json!({"id": b.to_string(), "x": 2}));
s2.seed("Caja", a, json!({"id": a.to_string(), "x": 1}));
let r1: Vec<_> = s1.iter().unwrap().collect();
let r2: Vec<_> = s2.iter().unwrap().collect();
assert_eq!(r1, r2, "iter order must be insertion-independent");
// Entities lexicographically sorted (Caja before Movimiento).
let entities: Vec<&str> = r1.iter().map(|(e, _, _)| e.as_str()).collect();
assert_eq!(entities, vec!["Caja", "Caja", "Movimiento"]);
// Within Caja, ids in byte order.
let caja_ids: Vec<Uuid> = r1
.iter()
.filter(|(e, _, _)| e == "Caja")
.map(|(_, id, _)| *id)
.collect();
// The ids are random, so derive the expected order by sorting them.
let mut expected = vec![a, b];
expected.sort_by(|x, y| x.as_bytes().cmp(y.as_bytes()));
assert_eq!(caja_ids, expected);
}
#[test]
fn hash_state_is_deterministic_and_independent_of_insertion_order() {
    let (a, b) = (Uuid::new_v4(), Uuid::new_v4());

    // Two stores holding the same records, seeded in opposite order.
    let mut forward = MemoryStore::new();
    forward.seed("Caja", a, json!({"id": a.to_string(), "saldo": 100}));
    forward.seed("Caja", b, json!({"id": b.to_string(), "saldo": 200}));

    let mut reversed = MemoryStore::new();
    reversed.seed("Caja", b, json!({"id": b.to_string(), "saldo": 200}));
    reversed.seed("Caja", a, json!({"id": a.to_string(), "saldo": 100}));

    let hash_forward = forward.hash_state().unwrap();
    let hash_reversed = reversed.hash_state().unwrap();
    assert_eq!(
        hash_forward, hash_reversed,
        "equal state must hash identically regardless of how it was built"
    );
}
#[test]
fn hash_state_changes_when_state_changes() {
    let a = Uuid::new_v4();

    let mut baseline = MemoryStore::new();
    baseline.seed("Caja", a, json!({"id": a.to_string(), "saldo": 100}));

    // Same record, one field off by one.
    let mut tweaked = MemoryStore::new();
    tweaked.seed("Caja", a, json!({"id": a.to_string(), "saldo": 101}));
    assert_ne!(
        baseline.hash_state().unwrap(),
        tweaked.hash_state().unwrap(),
        "off-by-one in a single field must produce a different hash"
    );

    // Same record plus one extra record: the hash must move as well.
    let mut extended = MemoryStore::new();
    extended.seed("Caja", a, json!({"id": a.to_string(), "saldo": 100}));
    extended.seed("Caja", Uuid::new_v4(), json!({"id": "extra", "saldo": 0}));
    assert_ne!(
        baseline.hash_state().unwrap(),
        extended.hash_state().unwrap()
    );
}
#[test]
fn last_applied_seq_round_trips_and_resets_on_clear() {
    let mut store = MemoryStore::new();
    assert_eq!(
        store.last_applied_seq().unwrap(),
        None,
        "fresh MemoryStore has no marker"
    );

    // Each write must be visible on the next read, including overwrites.
    for seq in [5_u64, 12] {
        store.set_last_applied_seq(seq).unwrap();
        assert_eq!(store.last_applied_seq().unwrap(), Some(seq));
    }

    store.clear().unwrap();
    assert_eq!(
        store.last_applied_seq().unwrap(),
        None,
        "clear() resets the marker — a cleared store has applied nothing"
    );
}
#[test]
fn integer_and_integer_valued_float_hash_identically() {
    // Cross-backend property: a number written as i64 by one backend and
    // read back as an integer-valued f64 by another must hash the same.
    let as_integer = json!({"saldo": 100_i64});
    let as_float = json!({"saldo": 100.0_f64});

    let digest_integer = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &as_integer);
        hasher.finalize()
    };
    let digest_float = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &as_float);
        hasher.finalize()
    };

    assert_eq!(
        digest_integer, digest_float,
        "integer-valued numbers must canonicalize regardless of source representation"
    );
}
#[test]
fn fractional_floats_do_not_canonicalize_to_int() {
    // A float with a fractional part has to stay a float: folding 100.5
    // into 100 would mask a genuine difference between two states.
    let whole = json!({"x": 100_i64});
    let fractional = json!({"x": 100.5_f64});

    let digest_whole = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &whole);
        hasher.finalize()
    };
    let digest_fractional = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &fractional);
        hasher.finalize()
    };

    assert_ne!(
        digest_whole, digest_fractional,
        "100 and 100.5 must hash differently"
    );
}
#[test]
fn same_object_with_different_insertion_order_hashes_same() {
    // serde_json::Map is BTreeMap by default but we sort defensively
    // in case `preserve_order` is enabled by some transitive dep.
    let mut ascending = serde_json::Map::new();
    for (key, number) in [("a", 1), ("b", 2), ("c", 3)] {
        ascending.insert(key.into(), json!(number));
    }
    let mut shuffled = serde_json::Map::new();
    for (key, number) in [("c", 3), ("a", 1), ("b", 2)] {
        shuffled.insert(key.into(), json!(number));
    }

    let digest_ascending = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &Value::Object(ascending));
        hasher.finalize()
    };
    let digest_shuffled = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &Value::Object(shuffled));
        hasher.finalize()
    };
    assert_eq!(digest_ascending, digest_shuffled);
}
#[test]
fn type_tagged_framing_distinguishes_string_from_number() {
    // The string "42" and the number 42 must land on different digests.
    let text_42 = json!("42");
    let number_42 = json!(42);
    let digest_text = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &text_42);
        hasher.finalize()
    };
    let digest_number = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &number_42);
        hasher.finalize()
    };
    assert_ne!(digest_text, digest_number);

    // Likewise, boolean `true` must not collide with the number 1.
    let truthy = json!(true);
    let number_one = json!(1);
    let digest_truthy = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &truthy);
        hasher.finalize()
    };
    let digest_one = {
        let mut hasher = sha2::Sha256::new();
        super::hash_value(&mut hasher, &number_one);
        hasher.finalize()
    };
    assert_ne!(digest_truthy, digest_one);
}
#[test]
fn empty_store_has_a_well_defined_hash() {
    let left = MemoryStore::new();
    let right = MemoryStore::new();
    assert_eq!(left.hash_state().unwrap(), right.hash_state().unwrap());

    // SHA-256 of the empty input. Pinning the literal bytes means an
    // accidental framing change in `hash_state` cannot slip by unnoticed.
    let empty_sha256 = hex_decode(
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
    );
    let actual = left.hash_state().unwrap().to_vec();
    assert_eq!(actual, empty_sha256);
}
/// Decodes a hex string into bytes, two characters per byte.
///
/// Test-only helper: panics with `"hex"` on a non-hex pair, and panics
/// on the slice if the input length is odd.
fn hex_decode(s: &str) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(s.len() / 2);
    let mut offset = 0;
    while offset < s.len() {
        let pair = &s[offset..offset + 2];
        bytes.push(u8::from_str_radix(pair, 16).expect("hex"));
        offset += 2;
    }
    bytes
}
}
@@ -0,0 +1,403 @@
//! SurrealDB-backed `Store` implementation.
//!
//! Wraps an embedded SurrealDB instance (`kv-mem` by default, SurrealKV
//! when the `persistent` Cargo feature is enabled) behind the same sync
//! `Store` trait the kernel uses. Each instance owns a private `tokio`
//! current-thread runtime and `block_on`s every async call.
//!
//! Why everything goes through `db.query()`:
//! SurrealDB 2.x's typed-response API (`db.upsert(thing).content(data)`)
//! deserializes responses through a serializer that is hostile to
//! `serde_json::Value` and to dynamic record shapes. Using raw SurrealQL
//! with parameter binding sidesteps that — `Response::check()` validates
//! success without forcing us to materialize the response into a typed
//! shape.
//!
//! Identity handling: SurrealDB owns record identity via a `RecordId`
//! (table:id). We strip the application-level `id` field before sending
//! and restore it on read so KCL schemas (which require `id: str`) see
//! a stable shape.
use serde_json::Value;
use surrealdb::Surreal;
use surrealdb::engine::local::{Db, Mem};
#[cfg(feature = "persistent")]
use surrealdb::engine::local::SurrealKv;
use thiserror::Error;
use tokio::runtime::Runtime;
use uuid::Uuid;
use crate::delta::FieldOp;
use crate::store::{Store, StoreError};
/// Reserved table prefix for runtime metadata that lives alongside user
/// records. Anything starting with this prefix is hidden from `iter`
/// (and therefore from `hash_state`, `dump_records`, drift detection)
/// so user-facing views never see internal bookkeeping.
///
/// The filter is a plain `starts_with` on the table name, so a user
/// entity whose name begins with this prefix would be hidden as well.
const META_TABLE_PREFIX: &str = "nakui_";
/// Single-record table where `last_applied_seq` lives; its only row is
/// keyed by `META_SINGLETON_ID`. `clear()` enumerates every table without
/// applying the prefix filter, so this table is wiped too — a cleared
/// store has applied nothing.
const META_TABLE: &str = "nakui_runtime_meta";
/// Record id of the single row stored in `META_TABLE`.
const META_SINGLETON_ID: &str = "singleton";
/// Field alias used by `iter` to surface the application-level record
/// id alongside the rest of the row, in a single per-table query. The
/// alias is stripped before the row is handed back to the caller, so
/// it never shows up in user views. Reserved — user records must not
/// carry a field of this name.
/// NOTE(review): the previous comment stated that such a collision makes
/// `iter` fail on UUID parsing; the exact outcome depends on how SurrealDB
/// resolves the duplicate projection — confirm.
const ITER_ID_ALIAS: &str = "__nakui_app_id";
/// Errors returned by the `SurrealStore` constructors.
#[derive(Debug, Error)]
pub enum SurrealStoreError {
/// Building the private tokio runtime failed.
#[error("io creating tokio runtime: {0}")]
Runtime(#[from] std::io::Error),
/// SurrealDB reported an error while opening the database or selecting
/// the namespace/database.
#[error("surrealdb: {0}")]
Backend(#[from] surrealdb::Error),
}
/// `Store` implementation backed by an embedded SurrealDB instance.
///
/// The async SurrealDB client is driven synchronously: every `Store`
/// method runs its queries through `runtime.block_on(...)`.
pub struct SurrealStore {
// Private tokio runtime used to `block_on` every SurrealDB call.
// NOTE(review): struct fields drop in declaration order, so `runtime`
// is dropped before `db` — confirm the SurrealDB handle tolerates that.
runtime: Runtime,
// Handle to the embedded (local-engine) SurrealDB database.
db: Surreal<Db>,
}
impl SurrealStore {
/// Build an in-memory SurrealDB instance (`kv-mem`). Volatile —
/// nothing persists when the process exits.
pub fn new_in_memory() -> Result<Self, SurrealStoreError> {
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?;
let db = runtime.block_on(async {
let db = Surreal::new::<Mem>(()).await?;
db.use_ns("nakui").use_db("default").await?;
Ok::<_, surrealdb::Error>(db)
})?;
Ok(Self { runtime, db })
}
/// Build a SurrealKV-backed SurrealDB instance at `path`. Records
/// survive process restarts. Requires the `persistent` Cargo feature.
///
/// Reopening an existing path resumes from the persisted state — the
/// canonical use is `let store = SurrealStore::new_persistent(path)?`
/// at process startup, with the path stable across runs.
#[cfg(feature = "persistent")]
pub fn new_persistent(
path: impl AsRef<std::path::Path>,
) -> Result<Self, SurrealStoreError> {
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?;
let path = path.as_ref().to_path_buf();
let db = runtime.block_on(async {
let db = Surreal::new::<SurrealKv>(path).await?;
db.use_ns("nakui").use_db("default").await?;
Ok::<_, surrealdb::Error>(db)
})?;
Ok(Self { runtime, db })
}
}
/// Drops the application-level `id` field from an object-shaped value.
/// Non-object values pass through untouched.
fn strip_app_id(data: Value) -> Value {
    match data {
        Value::Object(mut fields) => {
            fields.remove("id");
            Value::Object(fields)
        }
        other => other,
    }
}
/// Writes `id` back into an object-shaped value as a string `id` field,
/// replacing any existing one. Non-object values pass through untouched.
fn restore_app_id(data: Value, id: Uuid) -> Value {
    match data {
        Value::Object(mut fields) => {
            fields.insert("id".into(), Value::String(id.to_string()));
            Value::Object(fields)
        }
        other => other,
    }
}
/// Unwraps a JSON object into its field map.
///
/// # Errors
/// Returns `StoreError::Backend` for any value that is not an object.
fn json_to_map(v: Value) -> Result<serde_json::Map<String, Value>, StoreError> {
    if let Value::Object(fields) = v {
        return Ok(fields);
    }
    Err(StoreError::Backend(
        "SurrealStore expects object-shaped records".into(),
    ))
}
fn map_err(e: surrealdb::Error) -> StoreError {
StoreError::Backend(e.to_string())
}
impl Store for SurrealStore {
/// Fetches one record and returns it with the application `id` restored.
///
/// Returns `None` when the record is absent and also when the query or
/// the row decoding fails: backend errors are swallowed here because the
/// signature has no error channel.
fn load(&self, entity: &str, id: Uuid) -> Option<Value> {
    let table = entity.to_string();
    let record_key = id.to_string();
    self.runtime.block_on(async {
        // `OMIT id` leaves out SurrealDB's Thing-typed id, which
        // serde_json::Value cannot represent; the application id is put
        // back by `restore_app_id` below.
        let queried = self
            .db
            .query("SELECT * OMIT id FROM type::thing($table, $id)")
            .bind(("table", table))
            .bind(("id", record_key))
            .await;
        let mut response = queried.ok()?;
        let rows: Vec<Value> = response.take(0).ok()?;
        rows.into_iter().next().map(|row| restore_app_id(row, id))
    })
}
/// Unconditionally writes (upserts) a record, replacing its content.
///
/// The application-level `id` field is stripped before the write.
///
/// # Panics
/// Panics if `data` is not a JSON object, or if the upsert fails.
fn seed(&mut self, entity: &str, id: Uuid, data: Value) {
    let content = json_to_map(strip_app_id(data)).expect("seed data is object-shaped");
    let table = entity.to_string();
    let record_key = id.to_string();
    let upsert = async {
        self.db
            .query("UPSERT type::thing($table, $id) CONTENT $data")
            .bind(("table", table))
            .bind(("id", record_key))
            .bind(("data", content))
            .await
            .and_then(|r| r.check())
    };
    self.runtime.block_on(upsert).expect("seed upsert");
}
/// Validates `ops` against the current store contents without writing.
///
/// `Set` and `Delete` need the target record to exist (`NotFound`
/// otherwise); `Create` needs it to be absent (`Conflict` otherwise).
/// Validation stops at the first failing op.
///
/// NOTE(review): each op is checked against the stored state only, not
/// against the effect of earlier ops in the same batch — confirm callers
/// never rely on intra-batch dependencies.
fn apply_dry_run(&self, ops: &[FieldOp]) -> Result<(), StoreError> {
    self.runtime.block_on(async {
        for op in ops {
            // Reduce each op to "which record, and must it already exist?".
            let (entity, id, must_exist) = match op {
                // We don't model NotAnObject for SurrealStore: every
                // record stored via this trait is map-shaped by
                // construction (json_to_map enforces it on write).
                FieldOp::Set { path, .. } => (&path.entity, path.id, true),
                FieldOp::Create { entity, id, .. } => (entity, *id, false),
                FieldOp::Delete { entity, id } => (entity, *id, true),
            };
            let present = self.exists(entity, id).await?;
            match (must_exist, present) {
                (true, false) => return Err(StoreError::NotFound(entity.clone(), id)),
                (false, true) => return Err(StoreError::Conflict(entity.clone(), id)),
                _ => {}
            }
        }
        Ok(())
    })
}
/// Returns every user record as `(table, id, row)`, sorted by table name
/// and then by the raw bytes of the id.
///
/// Tables whose name starts with `META_TABLE_PREFIX` are skipped, so
/// runtime bookkeeping never shows up in the result. Each row has its
/// application-level `id` field restored as a string.
///
/// # Errors
/// Returns `StoreError::Backend` if a query fails, if a row is not an
/// object, if the id alias is missing or not a string, or if an id does
/// not parse as a UUID.
fn iter(&self) -> Result<Box<dyn Iterator<Item = (String, Uuid, Value)> + '_>, StoreError> {
    // One query per table: pull the application id alongside every
    // other field via an alias, strip the SurrealDB-typed `id` via
    // OMIT, then restore the application `id` field in code so the
    // output is byte-identical to what `load()` produces (cross-
    // backend hash equality and the `iter ↔ load` parity contract
    // both depend on this).
    self.runtime.block_on(async {
        let mut info = self
            .db
            .query("INFO FOR DB")
            .await
            .and_then(|r| r.check())
            .map_err(map_err)?;
        let row: Option<Value> = info.take(0).map_err(map_err)?;
        // Keep user tables only; runtime metadata tables stay hidden.
        let tables: Vec<String> = row
            .as_ref()
            .and_then(|v| v.get("tables"))
            .and_then(|v| v.as_object())
            .map(|m| {
                m.keys()
                    .filter(|k| !k.starts_with(META_TABLE_PREFIX))
                    .cloned()
                    .collect()
            })
            .unwrap_or_default();

        // The alias is interpolated into the query text because only
        // values can be bound as parameters, not identifiers. It is a
        // compile-time constant, so there is no injection surface. The
        // text is identical for every table (the table name is bound as
        // `$t`), so it is built once, outside the loop.
        let select = format!(
            "SELECT meta::id(id) AS {alias}, * OMIT id FROM type::table($t)",
            alias = ITER_ID_ALIAS,
        );

        let mut out: Vec<(String, Uuid, Value)> = Vec::new();
        for table in &tables {
            let mut resp = self
                .db
                .query(&select)
                .bind(("t", table.clone()))
                .await
                .and_then(|r| r.check())
                .map_err(map_err)?;
            let rows: Vec<Value> = resp.take(0).map_err(map_err)?;
            for row in rows {
                let Value::Object(mut map) = row else {
                    return Err(StoreError::Backend(format!(
                        "row in table {} is not an object",
                        table
                    )));
                };
                // Pull the alias out so it never reaches the caller.
                let app_id_str = match map.remove(ITER_ID_ALIAS) {
                    Some(Value::String(s)) => s,
                    _ => {
                        return Err(StoreError::Backend(format!(
                            "row in table {} missing alias `{}`",
                            table, ITER_ID_ALIAS
                        )));
                    }
                };
                let id = Uuid::parse_str(&app_id_str).map_err(|e| {
                    StoreError::Backend(format!(
                        "non-uuid id in table {}: {} ({})",
                        table, app_id_str, e
                    ))
                })?;
                // Match `restore_app_id`: insert the application id
                // back as a regular `id: <uuid_str>` field. Callers
                // reading the row see exactly what `load()` returns.
                map.insert("id".into(), Value::String(app_id_str));
                out.push((table.clone(), id, Value::Object(map)));
            }
        }
        // Canonical order: table name first, then id bytes.
        out.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.as_bytes().cmp(b.1.as_bytes())));
        Ok(Box::new(out.into_iter())
            as Box<dyn Iterator<Item = (String, Uuid, Value)>>)
    })
}
/// Deletes every record from every table reported by `INFO FOR DB`.
///
/// The runtime meta table is included on purpose, so a cleared store
/// reports `last_applied_seq() == None`.
///
/// # Errors
/// Returns `StoreError::Backend` if listing the tables or any delete fails.
fn clear(&mut self) -> Result<(), StoreError> {
    self.runtime.block_on(async {
        let mut info = self
            .db
            .query("INFO FOR DB")
            .await
            .and_then(|r| r.check())
            .map_err(map_err)?;
        let described: Option<Value> = info.take(0).map_err(map_err)?;
        // No prefix filter here: metadata tables get wiped as well.
        let names: Vec<String> = described
            .as_ref()
            .and_then(|v| v.get("tables"))
            .and_then(|v| v.as_object())
            .map(|tables| tables.keys().cloned().collect())
            .unwrap_or_default();
        for table in names {
            self.db
                .query("DELETE FROM type::table($t)")
                .bind(("t", table))
                .await
                .and_then(|r| r.check())
                .map_err(map_err)?;
        }
        Ok(())
    })
}
/// Reads the `last_applied_seq` marker from the singleton meta record.
///
/// Returns `Ok(None)` when no meta record exists yet.
///
/// # Errors
/// Returns `StoreError::Backend` if the query or the decoding fails.
fn last_applied_seq(&self) -> Result<Option<u64>, StoreError> {
    self.runtime.block_on(async {
        let mut response = self
            .db
            .query("SELECT VALUE last_applied_seq FROM type::thing($t, $id)")
            .bind(("t", META_TABLE))
            .bind(("id", META_SINGLETON_ID))
            .await
            .and_then(|r| r.check())
            .map_err(map_err)?;
        // Zero rows means no meta record yet; otherwise one i64 row.
        let stored: Vec<i64> = response.take(0).map_err(map_err)?;
        // `as` reinterprets the bits, mirroring the `u64 as i64` cast on
        // the write side, so the value round-trips unchanged.
        let marker = stored.first().map(|&raw| raw as u64);
        Ok(marker)
    })
}
/// Stores `seq` as the `last_applied_seq` marker in the singleton meta
/// record, replacing any previous content of that record.
///
/// # Errors
/// Returns `StoreError::Backend` if the upsert fails.
fn set_last_applied_seq(&mut self, seq: u64) -> Result<(), StoreError> {
    // Stored as i64; `as` reinterprets the bits, and the read side casts
    // back with `as u64`, so the value round-trips unchanged.
    let stored = seq as i64;
    let write = async {
        self.db
            .query("UPSERT type::thing($t, $id) CONTENT { last_applied_seq: $seq }")
            .bind(("t", META_TABLE))
            .bind(("id", META_SINGLETON_ID))
            .bind(("seq", stored))
            .await
            .and_then(|r| r.check())
    };
    self.runtime
        .block_on(write)
        .map(|_response| ())
        .map_err(map_err)
}
/// Validates `ops` with `apply_dry_run`, then executes them in order.
///
/// `Set` merges a single field into the record, `Create` inserts a new
/// record (with the application `id` stripped), `Delete` removes it.
///
/// NOTE(review): the ops run as separate statements with no visible
/// transaction, so a backend failure part-way through would leave the
/// earlier ops applied — confirm this is acceptable.
///
/// # Errors
/// Returns whatever `apply_dry_run` rejects, or `StoreError::Backend`
/// if a statement fails or a `Create` payload is not an object.
fn apply(&mut self, ops: &[FieldOp]) -> Result<(), StoreError> {
    self.apply_dry_run(ops)?;
    self.runtime.block_on(async {
        for op in ops {
            // Build the statement for this op; it is awaited and checked
            // in one place below.
            let statement = match op {
                FieldOp::Set { path, value } => {
                    let mut patch = serde_json::Map::new();
                    patch.insert(path.field.clone(), value.clone());
                    self.db
                        .query("UPDATE type::thing($table, $id) MERGE $patch")
                        .bind(("table", path.entity.clone()))
                        .bind(("id", path.id.to_string()))
                        .bind(("patch", patch))
                }
                FieldOp::Create { entity, id, data } => {
                    let content = json_to_map(strip_app_id(data.clone()))?;
                    self.db
                        .query("CREATE type::thing($table, $id) CONTENT $data")
                        .bind(("table", entity.clone()))
                        .bind(("id", id.to_string()))
                        .bind(("data", content))
                }
                FieldOp::Delete { entity, id } => self
                    .db
                    .query("DELETE type::thing($table, $id)")
                    .bind(("table", entity.clone()))
                    .bind(("id", id.to_string())),
            };
            statement.await.and_then(|r| r.check()).map_err(map_err)?;
        }
        Ok(())
    })
}
}
impl SurrealStore {
    /// Reports whether a record with this `(entity, id)` is stored.
    ///
    /// Selects the record (minus SurrealDB's typed `id`) and checks
    /// whether any row came back.
    ///
    /// # Errors
    /// Returns `StoreError::Backend` if the query or the decoding fails.
    async fn exists(&self, entity: &str, id: Uuid) -> Result<bool, StoreError> {
        let table = entity.to_string();
        let record_key = id.to_string();
        let mut response = self
            .db
            .query("SELECT * OMIT id FROM type::thing($table, $id)")
            .bind(("table", table))
            .bind(("id", record_key))
            .await
            .map_err(map_err)?;
        let hits: Vec<Value> = response.take(0).map_err(map_err)?;
        let found = !hits.is_empty();
        Ok(found)
    }
}