feat: llimphi standalone — framework UI soberano extraído del monorepo

Motor gráfico Llimphi como workspace independiente: bucle Elm
(input→update→view→layout→raster→present) sobre wgpu+vello+taffy+parley.
Núcleo (hal/raster/layout/text/ui/theme/surface/motion/icons) + ~40 widgets
+ módulos, sin dependencias al resto del monorepo. cargo check --workspace
pasa (64 crates). Puerta de entrada: cargo run -p llimphi-ui --example counter.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-04 04:23:42 +00:00
commit e65e9cc623
286 changed files with 46136 additions and 0 deletions
+15
View File
@@ -0,0 +1,15 @@
[package]
name = "llimphi-gpu-bench"
version.workspace = true
edition.workspace = true
license.workspace = true
authors.workspace = true
publish.workspace = true
description = "Binario standalone que valida el SDD §'GPU directo wgpu' en una máquina con GPU real: imprime info del adapter, corre vello vs GPU directo a varios N, evalúa el criterio (≥5× a 500K, ≥60 fps @ 1M) y exporta PNGs de verificación."
[dependencies]
llimphi-hal = { path = "../llimphi-hal" }
llimphi-raster = { path = "../llimphi-raster" }
vello = { workspace = true }
pollster = { workspace = true }
png = { workspace = true }
+941
View File
@@ -0,0 +1,941 @@
//! `llimphi-gpu-bench` — binario standalone para validar el SDD
//! `02_ruway/llimphi/SDD.md` §"GPU directo wgpu" en una máquina con GPU
//! real.
//!
//! Hace cuatro cosas en orden y lo imprime todo a stdout en formato
//! markdown / tabla copy-paste friendly:
//!
//! 1. **Header del sistema** — versión, hora, OS, GPU detectado.
//! 2. **Info del adapter wgpu** — backend (Vulkan/Metal/DX12/GL),
//! device name, vendor, limits relevantes.
//! 3. **Spike vello vs GPU directo** — para N ∈ {25K, 50K, 100K, 200K,
//! 500K, 1M}. Mide ms/frame de cada uno y el factor. Evalúa el
//! criterio del SDD: ≥5× a 500K → PASA; < → ABORTAR.
//! 4. **Escalado GPU directo solo** — para N ∈ {100K, 500K, 1M, 2M,
//! 5M, 10M}. Mide ms/frame, fps equivalente, Mprim/s. Evalúa el
//! objetivo de 60 fps @ 1M.
//! 5. **PNGs de verificación visual** — exporta 2 archivos al cwd:
//! `bench_vello_100k.png` y `bench_directo_100k.png`. La forma del
//! cielo de puntos debe coincidir entre los dos (LCG determinista).
//!
//! Pegar el output completo en chat para la verificación.
//!
//! Corre con: `cargo run -p llimphi-gpu-bench --release`.
use std::fs::File;
use std::io::{BufWriter, Write};
use std::time::Instant;
use llimphi_hal::{wgpu, Hal};
use llimphi_raster::kurbo::{Affine, Rect};
use llimphi_raster::peniko::{color::palette, Color, Fill};
use llimphi_raster::{vello, GpuBatch, GpuPipelines};
const W: u32 = 1024;
const H: u32 = 1024;
const FMT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm;
const WARMUP: usize = 5;
const MEASURED: usize = 15;
const SPIKE_SIZES: &[u32] = &[25_000, 50_000, 100_000, 200_000, 500_000, 1_000_000];
const SCALE_SIZES: &[u32] = &[100_000, 500_000, 1_000_000, 2_000_000, 5_000_000, 10_000_000];
/// Overrides via env vars (para correr en hosts limitados sin tumbar el
/// binario). En GPU real ignorarlos y dejar los defaults.
///
/// - `LLIMPHI_BENCH_SPIKE_MAX=N` — recorta SPIKE_SIZES a los ≤ N.
/// - `LLIMPHI_BENCH_SCALE_MAX=N` — idem SCALE_SIZES.
/// - `LLIMPHI_BENCH_SKIP_VELLO=1` — saltea totalmente la columna vello
/// (útil si vello revienta con SIGSEGV en este host).
fn spike_sizes() -> Vec<u32> {
let max = std::env::var("LLIMPHI_BENCH_SPIKE_MAX")
.ok()
.and_then(|v| v.parse::<u32>().ok())
.unwrap_or(u32::MAX);
SPIKE_SIZES.iter().copied().filter(|&n| n <= max).collect()
}
fn scale_sizes() -> Vec<u32> {
let max = std::env::var("LLIMPHI_BENCH_SCALE_MAX")
.ok()
.and_then(|v| v.parse::<u32>().ok())
.unwrap_or(u32::MAX);
SCALE_SIZES.iter().copied().filter(|&n| n <= max).collect()
}
fn skip_vello() -> bool {
std::env::var("LLIMPHI_BENCH_SKIP_VELLO").ok().as_deref() == Some("1")
}
fn main() {
print_header();
let hal = pollster::block_on(Hal::new(None)).expect("hal");
print_adapter(&hal);
let (target, view) = make_target(&hal.device);
let pipelines = GpuPipelines::new(&hal.device, FMT);
let mut vello_renderer = vello::Renderer::new(
&hal.device,
vello::RendererOptions {
use_cpu: false,
antialiasing_support: vello::AaSupport {
area: true,
msaa8: false,
msaa16: false,
},
num_init_threads: None,
pipeline_cache: None,
},
)
.expect("vello renderer");
println!("## Spike vello vs GPU directo");
println!();
println!("Target: {W}×{H} Rgba8Unorm, headless. Cada N corre {WARMUP} warmup + {MEASURED} medidos, reporta mediana.");
println!();
println!("| N | vello ms | directo ms | factor | nota |");
println!("|---:|---:|---:|---:|---|");
let mut spike_rows: Vec<SpikeRow> = Vec::new();
let skip_v = skip_vello();
for n in spike_sizes() {
let row = bench_spike(&hal, &mut vello_renderer, &pipelines, &view, n, skip_v);
let note = if row.vello_crashed {
"vello SIGSEGV/error"
} else if let Some(f) = row.factor {
if f >= 5.0 { "≥5×" } else { "<5×" }
} else {
"-"
};
let vello_str = if row.vello_crashed {
"".to_string()
} else {
format!("{:.2}", row.vello_ms.unwrap_or(0.0))
};
let factor_str = match row.factor {
Some(f) => format!("{:.2}×", f),
None => "".to_string(),
};
println!(
"| {} | {} | {:.2} | {} | {} |",
fmt_int(n),
vello_str,
row.directo_ms,
factor_str,
note
);
let _ = std::io::stdout().flush();
spike_rows.push(row);
}
println!();
print_spike_verdict(&spike_rows);
println!("## Escalado GPU directo");
println!();
println!("API real (`GpuPipelines` + `GpuBatch::add_rect`). Sólo se mide el lado GPU directo — vello no llega acá.");
println!();
println!("| N | ms / frame | fps (1000/ms) | Mprim/s |");
println!("|---:|---:|---:|---:|");
let mut scale_rows: Vec<ScaleRow> = Vec::new();
for n in scale_sizes() {
let ms = bench_directo(&hal, &pipelines, &view, n);
let fps = 1000.0 / ms;
let mps = (n as f64 / 1_000_000.0) / (ms / 1000.0);
println!(
"| {} | {:.2} | {:.1} | {:.2} |",
fmt_int(n),
ms,
fps,
mps
);
let _ = std::io::stdout().flush();
scale_rows.push(ScaleRow { n, ms, fps, mps });
}
println!();
print_scale_verdict(&scale_rows);
// ----------------------------------------------------------------
// Variantes persistentes: el rebuild del batch/scene por frame es
// el peor caso. En apps reales (cosmos starfield Gaia, tinkuy
// particles iniciales, nakui viewport estático) los datos no
// cambian por frame — se uploadean UNA vez y el bucle solo redraw.
// Estos benches lo miden.
// ----------------------------------------------------------------
println!("## Persistente — datos fijos, sólo redraw por frame");
println!();
println!("Setup (LCG + write_buffer / Scene fill) fuera de la medición; el bucle medido sólo emite render_pass + draw + submit + wait.");
println!();
println!("### vello (Scene reutilizada sin reset)");
println!();
println!("| N | ms / frame | fps (1000/ms) |");
println!("|---:|---:|---:|");
let mut vello_persist_rows: Vec<(u32, f64)> = Vec::new();
let skip_v = skip_vello();
for n in scale_sizes() {
if skip_v {
println!("| {} | skipped | — |", fmt_int(n));
continue;
}
let attempt = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
bench_vello_persistent(&hal, &mut vello_renderer, &view, n)
}));
match attempt {
Ok(ms) => {
let fps = 1000.0 / ms;
println!("| {} | {:.2} | {:.1} |", fmt_int(n), ms, fps);
let _ = std::io::stdout().flush();
vello_persist_rows.push((n, ms));
}
Err(_) => {
println!("| {} | crash | — |", fmt_int(n));
}
}
}
println!();
println!("### GPU directo (buffer + bind group persistentes)");
println!();
println!("| N | ms / frame | fps (1000/ms) | Mprim/s |");
println!("|---:|---:|---:|---:|");
let mut directo_persist_rows: Vec<ScaleRow> = Vec::new();
for n in scale_sizes() {
let ms = bench_directo_persistent(&hal, &pipelines, &view, n);
let fps = 1000.0 / ms;
let mps = (n as f64 / 1_000_000.0) / (ms / 1000.0);
println!("| {} | {:.2} | {:.1} | {:.2} |", fmt_int(n), ms, fps, mps);
let _ = std::io::stdout().flush();
directo_persist_rows.push(ScaleRow { n, ms, fps, mps });
}
println!();
print_persistent_verdict(&directo_persist_rows, &vello_persist_rows);
println!("## Validación visual");
println!();
let png_vello = "bench_vello_100k.png";
let png_directo = "bench_directo_100k.png";
if let Err(e) = export_vello_png(&hal, &mut vello_renderer, &target, &view, 100_000, png_vello)
{
println!("vello PNG fallo: {e}");
} else {
println!("- vello 100K → `{}` ({W}×{H})", png_vello);
}
if let Err(e) =
export_directo_png(&hal, &pipelines, &target, &view, 100_000, png_directo)
{
println!("directo PNG fallo: {e}");
} else {
println!("- directo 100K → `{}` ({W}×{H})", png_directo);
}
println!();
println!("Las dos imágenes deben mostrar la misma constelación de puntos (LCG determinista).");
println!("Mirar en visor: si vello tiene halo AA suave y directo tiene pixeles hard-edged, todo bien.");
println!();
println!("## Resumen");
println!();
print_summary(
&spike_rows,
&scale_rows,
&directo_persist_rows,
&vello_persist_rows,
);
}
// ============================================================
// IO / header
// ============================================================
fn print_header() {
println!("# llimphi-gpu-bench");
println!();
println!("Validación de Fase 0 del SDD `02_ruway/llimphi/SDD.md` §\"GPU directo wgpu\".");
println!("Criterio: factor ≥ 5× a 500K Y ≥ 60 fps @ 1M en GPU mid (Radeon 5500M, Iris Xe).");
println!();
println!("- crate version: {}", env!("CARGO_PKG_VERSION"));
println!("- host OS: {}", std::env::consts::OS);
println!("- host arch: {}", std::env::consts::ARCH);
println!();
}
fn print_adapter(hal: &Hal) {
let info = hal.adapter.get_info();
let limits = hal.adapter.limits();
println!("## Adapter wgpu");
println!();
println!("- backend: `{:?}`", info.backend);
println!("- device name: `{}`", info.name);
println!("- vendor: `0x{:04x}`", info.vendor);
println!("- device id: `0x{:04x}`", info.device);
println!("- device type: `{:?}`", info.device_type);
println!("- driver: `{}`", info.driver);
println!("- driver info: `{}`", info.driver_info);
println!();
println!("Limits relevantes:");
println!();
println!("- max texture 2D: {}", limits.max_texture_dimension_2d);
println!("- max buffer size: {} MB", limits.max_buffer_size / (1024 * 1024));
println!("- max storage buffer binding: {} MB", limits.max_storage_buffer_binding_size / (1024 * 1024));
println!();
let is_software = matches!(
info.device_type,
wgpu::DeviceType::Cpu
) || info.driver.to_lowercase().contains("llvmpipe")
|| info.driver.to_lowercase().contains("software")
|| info.name.to_lowercase().contains("llvmpipe")
|| info.name.to_lowercase().contains("swiftshader");
if is_software {
println!("⚠️ Adapter parece software (`{}`). Los números no reflejan GPU real.", info.name);
println!();
}
}
fn fmt_int(n: u32) -> String {
let s = n.to_string();
let mut out = String::new();
for (i, c) in s.chars().rev().enumerate() {
if i > 0 && i % 3 == 0 {
out.push('_');
}
out.push(c);
}
out.chars().rev().collect()
}
// ============================================================
// Benches
// ============================================================
struct SpikeRow {
n: u32,
vello_ms: Option<f64>,
vello_crashed: bool,
directo_ms: f64,
factor: Option<f64>,
}
struct ScaleRow {
n: u32,
ms: f64,
fps: f64,
mps: f64,
}
fn bench_spike(
hal: &Hal,
vello_renderer: &mut vello::Renderer,
pipelines: &GpuPipelines,
view: &wgpu::TextureView,
n: u32,
skip_vello: bool,
) -> SpikeRow {
let directo_ms = bench_directo(hal, pipelines, view, n);
if skip_vello {
return SpikeRow {
n,
vello_ms: None,
vello_crashed: true, // tratamos "skipped" como "no llegó"
directo_ms,
factor: None,
};
}
// catch_unwind sólo atrapa panics, no SIGSEGV. En vello pre-200K
// este path debería ser suficiente; si el binario muere igual,
// re-correr con `LLIMPHI_BENCH_SKIP_VELLO=1`.
let vello_attempt = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
bench_vello(hal, vello_renderer, view, n)
}));
match vello_attempt {
Ok(ms) => {
let factor = ms / directo_ms;
SpikeRow {
n,
vello_ms: Some(ms),
vello_crashed: false,
directo_ms,
factor: Some(factor),
}
}
Err(_) => SpikeRow {
n,
vello_ms: None,
vello_crashed: true,
directo_ms,
factor: None,
},
}
}
fn bench_vello(
hal: &Hal,
renderer: &mut vello::Renderer,
view: &wgpu::TextureView,
n: u32,
) -> f64 {
let mut scene = vello::Scene::new();
let mut samples: Vec<f64> = Vec::with_capacity(MEASURED);
for frame in 0..(WARMUP + MEASURED) {
let t0 = Instant::now();
scene.reset();
let mut state: u32 = 0x1234_5678;
for _ in 0..n {
let (x, y, rgba) = lcg_point(&mut state);
let r = (rgba & 0xFF) as u8;
let g = ((rgba >> 8) & 0xFF) as u8;
let b = ((rgba >> 16) & 0xFF) as u8;
let a = ((rgba >> 24) & 0xFF) as u8;
let xf = x as f64;
let yf = y as f64;
scene.fill(
Fill::NonZero,
Affine::IDENTITY,
Color::from_rgba8(r, g, b, a),
None,
&Rect::new(xf, yf, xf + POINT_PX as f64, yf + POINT_PX as f64),
);
}
renderer
.render_to_texture(
&hal.device,
&hal.queue,
&scene,
view,
&vello::RenderParams {
base_color: palette::css::BLACK,
width: W,
height: H,
antialiasing_method: vello::AaConfig::Area,
},
)
.expect("vello render");
hal.device.poll(wgpu::Maintain::Wait);
let dt = t0.elapsed().as_secs_f64() * 1000.0;
if frame >= WARMUP {
samples.push(dt);
}
}
median(&mut samples)
}
fn bench_directo(
hal: &Hal,
pipelines: &GpuPipelines,
view: &wgpu::TextureView,
n: u32,
) -> f64 {
let mut samples: Vec<f64> = Vec::with_capacity(MEASURED);
for frame in 0..(WARMUP + MEASURED) {
let t0 = Instant::now();
let mut batch = GpuBatch::new(pipelines);
let mut state: u32 = 0x1234_5678;
for _ in 0..n {
let (x, y, rgba) = lcg_point(&mut state);
let r = (rgba & 0xFF) as u8;
let g = ((rgba >> 8) & 0xFF) as u8;
let b = ((rgba >> 16) & 0xFF) as u8;
let a = ((rgba >> 24) & 0xFF) as u8;
batch.add_rect(x, y, POINT_PX, POINT_PX, Color::from_rgba8(r, g, b, a));
}
let mut encoder = hal.device.create_command_encoder(
&wgpu::CommandEncoderDescriptor {
label: Some("bench-directo-enc"),
},
);
batch.flush(
&hal.device,
&hal.queue,
&mut encoder,
view,
(W as f32, H as f32),
wgpu::LoadOp::Clear(wgpu::Color::BLACK),
);
hal.queue.submit(std::iter::once(encoder.finish()));
hal.device.poll(wgpu::Maintain::Wait);
let dt = t0.elapsed().as_secs_f64() * 1000.0;
if frame >= WARMUP {
samples.push(dt);
}
}
median(&mut samples)
}
/// Vello persistente: la Scene se construye UNA vez (fill N rects) y
/// el bucle medido sólo invoca `render_to_texture`. Sin `scene.reset()`.
fn bench_vello_persistent(
hal: &Hal,
renderer: &mut vello::Renderer,
view: &wgpu::TextureView,
n: u32,
) -> f64 {
let mut scene = vello::Scene::new();
scene.reset();
let mut state: u32 = 0x1234_5678;
for _ in 0..n {
let (x, y, rgba) = lcg_point(&mut state);
let r = (rgba & 0xFF) as u8;
let g = ((rgba >> 8) & 0xFF) as u8;
let b = ((rgba >> 16) & 0xFF) as u8;
let a = ((rgba >> 24) & 0xFF) as u8;
let xf = x as f64;
let yf = y as f64;
scene.fill(
Fill::NonZero,
Affine::IDENTITY,
Color::from_rgba8(r, g, b, a),
None,
&Rect::new(xf, yf, xf + POINT_PX as f64, yf + POINT_PX as f64),
);
}
let mut samples: Vec<f64> = Vec::with_capacity(MEASURED);
for frame in 0..(WARMUP + MEASURED) {
let t0 = Instant::now();
renderer
.render_to_texture(
&hal.device,
&hal.queue,
&scene,
view,
&vello::RenderParams {
base_color: palette::css::BLACK,
width: W,
height: H,
antialiasing_method: vello::AaConfig::Area,
},
)
.expect("vello render");
hal.device.poll(wgpu::Maintain::Wait);
let dt = t0.elapsed().as_secs_f64() * 1000.0;
if frame >= WARMUP {
samples.push(dt);
}
}
median(&mut samples)
}
/// GPU directo persistente: instance buffer + uniform buffer + bind
/// group se construyen UNA vez. Bucle medido sólo abre render_pass,
/// hace `draw(0..6, 0..n)` y submit.
///
/// Replica el layout que pinta `GpuBatch::add_rect` por debajo
/// (instance stride 20 B = [x:f32, y:f32, w:f32, h:f32, rgba:u32]),
/// usando el `rects` pipeline + `bind_layout` expuestos por
/// `GpuPipelines`.
fn bench_directo_persistent(
hal: &Hal,
pipelines: &GpuPipelines,
view: &wgpu::TextureView,
n: u32,
) -> f64 {
// Empaquetar instancias UNA vez.
let mut bytes = Vec::with_capacity(n as usize * 20);
let mut state: u32 = 0x1234_5678;
for _ in 0..n {
let (x, y, rgba) = lcg_point(&mut state);
bytes.extend_from_slice(&x.to_ne_bytes());
bytes.extend_from_slice(&y.to_ne_bytes());
bytes.extend_from_slice(&POINT_PX.to_ne_bytes());
bytes.extend_from_slice(&POINT_PX.to_ne_bytes());
bytes.extend_from_slice(&rgba.to_ne_bytes());
}
let inst_buf = hal.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("persist-rects"),
size: bytes.len() as u64,
usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
hal.queue.write_buffer(&inst_buf, 0, &bytes);
// Uniforms (viewport + line_width).
let u_data: [f32; 4] = [W as f32, H as f32, 1.0, 0.0];
let mut u_bytes = Vec::with_capacity(16);
for v in u_data {
u_bytes.extend_from_slice(&v.to_ne_bytes());
}
let uniforms = hal.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("persist-uniforms"),
size: 16,
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
hal.queue.write_buffer(&uniforms, 0, &u_bytes);
let bind_group = hal.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: Some("persist-bg"),
layout: &pipelines.bind_layout,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: uniforms.as_entire_binding(),
}],
});
// Asegurar que toda la escritura previa esté en la GPU antes de
// empezar a medir frames — si no, el primer frame paga el upload.
hal.queue.submit(std::iter::empty::<wgpu::CommandBuffer>());
hal.device.poll(wgpu::Maintain::Wait);
let mut samples: Vec<f64> = Vec::with_capacity(MEASURED);
for frame in 0..(WARMUP + MEASURED) {
let t0 = Instant::now();
let mut encoder = hal.device.create_command_encoder(
&wgpu::CommandEncoderDescriptor {
label: Some("persist-enc"),
},
);
{
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("persist-pass"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
});
pass.set_pipeline(&pipelines.rects);
pass.set_bind_group(0, &bind_group, &[]);
pass.set_vertex_buffer(0, inst_buf.slice(..));
pass.draw(0..6, 0..n);
}
hal.queue.submit(std::iter::once(encoder.finish()));
hal.device.poll(wgpu::Maintain::Wait);
let dt = t0.elapsed().as_secs_f64() * 1000.0;
if frame >= WARMUP {
samples.push(dt);
}
}
median(&mut samples)
}
fn lcg_point(state: &mut u32) -> (f32, f32, u32) {
*state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
let x = (*state % W) as f32;
*state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
let y = (*state % H) as f32;
*state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
// Colores: piso 128 por canal para que las PNGs de verificación
// se vean (sin esto el LCG produce muchos negros casi puros, y
// los puntos quedan invisibles en pantalla aunque estén pintados).
let r = 128 | ((*state >> 0) & 0x7F) as u8;
let g = 128 | ((*state >> 8) & 0x7F) as u8;
let b = 128 | ((*state >> 16) & 0x7F) as u8;
let rgba = (r as u32) | ((g as u32) << 8) | ((b as u32) << 16) | 0xFF00_0000;
(x, y, rgba)
}
const POINT_PX: f32 = 2.5;
fn median(samples: &mut [f64]) -> f64 {
samples.sort_by(|a, b| a.partial_cmp(b).unwrap());
samples[samples.len() / 2]
}
// ============================================================
// Veredictos
// ============================================================
fn print_spike_verdict(rows: &[SpikeRow]) {
let at_500k = rows.iter().find(|r| r.n == 500_000);
match at_500k {
Some(r) if r.vello_crashed => {
println!("**Veredicto Fase 0:** Vello revienta antes de 500K → directo es el único path posible en ese régimen. PASA cualitativo.");
}
Some(r) => match r.factor {
Some(f) if f >= 5.0 => {
println!("**Veredicto Fase 0:** factor a 500K = {:.2}× ≥ 5 → **PASA** (criterio SDD cumplido).", f);
}
Some(f) => {
println!("**Veredicto Fase 0:** factor a 500K = {:.2}× < 5 → **ABORTAR** según criterio literal del SDD.", f);
println!("Pero ver si vello revienta a tamaños mayores — eso cambia el veredicto cualitativamente.");
}
None => {
println!("**Veredicto Fase 0:** sin datos para 500K (vello crashed o N no medido). Revisar tabla arriba.");
}
},
None => {
println!("**Veredicto Fase 0:** no se midió 500K en este run. Revisar tabla arriba.");
}
}
println!();
}
fn print_persistent_verdict(
directo: &[ScaleRow],
vello: &[(u32, f64)],
) {
let d_1m = directo.iter().find(|r| r.n == 1_000_000);
let v_1m = vello.iter().find(|(n, _)| *n == 1_000_000);
match d_1m {
Some(r) if r.fps >= 60.0 => {
println!(
"**Veredicto persistente @ 1M:** directo {:.1} fps ≥ 60 → **PASA**.",
r.fps
);
}
Some(r) => {
println!(
"**Veredicto persistente @ 1M:** directo {:.1} fps < 60 → falla incluso sin rebuild.",
r.fps
);
}
None => println!("**Veredicto:** sin datos a 1M."),
}
if let (Some(d), Some((_, v_ms))) = (d_1m, v_1m) {
let factor = v_ms / d.ms;
println!(
"**Factor persistente @ 1M:** vello {:.1} ms / directo {:.1} ms = {:.2}× ({})",
v_ms,
d.ms,
factor,
if factor >= 5.0 { "≥5×" } else { "<5×" }
);
}
println!();
}
fn print_scale_verdict(rows: &[ScaleRow]) {
let at_1m = rows.iter().find(|r| r.n == 1_000_000);
match at_1m {
Some(r) if r.fps >= 60.0 => {
println!("**Veredicto Fase 0 (objetivo 60 fps @ 1M):** {:.1} fps ≥ 60 → **PASA**.", r.fps);
}
Some(r) => {
println!("**Veredicto Fase 0 (objetivo 60 fps @ 1M):** {:.1} fps < 60 → marginal. ¿Es CPU-bound el bench (write_buffer de 12-20 MB por frame)? Probar también con `mapped_at_creation` para sacar el camino más rápido.", r.fps);
}
None => {
println!("**Veredicto:** sin datos para 1M.");
}
}
println!();
}
fn print_summary(
spike: &[SpikeRow],
scale: &[ScaleRow],
persist_directo: &[ScaleRow],
persist_vello: &[(u32, f64)],
) {
println!("Copiar lo que sigue al chat:");
println!();
println!("```");
println!("rebuild por frame — vello vs directo:");
for r in spike {
let v = match (r.vello_crashed, r.vello_ms) {
(true, _) => "crash".to_string(),
(_, Some(ms)) => format!("{:.1}ms", ms),
_ => "-".to_string(),
};
let f = r
.factor
.map(|x| format!("{:.2}x", x))
.unwrap_or_else(|| "-".to_string());
println!(" {:>10} vello={:>10} directo={:>7.1}ms factor={}", fmt_int(r.n), v, r.directo_ms, f);
}
println!();
println!("rebuild por frame — escalado directo:");
for r in scale {
println!(" {:>10} {:>7.1}ms {:>5.1}fps {:>5.2}Mprim/s", fmt_int(r.n), r.ms, r.fps, r.mps);
}
println!();
println!("persistente (datos fijos, sólo redraw):");
for r in persist_directo {
let v_ms = persist_vello
.iter()
.find(|(n, _)| *n == r.n)
.map(|(_, ms)| format!("{:>7.1}ms", ms))
.unwrap_or_else(|| "".to_string());
let factor = persist_vello
.iter()
.find(|(n, _)| *n == r.n)
.map(|(_, vms)| format!("factor={:.2}x", vms / r.ms))
.unwrap_or_else(|| "factor= — ".to_string());
println!(
" {:>10} vello={} directo={:>7.1}ms {} {:>5.1}fps {:>5.2}Mprim/s",
fmt_int(r.n),
v_ms,
r.ms,
factor,
r.fps,
r.mps,
);
}
println!("```");
}
// ============================================================
// Textura destino + PNG export
// ============================================================
fn make_target(device: &wgpu::Device) -> (wgpu::Texture, wgpu::TextureView) {
let tex = device.create_texture(&wgpu::TextureDescriptor {
label: Some("bench-target"),
size: wgpu::Extent3d {
width: W,
height: H,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: FMT,
// RENDER_ATTACHMENT para el directo, STORAGE_BINDING para vello,
// TEXTURE_BINDING + COPY_SRC para poder leer (PNG export).
usage: wgpu::TextureUsages::RENDER_ATTACHMENT
| wgpu::TextureUsages::STORAGE_BINDING
| wgpu::TextureUsages::TEXTURE_BINDING
| wgpu::TextureUsages::COPY_SRC,
view_formats: &[],
});
let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
(tex, view)
}
fn export_vello_png(
hal: &Hal,
renderer: &mut vello::Renderer,
target: &wgpu::Texture,
view: &wgpu::TextureView,
n: u32,
path: &str,
) -> Result<(), String> {
let mut scene = vello::Scene::new();
let mut state: u32 = 0x1234_5678;
for _ in 0..n {
let (x, y, rgba) = lcg_point(&mut state);
let r = (rgba & 0xFF) as u8;
let g = ((rgba >> 8) & 0xFF) as u8;
let b = ((rgba >> 16) & 0xFF) as u8;
let a = ((rgba >> 24) & 0xFF) as u8;
scene.fill(
Fill::NonZero,
Affine::IDENTITY,
Color::from_rgba8(r, g, b, a),
None,
&Rect::new(x as f64, y as f64, x as f64 + POINT_PX as f64, y as f64 + POINT_PX as f64),
);
}
renderer
.render_to_texture(
&hal.device,
&hal.queue,
&scene,
view,
&vello::RenderParams {
base_color: palette::css::BLACK,
width: W,
height: H,
antialiasing_method: vello::AaConfig::Area,
},
)
.map_err(|e| format!("{e:?}"))?;
write_texture_png(hal, target, path)
}
fn export_directo_png(
hal: &Hal,
pipelines: &GpuPipelines,
target: &wgpu::Texture,
view: &wgpu::TextureView,
n: u32,
path: &str,
) -> Result<(), String> {
let mut batch = GpuBatch::new(pipelines);
let mut state: u32 = 0x1234_5678;
for _ in 0..n {
let (x, y, rgba) = lcg_point(&mut state);
let r = (rgba & 0xFF) as u8;
let g = ((rgba >> 8) & 0xFF) as u8;
let b = ((rgba >> 16) & 0xFF) as u8;
let a = ((rgba >> 24) & 0xFF) as u8;
batch.add_rect(x, y, POINT_PX, POINT_PX, Color::from_rgba8(r, g, b, a));
}
let mut encoder = hal.device.create_command_encoder(
&wgpu::CommandEncoderDescriptor {
label: Some("png-directo-enc"),
},
);
batch.flush(
&hal.device,
&hal.queue,
&mut encoder,
view,
(W as f32, H as f32),
wgpu::LoadOp::Clear(wgpu::Color::BLACK),
);
hal.queue.submit(std::iter::once(encoder.finish()));
hal.device.poll(wgpu::Maintain::Wait);
write_texture_png(hal, target, path)
}
/// Copia la textura a un buffer mapeable + lee + escribe PNG.
fn write_texture_png(hal: &Hal, target: &wgpu::Texture, path: &str) -> Result<(), String> {
// wgpu pide stride alineado a 256 B en COPY_TEXTURE_TO_BUFFER.
let unpadded = (W * 4) as usize;
let align = wgpu::COPY_BYTES_PER_ROW_ALIGNMENT as usize;
let padded = ((unpadded + align - 1) / align) * align;
let buf_size = (padded * H as usize) as u64;
let buf = hal.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("png-readback"),
size: buf_size,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let mut encoder = hal.device.create_command_encoder(
&wgpu::CommandEncoderDescriptor {
label: Some("png-copy-enc"),
},
);
encoder.copy_texture_to_buffer(
wgpu::TexelCopyTextureInfo {
texture: target,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
wgpu::TexelCopyBufferInfo {
buffer: &buf,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(padded as u32),
rows_per_image: Some(H),
},
},
wgpu::Extent3d {
width: W,
height: H,
depth_or_array_layers: 1,
},
);
hal.queue.submit(std::iter::once(encoder.finish()));
let slice = buf.slice(..);
let (tx, rx) = std::sync::mpsc::channel();
slice.map_async(wgpu::MapMode::Read, move |r| {
let _ = tx.send(r);
});
hal.device.poll(wgpu::Maintain::Wait);
rx.recv().map_err(|e| e.to_string())?.map_err(|e| e.to_string())?;
let data = slice.get_mapped_range();
// Desempaquetar las filas (skip padding) y escribir PNG.
let mut pixels = Vec::with_capacity((W * H * 4) as usize);
for row in 0..H {
let start = row as usize * padded;
let end = start + unpadded;
pixels.extend_from_slice(&data[start..end]);
}
drop(data);
buf.unmap();
let file = File::create(path).map_err(|e| e.to_string())?;
let writer = BufWriter::new(file);
let mut encoder = png::Encoder::new(writer, W, H);
encoder.set_color(png::ColorType::Rgba);
encoder.set_depth(png::BitDepth::Eight);
let mut w = encoder.write_header().map_err(|e| e.to_string())?;
w.write_image_data(&pixels).map_err(|e| e.to_string())?;
Ok(())
}