feat: llimphi standalone — framework UI soberano extraído del monorepo
Motor gráfico Llimphi como workspace independiente: bucle Elm (input→update→view→layout→raster→present) sobre wgpu+vello+taffy+parley. Núcleo (hal/raster/layout/text/ui/theme/surface/motion/icons) + ~40 widgets + módulos, sin dependencias al resto del monorepo. cargo check --workspace pasa (64 crates). Puerta de entrada: cargo run -p llimphi-ui --example counter. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "llimphi-raster"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
authors.workspace = true
|
||||
publish.workspace = true
|
||||
|
||||
[dependencies]
|
||||
llimphi-hal = { path = "../llimphi-hal" }
|
||||
vello = { workspace = true }
|
||||
pollster = { workspace = true }
|
||||
|
||||
[[example]]
|
||||
name = "render_node"
|
||||
path = "examples/render_node.rs"
|
||||
|
||||
[[example]]
|
||||
name = "spike_gpu_directo"
|
||||
path = "examples/spike_gpu_directo.rs"
|
||||
|
||||
[[example]]
|
||||
name = "gpu_million_points"
|
||||
path = "examples/gpu_million_points.rs"
|
||||
@@ -0,0 +1,10 @@
|
||||
# llimphi-raster
|
||||
|
||||
> Rasterizer vello + cache de scenes de [llimphi](../README.md).
|
||||
|
||||
Wrapper sobre `vello`/`wgpu` con cache LRU de `Scene`s pre-renderizadas (para layouts estáticos que no cambian frame a frame). Manejo de antialiasing, clipping, blend modes. Trabaja contra `Surface` del HAL.
|
||||
|
||||
## Deps
|
||||
|
||||
- [`llimphi-hal`](../llimphi-hal/README.md)
|
||||
- `vello` (re-exporta `peniko` y `kurbo`), `pollster`; `wgpu` llega vía `llimphi-hal`
|
||||
@@ -0,0 +1,10 @@
|
||||
# llimphi-raster
|
||||
|
||||
> Vello rasterizer + scene cache of [llimphi](../README.md).
|
||||
|
||||
Wrapper over `vello`/`wgpu` with LRU cache of pre-rendered `Scene`s (for static layouts that don't change frame to frame). Antialiasing, clipping, blend modes. Works against the HAL's `Surface`.
|
||||
|
||||
## Deps
|
||||
|
||||
- [`llimphi-hal`](../llimphi-hal/README.md)
|
||||
- `vello` (re-exports `peniko` and `kurbo`), `pollster`; `wgpu` comes through `llimphi-hal`
|
||||
@@ -0,0 +1,111 @@
|
||||
//! Demo headless del HAL GPU directo — Fase 6 del SDD
|
||||
//! `02_ruway/llimphi/SDD.md` §"GPU directo wgpu".
|
||||
//!
|
||||
//! A diferencia de `spike_gpu_directo` (que compara vello vs un pipeline
|
||||
//! mock para tomar la decisión arquitectónica), este ejemplo usa
|
||||
//! directamente la API pública `GpuPipelines` + `GpuBatch` sobre N
|
||||
//! puntos (rects 1.2×1.2 px) sintéticos. Su rol es:
|
||||
//!
|
||||
//! - Documentar el uso mínimo: 8 líneas de código + uso de Color.
|
||||
//! - Ejercitar el HAL sin ninguna app (sin winit, sin runtime Elm).
|
||||
//! - Servir de benchmark de referencia post-implementación: tiempo
|
||||
//! total CPU+GPU para 100K / 500K / 1M / 5M rects.
|
||||
//!
|
||||
//! Corre con: `cargo run -p llimphi-raster --example gpu_million_points --release`.
|
||||
|
||||
use std::io::Write;
|
||||
use std::time::Instant;
|
||||
|
||||
use llimphi_hal::{wgpu, Hal};
|
||||
use llimphi_raster::peniko::Color;
|
||||
use llimphi_raster::{GpuBatch, GpuPipelines};
|
||||
|
||||
const W: u32 = 1024;
|
||||
const H: u32 = 1024;
|
||||
const FMT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm;
|
||||
const WARMUP: usize = 5;
|
||||
const MEASURED: usize = 15;
|
||||
const SIZES: &[u32] = &[100_000, 500_000, 1_000_000, 5_000_000];
|
||||
|
||||
fn main() {
|
||||
let hal = pollster::block_on(Hal::new(None)).expect("hal");
|
||||
let pipelines = GpuPipelines::new(&hal.device, FMT);
|
||||
|
||||
let (_tex, view) = make_target(&hal.device);
|
||||
|
||||
println!();
|
||||
println!("gpu_million_points — GpuBatch + 3 pipelines · target {W}×{H} Rgba8Unorm");
|
||||
println!("warmup {WARMUP}, measured {MEASURED}");
|
||||
println!(" {:>10} | {:>14} | {:>14}", "N", "ms / frame", "Mprim/s");
|
||||
println!(" {:->10} + {:->14} + {:->14}", "", "", "");
|
||||
|
||||
for &n in SIZES {
|
||||
let ms = bench(&hal, &pipelines, &view, n);
|
||||
let throughput = (n as f64 / 1_000_000.0) / (ms / 1000.0);
|
||||
println!(" {:>10} | {:>14.3} | {:>14.2}", n, ms, throughput);
|
||||
let _ = std::io::stdout().flush();
|
||||
}
|
||||
println!();
|
||||
println!("(en llvmpipe estos números son CPU-bound — ver Fase 0 del SDD)");
|
||||
println!();
|
||||
}
|
||||
|
||||
fn make_target(device: &wgpu::Device) -> (wgpu::Texture, wgpu::TextureView) {
|
||||
let tex = device.create_texture(&wgpu::TextureDescriptor {
|
||||
label: Some("gpu_million_points-target"),
|
||||
size: wgpu::Extent3d {
|
||||
width: W,
|
||||
height: H,
|
||||
depth_or_array_layers: 1,
|
||||
},
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: wgpu::TextureDimension::D2,
|
||||
format: FMT,
|
||||
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
|
||||
view_formats: &[],
|
||||
});
|
||||
let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
|
||||
(tex, view)
|
||||
}
|
||||
|
||||
fn bench(hal: &Hal, pipelines: &GpuPipelines, view: &wgpu::TextureView, n: u32) -> f64 {
|
||||
let mut samples: Vec<f64> = Vec::with_capacity(MEASURED);
|
||||
for frame in 0..(WARMUP + MEASURED) {
|
||||
let t0 = Instant::now();
|
||||
let mut batch = GpuBatch::new(pipelines);
|
||||
let mut state: u32 = 0x1234_5678;
|
||||
for _ in 0..n {
|
||||
state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
|
||||
let x = (state % W) as f32;
|
||||
state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
|
||||
let y = (state % H) as f32;
|
||||
state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
|
||||
let r = ((state >> 0) & 0xFF) as f32 / 255.0;
|
||||
let g = ((state >> 8) & 0xFF) as f32 / 255.0;
|
||||
let b = ((state >> 16) & 0xFF) as f32 / 255.0;
|
||||
batch.add_rect(x, y, 1.2, 1.2, Color::new([r, g, b, 1.0]));
|
||||
}
|
||||
let mut encoder = hal.device.create_command_encoder(
|
||||
&wgpu::CommandEncoderDescriptor {
|
||||
label: Some("gpu_million_points-enc"),
|
||||
},
|
||||
);
|
||||
batch.flush(
|
||||
&hal.device,
|
||||
&hal.queue,
|
||||
&mut encoder,
|
||||
view,
|
||||
(W as f32, H as f32),
|
||||
wgpu::LoadOp::Clear(wgpu::Color::BLACK),
|
||||
);
|
||||
hal.queue.submit(std::iter::once(encoder.finish()));
|
||||
hal.device.poll(wgpu::Maintain::Wait);
|
||||
let dt = t0.elapsed().as_secs_f64() * 1000.0;
|
||||
if frame >= WARMUP {
|
||||
samples.push(dt);
|
||||
}
|
||||
}
|
||||
samples.sort_by(|a, b| a.partial_cmp(b).unwrap());
|
||||
samples[samples.len() / 2]
|
||||
}
|
||||
@@ -0,0 +1,143 @@
|
||||
//! Fase 2 de Llimphi: un nodo (círculo + halo) renderizado por vello con AA
|
||||
//! perfecto sobre el swapchain de llimphi-hal.
|
||||
//!
|
||||
//! Corre con: `cargo run -p llimphi-raster --example render_node --release`.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
use llimphi_hal::winit::application::ApplicationHandler;
|
||||
use llimphi_hal::winit::dpi::LogicalSize;
|
||||
use llimphi_hal::winit::event::WindowEvent;
|
||||
use llimphi_hal::winit::event_loop::{ActiveEventLoop, ControlFlow, EventLoop};
|
||||
use llimphi_hal::winit::window::{Window, WindowAttributes, WindowId};
|
||||
use llimphi_hal::{Hal, Surface, WinitSurface};
|
||||
use llimphi_raster::kurbo::{Affine, Circle, Stroke};
|
||||
use llimphi_raster::peniko::{color::palette, Color, Fill};
|
||||
use llimphi_raster::{vello, Renderer};
|
||||
|
||||
/// Everything that can only be created once the window exists; built in
/// `resumed` and used by `window_event`.
struct State {
    /// Shared window handle; used to request redraws.
    window: Arc<Window>,
    /// HAL instance handed to the surface, the renderer and `present`.
    hal: Hal,
    /// Window-backed surface: resized on `Resized`, acquired and presented
    /// on every redraw.
    surface: WinitSurface,
    /// Renderer that draws `scene` into the acquired frame.
    renderer: Renderer,
    /// Scene reused across frames: reset and rebuilt on every redraw.
    scene: vello::Scene,
}
|
||||
|
||||
/// winit application handler for the `render_node` example.
struct App {
    /// `None` until the first `resumed` call creates the window and the
    /// GPU objects.
    state: Option<State>,
    /// Instant captured in `main`; its elapsed time drives the pulse
    /// animation.
    started: Instant,
}
|
||||
|
||||
impl ApplicationHandler for App {
|
||||
fn resumed(&mut self, event_loop: &ActiveEventLoop) {
|
||||
if self.state.is_some() {
|
||||
return;
|
||||
}
|
||||
let window = event_loop
|
||||
.create_window(
|
||||
WindowAttributes::default()
|
||||
.with_title("llimphi · render_node")
|
||||
.with_inner_size(LogicalSize::new(960u32, 540u32)),
|
||||
)
|
||||
.expect("create window");
|
||||
let window = Arc::new(window);
|
||||
let hal = pollster::block_on(Hal::new(None)).expect("hal");
|
||||
let surface = WinitSurface::new(&hal, window.clone()).expect("surface");
|
||||
let renderer = Renderer::new(&hal).expect("renderer");
|
||||
window.request_redraw();
|
||||
self.state = Some(State {
|
||||
window,
|
||||
hal,
|
||||
surface,
|
||||
renderer,
|
||||
scene: vello::Scene::new(),
|
||||
});
|
||||
}
|
||||
|
||||
fn window_event(
|
||||
&mut self,
|
||||
event_loop: &ActiveEventLoop,
|
||||
_id: WindowId,
|
||||
event: WindowEvent,
|
||||
) {
|
||||
let Some(state) = self.state.as_mut() else {
|
||||
return;
|
||||
};
|
||||
match event {
|
||||
WindowEvent::CloseRequested => event_loop.exit(),
|
||||
WindowEvent::Resized(size) => {
|
||||
state.surface.resize(size.width, size.height);
|
||||
state.window.request_redraw();
|
||||
}
|
||||
WindowEvent::RedrawRequested => {
|
||||
let frame = match state.surface.acquire() {
|
||||
Ok(f) => f,
|
||||
Err(_) => {
|
||||
let (w, h) = state.surface.size();
|
||||
state.surface.resize(w, h);
|
||||
state.window.request_redraw();
|
||||
return;
|
||||
}
|
||||
};
|
||||
let (w, h) = frame.size();
|
||||
state.scene.reset();
|
||||
build_node(&mut state.scene, w as f64, h as f64, self.started.elapsed().as_secs_f64());
|
||||
if let Err(e) = state.renderer.render(
|
||||
&state.hal,
|
||||
&state.scene,
|
||||
&frame,
|
||||
palette::css::BLACK,
|
||||
) {
|
||||
eprintln!("render error: {e}");
|
||||
}
|
||||
state.surface.present(frame, &state.hal);
|
||||
state.window.request_redraw();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pinta un nodo centrado (círculo lleno + halo) que respira con `t`.
|
||||
fn build_node(scene: &mut vello::Scene, w: f64, h: f64, t: f64) {
|
||||
let cx = w * 0.5;
|
||||
let cy = h * 0.5;
|
||||
let pulse = 1.0 + 0.06 * (t * 1.6).sin();
|
||||
let r = (h.min(w) * 0.18) * pulse;
|
||||
|
||||
// Halo
|
||||
scene.stroke(
|
||||
&Stroke::new(2.0),
|
||||
Affine::IDENTITY,
|
||||
Color::from_rgba8(60, 120, 200, 180),
|
||||
None,
|
||||
&Circle::new((cx, cy), r * 1.35),
|
||||
);
|
||||
// Cuerpo
|
||||
scene.fill(
|
||||
Fill::NonZero,
|
||||
Affine::IDENTITY,
|
||||
Color::from_rgba8(90, 160, 230, 255),
|
||||
None,
|
||||
&Circle::new((cx, cy), r),
|
||||
);
|
||||
// Borde
|
||||
scene.stroke(
|
||||
&Stroke::new(3.0),
|
||||
Affine::IDENTITY,
|
||||
Color::from_rgba8(20, 50, 100, 255),
|
||||
None,
|
||||
&Circle::new((cx, cy), r),
|
||||
);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let event_loop = EventLoop::new().expect("event loop");
|
||||
event_loop.set_control_flow(ControlFlow::Poll);
|
||||
let mut app = App {
|
||||
state: None,
|
||||
started: Instant::now(),
|
||||
};
|
||||
event_loop.run_app(&mut app).expect("run app");
|
||||
}
|
||||
@@ -0,0 +1,390 @@
|
||||
//! Spike Fase 0 — GPU directo vs vello.
|
||||
//!
|
||||
//! Compara el tiempo total CPU+GPU por frame para pintar N puntos en una
|
||||
//! textura `Rgba8Unorm` 1024×1024 con dos estrategias:
|
||||
//!
|
||||
//! - **Vello**: una llamada `Scene::fill(Rect 1×1)` por punto, luego
|
||||
//! `vello::Renderer::render_to_texture`.
|
||||
//! - **GPU directo**: un pipeline `wgpu` con instanced quad. Cada punto es
|
||||
//! una instancia `[x:f32, y:f32, rgba:u32]`. Una sola draw call.
|
||||
//!
|
||||
//! Tamaños: 25K–200K puntos para vello (`VELLO_SIZES`) y 100K–5M para el
//! pipeline directo (`DIRECTO_SIZES`). 5 frames de warmup + 15 medidos por
|
||||
//! tamaño. Reporta mediana y factor de aceleración.
|
||||
//!
|
||||
//! Criterio de aceptación del SDD (`llimphi/SDD.md` §"GPU directo wgpu"):
|
||||
//! factor ≥ 5× a 500K → seguir con Fase 1. Si no, abortar.
|
||||
//!
|
||||
//! Corre con: `cargo run -p llimphi-raster --example spike_gpu_directo --release`.
|
||||
|
||||
use std::io::Write;
|
||||
use std::time::Instant;
|
||||
|
||||
use llimphi_hal::{wgpu, Hal};
|
||||
use llimphi_raster::{
|
||||
kurbo::{Affine, Rect},
|
||||
peniko::{color::palette, Color, Fill},
|
||||
vello,
|
||||
};
|
||||
|
||||
const W: u32 = 1024;
|
||||
const H: u32 = 1024;
|
||||
const TARGET_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm;
|
||||
const WARMUP_FRAMES: usize = 5;
|
||||
const MEASURED_FRAMES: usize = 15;
|
||||
// Vello revienta (SIGSEGV en `vello_encoding::path::flatten`) cuando la
|
||||
// escena pasa de ~200K paths con los `Limits::default()` que pide el HAL.
|
||||
// Es exactamente el techo del SDD §"GPU directo wgpu". Lo medimos hasta
|
||||
// donde vello aguanta; el lado directo se mide a sizes mucho mayores para
|
||||
// confirmar el régimen post-techo.
|
||||
const VELLO_SIZES: &[usize] = &[25_000, 50_000, 100_000, 200_000];
|
||||
const DIRECTO_SIZES: &[usize] = &[100_000, 500_000, 1_000_000, 5_000_000];
|
||||
|
||||
fn main() {
|
||||
let hal = pollster::block_on(Hal::new(None)).expect("hal");
|
||||
|
||||
// Textura destino compartida por ambos backends. STORAGE_BINDING para
|
||||
// vello (compute), RENDER_ATTACHMENT para el pipeline directo. Idéntica
|
||||
// al `intermediate` de `WinitSurface` (HAL real).
|
||||
let (target, target_view) = create_target(&hal.device);
|
||||
|
||||
let mut vello_renderer = vello::Renderer::new(
|
||||
&hal.device,
|
||||
vello::RendererOptions {
|
||||
use_cpu: false,
|
||||
antialiasing_support: vello::AaSupport {
|
||||
area: true,
|
||||
msaa8: false,
|
||||
msaa16: false,
|
||||
},
|
||||
num_init_threads: None,
|
||||
pipeline_cache: None,
|
||||
},
|
||||
)
|
||||
.expect("vello renderer");
|
||||
|
||||
let directo = DirectoPipeline::new(&hal.device);
|
||||
|
||||
println!();
|
||||
println!("spike GPU directo — target {W}×{H} Rgba8Unorm, headless");
|
||||
println!("warmup {WARMUP_FRAMES}, measured {MEASURED_FRAMES}");
|
||||
println!();
|
||||
println!("vello (scene.fill por punto):");
|
||||
println!(" {:>10} | {:>14}", "N", "ms / frame");
|
||||
println!(" {:->10} + {:->14}", "", "");
|
||||
let mut vello_100k_ms: Option<f64> = None;
|
||||
for &n in VELLO_SIZES {
|
||||
let points = gen_points(n);
|
||||
let ms = bench_vello(&hal, &mut vello_renderer, &target_view, &points);
|
||||
println!(" {:>10} | {:>14.3}", n, ms);
|
||||
let _ = std::io::stdout().flush();
|
||||
if n == 100_000 {
|
||||
vello_100k_ms = Some(ms);
|
||||
}
|
||||
}
|
||||
println!();
|
||||
println!("GPU directo (instanced quad, 1 draw call):");
|
||||
println!(" {:>10} | {:>14}", "N", "ms / frame");
|
||||
println!(" {:->10} + {:->14}", "", "");
|
||||
let mut directo_100k_ms: Option<f64> = None;
|
||||
for &n in DIRECTO_SIZES {
|
||||
let points = gen_points(n);
|
||||
let ms = bench_directo(&hal, &directo, &target_view, &points);
|
||||
println!(" {:>10} | {:>14.3}", n, ms);
|
||||
let _ = std::io::stdout().flush();
|
||||
if n == 100_000 {
|
||||
directo_100k_ms = Some(ms);
|
||||
}
|
||||
}
|
||||
println!();
|
||||
if let (Some(v), Some(d)) = (vello_100k_ms, directo_100k_ms) {
|
||||
let factor = v / d;
|
||||
let verdict = if factor >= 5.0 { "PASA" } else { "ABORTAR" };
|
||||
println!(
|
||||
"veredicto Fase 0 @ 100K: vello {:.2} ms / directo {:.2} ms = {:.2}× → {}",
|
||||
v, d, factor, verdict
|
||||
);
|
||||
println!("(SDD pide ≥5× a 500K, pero vello no llega a 500K — techo medido <300K)");
|
||||
}
|
||||
println!();
|
||||
// Mantener vivo el texture para evitar warnings.
|
||||
drop(target);
|
||||
}
|
||||
|
||||
fn create_target(device: &wgpu::Device) -> (wgpu::Texture, wgpu::TextureView) {
|
||||
let tex = device.create_texture(&wgpu::TextureDescriptor {
|
||||
label: Some("spike-target"),
|
||||
size: wgpu::Extent3d {
|
||||
width: W,
|
||||
height: H,
|
||||
depth_or_array_layers: 1,
|
||||
},
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: wgpu::TextureDimension::D2,
|
||||
format: TARGET_FORMAT,
|
||||
usage: wgpu::TextureUsages::STORAGE_BINDING
|
||||
| wgpu::TextureUsages::RENDER_ATTACHMENT
|
||||
| wgpu::TextureUsages::TEXTURE_BINDING,
|
||||
view_formats: &[],
|
||||
});
|
||||
let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
|
||||
(tex, view)
|
||||
}
|
||||
|
||||
/// LCG numerical recipes — determinista, sin dependencias.
|
||||
fn gen_points(n: usize) -> Vec<(f32, f32, u32)> {
|
||||
let mut state: u32 = 0x1234_5678;
|
||||
let mut out = Vec::with_capacity(n);
|
||||
for _ in 0..n {
|
||||
state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
|
||||
let x = (state % W) as f32;
|
||||
state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
|
||||
let y = (state % H) as f32;
|
||||
state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223);
|
||||
// RGBA packed little-endian: R en byte bajo (queda igual a como lo
|
||||
// lee el shader: `rgba & 0xFF` → R).
|
||||
let rgba = (state & 0x00FF_FFFF) | 0xFF00_0000;
|
||||
out.push((x, y, rgba));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn bench_vello(
|
||||
hal: &Hal,
|
||||
renderer: &mut vello::Renderer,
|
||||
target: &wgpu::TextureView,
|
||||
points: &[(f32, f32, u32)],
|
||||
) -> f64 {
|
||||
let mut scene = vello::Scene::new();
|
||||
let mut samples: Vec<f64> = Vec::with_capacity(MEASURED_FRAMES);
|
||||
for frame in 0..(WARMUP_FRAMES + MEASURED_FRAMES) {
|
||||
let t0 = Instant::now();
|
||||
scene.reset();
|
||||
for &(x, y, rgba) in points {
|
||||
let r = (rgba & 0xFF) as u8;
|
||||
let g = ((rgba >> 8) & 0xFF) as u8;
|
||||
let b = ((rgba >> 16) & 0xFF) as u8;
|
||||
let a = ((rgba >> 24) & 0xFF) as u8;
|
||||
let xf = x as f64;
|
||||
let yf = y as f64;
|
||||
scene.fill(
|
||||
Fill::NonZero,
|
||||
Affine::IDENTITY,
|
||||
Color::from_rgba8(r, g, b, a),
|
||||
None,
|
||||
&Rect::new(xf, yf, xf + 1.0, yf + 1.0),
|
||||
);
|
||||
}
|
||||
renderer
|
||||
.render_to_texture(
|
||||
&hal.device,
|
||||
&hal.queue,
|
||||
&scene,
|
||||
target,
|
||||
&vello::RenderParams {
|
||||
base_color: palette::css::BLACK,
|
||||
width: W,
|
||||
height: H,
|
||||
antialiasing_method: vello::AaConfig::Area,
|
||||
},
|
||||
)
|
||||
.expect("vello render");
|
||||
// Bloquear hasta que la GPU termine este frame. Sin esto medimos
|
||||
// sólo el submit + queue building, no el trabajo real.
|
||||
hal.device.poll(wgpu::Maintain::Wait);
|
||||
let dt = t0.elapsed().as_secs_f64() * 1000.0;
|
||||
if frame >= WARMUP_FRAMES {
|
||||
samples.push(dt);
|
||||
}
|
||||
}
|
||||
median(&mut samples)
|
||||
}
|
||||
|
||||
fn bench_directo(
|
||||
hal: &Hal,
|
||||
pipe: &DirectoPipeline,
|
||||
target: &wgpu::TextureView,
|
||||
points: &[(f32, f32, u32)],
|
||||
) -> f64 {
|
||||
// Buffer de instancias dimensionado para el peor caso.
|
||||
let bytes_per_inst = std::mem::size_of::<[u32; 3]>(); // [x:f32, y:f32, rgba:u32] = 12B
|
||||
let inst_buf = hal.device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("spike-directo-inst"),
|
||||
size: (points.len() * bytes_per_inst) as u64,
|
||||
usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
|
||||
let mut samples: Vec<f64> = Vec::with_capacity(MEASURED_FRAMES);
|
||||
for frame in 0..(WARMUP_FRAMES + MEASURED_FRAMES) {
|
||||
let t0 = Instant::now();
|
||||
// Empaquetar instancias: igual a la "scene build" del lado vello,
|
||||
// para que la comparación sea fair (ambos parten de los mismos
|
||||
// puntos crudos).
|
||||
let bytes = pack_instances(points);
|
||||
hal.queue.write_buffer(&inst_buf, 0, &bytes);
|
||||
|
||||
let mut encoder = hal.device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||
label: Some("spike-directo-enc"),
|
||||
});
|
||||
{
|
||||
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
|
||||
label: Some("spike-directo-pass"),
|
||||
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
|
||||
view: target,
|
||||
resolve_target: None,
|
||||
ops: wgpu::Operations {
|
||||
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
|
||||
store: wgpu::StoreOp::Store,
|
||||
},
|
||||
})],
|
||||
depth_stencil_attachment: None,
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
});
|
||||
pass.set_pipeline(&pipe.pipeline);
|
||||
pass.set_vertex_buffer(0, inst_buf.slice(..));
|
||||
// 6 vértices por instancia (2 tris = quad), N instancias.
|
||||
pass.draw(0..6, 0..points.len() as u32);
|
||||
}
|
||||
hal.queue.submit(std::iter::once(encoder.finish()));
|
||||
hal.device.poll(wgpu::Maintain::Wait);
|
||||
let dt = t0.elapsed().as_secs_f64() * 1000.0;
|
||||
if frame >= WARMUP_FRAMES {
|
||||
samples.push(dt);
|
||||
}
|
||||
}
|
||||
median(&mut samples)
|
||||
}
|
||||
|
||||
/// Serializes the points into the 12-byte-per-instance layout
/// `[x: f32, y: f32, rgba: u32]`, each field in native byte order.
///
/// The returned vector has exactly `points.len() * 12` bytes.
fn pack_instances(points: &[(f32, f32, u32)]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(points.len() * 12);
    for point in points {
        let (x, y, rgba) = *point;
        // Three 4-byte fields, appended in declaration order.
        for field in [x.to_ne_bytes(), y.to_ne_bytes(), rgba.to_ne_bytes()] {
            bytes.extend_from_slice(&field);
        }
    }
    bytes
}
|
||||
|
||||
/// Sorts `samples` in place and returns the element at index `len / 2`.
///
/// For an odd number of samples this is the median; for an even number it
/// is the upper of the two middle values (they are not averaged).
///
/// The sort uses `f64::total_cmp`, so NaN values cannot cause a panic; a
/// positive NaN sorts after every other value. An empty slice yields
/// `f64::NAN` instead of panicking.
fn median(samples: &mut [f64]) -> f64 {
    samples.sort_unstable_by(f64::total_cmp);
    samples
        .get(samples.len() / 2)
        .copied()
        .unwrap_or(f64::NAN)
}
|
||||
|
||||
/// Trivial pipeline for the benchmark: an instanced quad without textures
/// and with per-instance color. Not production code — it is the "mock
/// direct GPU" that Phase 0 of the SDD asks for in order to measure the
/// achievable ceiling.
struct DirectoPipeline {
    /// Render pipeline built in `new` and bound by `bench_directo` for its
    /// single instanced draw call.
    pipeline: wgpu::RenderPipeline,
}
|
||||
|
||||
impl DirectoPipeline {
|
||||
fn new(device: &wgpu::Device) -> Self {
|
||||
let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
|
||||
label: Some("spike-directo-shader"),
|
||||
source: wgpu::ShaderSource::Wgsl(WGSL.into()),
|
||||
});
|
||||
let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
|
||||
label: Some("spike-directo-layout"),
|
||||
bind_group_layouts: &[],
|
||||
push_constant_ranges: &[],
|
||||
});
|
||||
let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
|
||||
label: Some("spike-directo-pipeline"),
|
||||
layout: Some(&layout),
|
||||
vertex: wgpu::VertexState {
|
||||
module: &shader,
|
||||
entry_point: Some("vs"),
|
||||
compilation_options: Default::default(),
|
||||
buffers: &[wgpu::VertexBufferLayout {
|
||||
array_stride: 12,
|
||||
step_mode: wgpu::VertexStepMode::Instance,
|
||||
attributes: &[
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Float32x2,
|
||||
offset: 0,
|
||||
shader_location: 0,
|
||||
},
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Uint32,
|
||||
offset: 8,
|
||||
shader_location: 1,
|
||||
},
|
||||
],
|
||||
}],
|
||||
},
|
||||
primitive: wgpu::PrimitiveState {
|
||||
topology: wgpu::PrimitiveTopology::TriangleList,
|
||||
strip_index_format: None,
|
||||
front_face: wgpu::FrontFace::Ccw,
|
||||
cull_mode: None,
|
||||
unclipped_depth: false,
|
||||
polygon_mode: wgpu::PolygonMode::Fill,
|
||||
conservative: false,
|
||||
},
|
||||
depth_stencil: None,
|
||||
multisample: wgpu::MultisampleState::default(),
|
||||
fragment: Some(wgpu::FragmentState {
|
||||
module: &shader,
|
||||
entry_point: Some("fs"),
|
||||
compilation_options: Default::default(),
|
||||
targets: &[Some(wgpu::ColorTargetState {
|
||||
format: TARGET_FORMAT,
|
||||
blend: None,
|
||||
write_mask: wgpu::ColorWrites::ALL,
|
||||
})],
|
||||
}),
|
||||
multiview: None,
|
||||
cache: None,
|
||||
});
|
||||
Self { pipeline }
|
||||
}
|
||||
}
|
||||
|
||||
const WGSL: &str = r#"
|
||||
struct Inst {
|
||||
@location(0) xy: vec2<f32>,
|
||||
@location(1) rgba: u32,
|
||||
};
|
||||
|
||||
struct V2F {
|
||||
@builtin(position) pos: vec4<f32>,
|
||||
@location(0) color: vec4<f32>,
|
||||
};
|
||||
|
||||
const W: f32 = 1024.0;
|
||||
const H: f32 = 1024.0;
|
||||
|
||||
@vertex
|
||||
fn vs(@builtin(vertex_index) vid: u32, inst: Inst) -> V2F {
|
||||
// Quad 1.5px alrededor de (inst.xy + 0.5). Pixel-centered.
|
||||
var corners = array<vec2<f32>, 6>(
|
||||
vec2<f32>(-0.75, -0.75),
|
||||
vec2<f32>( 0.75, -0.75),
|
||||
vec2<f32>( 0.75, 0.75),
|
||||
vec2<f32>(-0.75, -0.75),
|
||||
vec2<f32>( 0.75, 0.75),
|
||||
vec2<f32>(-0.75, 0.75),
|
||||
);
|
||||
let off = corners[vid];
|
||||
let px = inst.xy + vec2<f32>(0.5, 0.5) + off;
|
||||
// pixel → NDC, Y invertido (vello / textura framebuffer).
|
||||
let ndc = vec2<f32>(px.x / W * 2.0 - 1.0, 1.0 - px.y / H * 2.0);
|
||||
|
||||
let r = f32( inst.rgba & 0xFFu) / 255.0;
|
||||
let g = f32((inst.rgba >> 8u) & 0xFFu) / 255.0;
|
||||
let b = f32((inst.rgba >> 16u) & 0xFFu) / 255.0;
|
||||
let a = f32((inst.rgba >> 24u) & 0xFFu) / 255.0;
|
||||
|
||||
var out: V2F;
|
||||
out.pos = vec4<f32>(ndc, 0.0, 1.0);
|
||||
out.color = vec4<f32>(r, g, b, a);
|
||||
return out;
|
||||
}
|
||||
|
||||
@fragment
|
||||
fn fs(in: V2F) -> @location(0) vec4<f32> {
|
||||
return in.color;
|
||||
}
|
||||
"#;
|
||||
@@ -0,0 +1,553 @@
|
||||
//! Backend GPU directo (Fases 2 + 3 del SDD §"GPU directo wgpu").
|
||||
//!
|
||||
//! Tres pipelines `wgpu` cacheadas en [`GpuPipelines`] (lines / tris /
|
||||
//! rects) + un acumulador [`GpuBatch`] que las apps usan por frame para
|
||||
//! emitir centenares de miles a millones de primitivos en una draw call
|
||||
//! por tipo, sin pasar por vello.
|
||||
//!
|
||||
//! Diseño minimal Fase 2/3:
|
||||
//!
|
||||
//! - Vertex format triángulos: `[x: f32, y: f32, rgba: u32]` (12 B/vert).
|
||||
//! - Instance format líneas: `[x0, y0, x1, y1, rgba]` (20 B/seg).
|
||||
//! - Instance format rects: `[x, y, w, h, rgba]` (20 B/rect).
|
||||
//! - Sin texturas. Sin AA por shader — quien necesite AA fino sigue por
|
||||
//! vello. Para puntos densos el "popping" no se nota.
|
||||
//! - Blending alfa habilitado: el alpha del color es respetado.
|
||||
//! - El viewport `(width, height)` se pasa al flush y va en un uniform —
|
||||
//! los shaders convierten pixel → NDC ahí.
|
||||
//!
|
||||
//! Cache de pipelines: una sola instancia de `GpuPipelines` por
|
||||
//! `(device, color_format)`. Construirla compila los 3 pipelines en
|
||||
//! caliente (~ms en hardware moderno). Los callers la mantienen viva
|
||||
//! entre frames (en su Model o vía `OnceLock`).
|
||||
//!
|
||||
//! Grow strategy: `flush` crea un buffer por tipo no vacío en el
|
||||
//! mismo frame. Sin reuso entre frames — Fase 4 (`GpuSceneCanvas`)
|
||||
//! introducirá el `GpuBuffers` persistente que dobla capacidad si
|
||||
//! aparece la necesidad.
|
||||
|
||||
use llimphi_hal::wgpu;
|
||||
use vello::peniko::Color;
|
||||
|
||||
/// Cached pipelines. Create one per process (or per surface format).
///
/// For typical use through [`GpuBatch`] the fields are not touched
/// directly. The public API exists for advanced callers that want to set up
/// their own persistent buffer (data that does not change per frame: Gaia
/// starfield, initial particles, static viewport) and issue draw calls
/// manually while reusing these pipelines.
///
/// Layouts:
/// - Triangle vertex buffer: `[x: f32, y: f32, rgba: u32]` (12 B/vert).
/// - Rect instance buffer: `[x, y, w, h, rgba]` (20 B/inst).
/// - Line instance buffer: `[x0, y0, x1, y1, rgba]` (20 B/inst).
/// - Bind group 0 binding 0: uniform `{viewport: vec2<f32>, line_width: f32, _pad: f32}` (16 B).
pub struct GpuPipelines {
    /// Pipeline for thick line segments (vertex entry point `vs_lines`).
    pub lines: wgpu::RenderPipeline,
    /// Pipeline for plain triangles (vertex entry point `vs_tris`).
    pub tris: wgpu::RenderPipeline,
    /// Pipeline for instanced rects (vertex entry point `vs_rects`).
    pub rects: wgpu::RenderPipeline,
    /// Layout of bind group 0 (one uniform buffer visible to the vertex
    /// stage), shared by the three pipelines.
    pub bind_layout: wgpu::BindGroupLayout,
}
|
||||
|
||||
impl GpuPipelines {
|
||||
/// Compila los 3 pipelines apuntando al `color_format` del target
|
||||
/// que recibirán en `flush` (el de la intermediate de `WinitSurface`,
|
||||
/// normalmente `Rgba8Unorm`).
|
||||
pub fn new(device: &wgpu::Device, color_format: wgpu::TextureFormat) -> Self {
|
||||
let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
|
||||
label: Some("llimphi-raster-gpu-shader"),
|
||||
source: wgpu::ShaderSource::Wgsl(WGSL.into()),
|
||||
});
|
||||
|
||||
let bind_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||
label: Some("llimphi-raster-gpu-bgl"),
|
||||
entries: &[wgpu::BindGroupLayoutEntry {
|
||||
binding: 0,
|
||||
visibility: wgpu::ShaderStages::VERTEX,
|
||||
ty: wgpu::BindingType::Buffer {
|
||||
ty: wgpu::BufferBindingType::Uniform,
|
||||
has_dynamic_offset: false,
|
||||
min_binding_size: None,
|
||||
},
|
||||
count: None,
|
||||
}],
|
||||
});
|
||||
|
||||
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
|
||||
label: Some("llimphi-raster-gpu-pl"),
|
||||
bind_group_layouts: &[&bind_layout],
|
||||
push_constant_ranges: &[],
|
||||
});
|
||||
|
||||
let color_targets = [Some(wgpu::ColorTargetState {
|
||||
format: color_format,
|
||||
blend: Some(wgpu::BlendState::ALPHA_BLENDING),
|
||||
write_mask: wgpu::ColorWrites::ALL,
|
||||
})];
|
||||
|
||||
// Triángulos (vertex buffer plano, color per-vertex).
|
||||
let tris = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
|
||||
label: Some("llimphi-raster-gpu-tris"),
|
||||
layout: Some(&pipeline_layout),
|
||||
vertex: wgpu::VertexState {
|
||||
module: &shader,
|
||||
entry_point: Some("vs_tris"),
|
||||
compilation_options: Default::default(),
|
||||
buffers: &[wgpu::VertexBufferLayout {
|
||||
array_stride: 12,
|
||||
step_mode: wgpu::VertexStepMode::Vertex,
|
||||
attributes: &[
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Float32x2,
|
||||
offset: 0,
|
||||
shader_location: 0,
|
||||
},
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Uint32,
|
||||
offset: 8,
|
||||
shader_location: 1,
|
||||
},
|
||||
],
|
||||
}],
|
||||
},
|
||||
primitive: tri_primitive(),
|
||||
depth_stencil: None,
|
||||
multisample: wgpu::MultisampleState::default(),
|
||||
fragment: Some(wgpu::FragmentState {
|
||||
module: &shader,
|
||||
entry_point: Some("fs"),
|
||||
compilation_options: Default::default(),
|
||||
targets: &color_targets,
|
||||
}),
|
||||
multiview: None,
|
||||
cache: None,
|
||||
});
|
||||
|
||||
// Rects (instanced quad).
|
||||
let rects = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
|
||||
label: Some("llimphi-raster-gpu-rects"),
|
||||
layout: Some(&pipeline_layout),
|
||||
vertex: wgpu::VertexState {
|
||||
module: &shader,
|
||||
entry_point: Some("vs_rects"),
|
||||
compilation_options: Default::default(),
|
||||
buffers: &[wgpu::VertexBufferLayout {
|
||||
array_stride: 20,
|
||||
step_mode: wgpu::VertexStepMode::Instance,
|
||||
attributes: &[
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Float32x2,
|
||||
offset: 0,
|
||||
shader_location: 0,
|
||||
},
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Float32x2,
|
||||
offset: 8,
|
||||
shader_location: 1,
|
||||
},
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Uint32,
|
||||
offset: 16,
|
||||
shader_location: 2,
|
||||
},
|
||||
],
|
||||
}],
|
||||
},
|
||||
primitive: tri_primitive(),
|
||||
depth_stencil: None,
|
||||
multisample: wgpu::MultisampleState::default(),
|
||||
fragment: Some(wgpu::FragmentState {
|
||||
module: &shader,
|
||||
entry_point: Some("fs"),
|
||||
compilation_options: Default::default(),
|
||||
targets: &color_targets,
|
||||
}),
|
||||
multiview: None,
|
||||
cache: None,
|
||||
});
|
||||
|
||||
// Líneas con grosor: cada segmento es una instancia de 20 B; el
|
||||
// VS expande a un quad de 6 vértices perpendicular al segmento
|
||||
// usando un grosor uniforme en píxeles (vienen del uniform).
|
||||
let lines = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
|
||||
label: Some("llimphi-raster-gpu-lines"),
|
||||
layout: Some(&pipeline_layout),
|
||||
vertex: wgpu::VertexState {
|
||||
module: &shader,
|
||||
entry_point: Some("vs_lines"),
|
||||
compilation_options: Default::default(),
|
||||
buffers: &[wgpu::VertexBufferLayout {
|
||||
array_stride: 20,
|
||||
step_mode: wgpu::VertexStepMode::Instance,
|
||||
attributes: &[
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Float32x4,
|
||||
offset: 0,
|
||||
shader_location: 0,
|
||||
},
|
||||
wgpu::VertexAttribute {
|
||||
format: wgpu::VertexFormat::Uint32,
|
||||
offset: 16,
|
||||
shader_location: 1,
|
||||
},
|
||||
],
|
||||
}],
|
||||
},
|
||||
primitive: tri_primitive(),
|
||||
depth_stencil: None,
|
||||
multisample: wgpu::MultisampleState::default(),
|
||||
fragment: Some(wgpu::FragmentState {
|
||||
module: &shader,
|
||||
entry_point: Some("fs"),
|
||||
compilation_options: Default::default(),
|
||||
targets: &color_targets,
|
||||
}),
|
||||
multiview: None,
|
||||
cache: None,
|
||||
});
|
||||
|
||||
Self {
|
||||
lines,
|
||||
tris,
|
||||
rects,
|
||||
bind_layout,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Primitive state shared by every pipeline in this module: filled,
/// counter-clockwise triangle lists with back-face culling disabled.
///
/// The remaining fields (`strip_index_format`, `cull_mode`,
/// `unclipped_depth`, `conservative`) are left at their `Default` values,
/// i.e. `None` / `false`.
fn tri_primitive() -> wgpu::PrimitiveState {
    wgpu::PrimitiveState {
        topology: wgpu::PrimitiveTopology::TriangleList,
        front_face: wgpu::FrontFace::Ccw,
        polygon_mode: wgpu::PolygonMode::Fill,
        ..Default::default()
    }
}
|
||||
|
||||
/// Acumulador de primitivas por frame. Construir → `add_*` → `flush`.
|
||||
pub struct GpuBatch<'a> {
|
||||
pipelines: &'a GpuPipelines,
|
||||
line_verts: Vec<u8>,
|
||||
tri_verts: Vec<u8>,
|
||||
rect_insts: Vec<u8>,
|
||||
line_width: f32,
|
||||
line_count: u32,
|
||||
tri_vert_count: u32,
|
||||
rect_count: u32,
|
||||
}
|
||||
|
||||
impl<'a> GpuBatch<'a> {
|
||||
pub fn new(pipelines: &'a GpuPipelines) -> Self {
|
||||
Self {
|
||||
pipelines,
|
||||
line_verts: Vec::new(),
|
||||
tri_verts: Vec::new(),
|
||||
rect_insts: Vec::new(),
|
||||
line_width: 1.0,
|
||||
line_count: 0,
|
||||
tri_vert_count: 0,
|
||||
rect_count: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Grosor de las próximas líneas (en pixels del frame, sin AA).
|
||||
/// Se aplica a todas las líneas del batch — el lado bueno de una
|
||||
/// sola draw call es que sólo hay un grosor "vivo" por flush.
|
||||
pub fn line_width(&mut self, w: f32) {
|
||||
self.line_width = w;
|
||||
}
|
||||
|
||||
/// Añade un segmento de línea como instancia.
|
||||
pub fn add_line(&mut self, p0: (f32, f32), p1: (f32, f32), color: Color) {
|
||||
let rgba = pack_rgba(color);
|
||||
self.line_verts.extend_from_slice(&p0.0.to_ne_bytes());
|
||||
self.line_verts.extend_from_slice(&p0.1.to_ne_bytes());
|
||||
self.line_verts.extend_from_slice(&p1.0.to_ne_bytes());
|
||||
self.line_verts.extend_from_slice(&p1.1.to_ne_bytes());
|
||||
self.line_verts.extend_from_slice(&rgba.to_ne_bytes());
|
||||
self.line_count += 1;
|
||||
}
|
||||
|
||||
/// Añade una polilínea como secuencia de segmentos individuales
|
||||
/// (line-list). Para N puntos emite N-1 instancias.
|
||||
pub fn add_polyline(&mut self, points: &[(f32, f32)], color: Color) {
|
||||
if points.len() < 2 {
|
||||
return;
|
||||
}
|
||||
for w in points.windows(2) {
|
||||
self.add_line(w[0], w[1], color);
|
||||
}
|
||||
}
|
||||
|
||||
/// Añade un triángulo con color por vértice.
|
||||
pub fn add_tri(
|
||||
&mut self,
|
||||
a: (f32, f32),
|
||||
b: (f32, f32),
|
||||
c: (f32, f32),
|
||||
ca: Color,
|
||||
cb: Color,
|
||||
cc: Color,
|
||||
) {
|
||||
self.push_tri_vert(a, ca);
|
||||
self.push_tri_vert(b, cb);
|
||||
self.push_tri_vert(c, cc);
|
||||
}
|
||||
|
||||
fn push_tri_vert(&mut self, p: (f32, f32), color: Color) {
|
||||
let rgba = pack_rgba(color);
|
||||
self.tri_verts.extend_from_slice(&p.0.to_ne_bytes());
|
||||
self.tri_verts.extend_from_slice(&p.1.to_ne_bytes());
|
||||
self.tri_verts.extend_from_slice(&rgba.to_ne_bytes());
|
||||
self.tri_vert_count += 1;
|
||||
}
|
||||
|
||||
/// Añade un triangle list crudo `[(x, y); 3*N]` con un mismo color
|
||||
/// uniforme por vértice. Útil para teselaciones precomputadas
|
||||
/// (contornos, polígonos rellenos).
|
||||
pub fn add_tri_list(&mut self, verts: &[(f32, f32)], color: Color) {
|
||||
for &p in verts {
|
||||
self.push_tri_vert(p, color);
|
||||
}
|
||||
}
|
||||
|
||||
/// Añade un rectángulo lleno como instancia (sin radio — para
|
||||
/// rounded rects sigue por vello).
|
||||
pub fn add_rect(&mut self, x: f32, y: f32, w: f32, h: f32, color: Color) {
|
||||
let rgba = pack_rgba(color);
|
||||
self.rect_insts.extend_from_slice(&x.to_ne_bytes());
|
||||
self.rect_insts.extend_from_slice(&y.to_ne_bytes());
|
||||
self.rect_insts.extend_from_slice(&w.to_ne_bytes());
|
||||
self.rect_insts.extend_from_slice(&h.to_ne_bytes());
|
||||
self.rect_insts.extend_from_slice(&rgba.to_ne_bytes());
|
||||
self.rect_count += 1;
|
||||
}
|
||||
|
||||
/// Cuenta total de primitivas pendientes (útil para benches).
|
||||
pub fn primitive_count(&self) -> u32 {
|
||||
self.line_count + self.rect_count + self.tri_vert_count / 3
|
||||
}
|
||||
|
||||
/// Despacha las primitivas acumuladas como 1 draw call por tipo
|
||||
/// no vacío contra `view`. `viewport` es el tamaño en pixels del
|
||||
/// target (lo usa el VS para mapear pixel → NDC).
|
||||
///
|
||||
/// `load_op` decide si la pasada conserva el contenido previo
|
||||
/// (`Load`, lo normal cuando vello ya pintó algo) o limpia
|
||||
/// (`Clear(color)`). Apps que llamen a `GpuBatch` desde
|
||||
/// `gpu_paint_with` quieren `Load`.
|
||||
pub fn flush(
|
||||
self,
|
||||
device: &wgpu::Device,
|
||||
queue: &wgpu::Queue,
|
||||
encoder: &mut wgpu::CommandEncoder,
|
||||
view: &wgpu::TextureView,
|
||||
viewport: (f32, f32),
|
||||
load_op: wgpu::LoadOp<wgpu::Color>,
|
||||
) {
|
||||
let total = self.line_count + self.tri_vert_count + self.rect_count;
|
||||
if total == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
// Uniforms: [viewport.w, viewport.h, line_width, _pad].
|
||||
let u_data = [viewport.0, viewport.1, self.line_width, 0.0];
|
||||
let mut u_bytes = Vec::with_capacity(16);
|
||||
for v in u_data {
|
||||
u_bytes.extend_from_slice(&v.to_ne_bytes());
|
||||
}
|
||||
let uniforms = device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("llimphi-raster-gpu-u"),
|
||||
size: 16,
|
||||
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
queue.write_buffer(&uniforms, 0, &u_bytes);
|
||||
|
||||
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||
label: Some("llimphi-raster-gpu-bg"),
|
||||
layout: &self.pipelines.bind_layout,
|
||||
entries: &[wgpu::BindGroupEntry {
|
||||
binding: 0,
|
||||
resource: uniforms.as_entire_binding(),
|
||||
}],
|
||||
});
|
||||
|
||||
// Buffers por tipo (sólo si hay datos).
|
||||
let lines_buf = (!self.line_verts.is_empty()).then(|| {
|
||||
let b = device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("llimphi-raster-gpu-lines-buf"),
|
||||
size: self.line_verts.len() as u64,
|
||||
usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
queue.write_buffer(&b, 0, &self.line_verts);
|
||||
b
|
||||
});
|
||||
let tris_buf = (!self.tri_verts.is_empty()).then(|| {
|
||||
let b = device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("llimphi-raster-gpu-tris-buf"),
|
||||
size: self.tri_verts.len() as u64,
|
||||
usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
queue.write_buffer(&b, 0, &self.tri_verts);
|
||||
b
|
||||
});
|
||||
let rects_buf = (!self.rect_insts.is_empty()).then(|| {
|
||||
let b = device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: Some("llimphi-raster-gpu-rects-buf"),
|
||||
size: self.rect_insts.len() as u64,
|
||||
usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
});
|
||||
queue.write_buffer(&b, 0, &self.rect_insts);
|
||||
b
|
||||
});
|
||||
|
||||
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
|
||||
label: Some("llimphi-raster-gpu-pass"),
|
||||
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
|
||||
view,
|
||||
resolve_target: None,
|
||||
ops: wgpu::Operations {
|
||||
load: load_op,
|
||||
store: wgpu::StoreOp::Store,
|
||||
},
|
||||
})],
|
||||
depth_stencil_attachment: None,
|
||||
timestamp_writes: None,
|
||||
occlusion_query_set: None,
|
||||
});
|
||||
pass.set_bind_group(0, &bind_group, &[]);
|
||||
|
||||
// Orden de draws: rects (fondo) → tris → lines (encima). Match
|
||||
// de la convención usual "fill abajo, stroke arriba".
|
||||
if let Some(buf) = rects_buf.as_ref() {
|
||||
pass.set_pipeline(&self.pipelines.rects);
|
||||
pass.set_vertex_buffer(0, buf.slice(..));
|
||||
pass.draw(0..6, 0..self.rect_count);
|
||||
}
|
||||
if let Some(buf) = tris_buf.as_ref() {
|
||||
pass.set_pipeline(&self.pipelines.tris);
|
||||
pass.set_vertex_buffer(0, buf.slice(..));
|
||||
pass.draw(0..self.tri_vert_count, 0..1);
|
||||
}
|
||||
if let Some(buf) = lines_buf.as_ref() {
|
||||
pass.set_pipeline(&self.pipelines.lines);
|
||||
pass.set_vertex_buffer(0, buf.slice(..));
|
||||
pass.draw(0..6, 0..self.line_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Empaqueta un `peniko::Color` a u32 little-endian RGBA8.
|
||||
/// El shader lo lee como `inst.rgba` y separa bytes — debe coincidir
|
||||
/// con la convención del WGSL (`r = rgba & 0xFF`, etc.).
|
||||
fn pack_rgba(c: Color) -> u32 {
|
||||
let [r, g, b, a] = c.to_rgba8().to_u8_array();
|
||||
(r as u32) | ((g as u32) << 8) | ((b as u32) << 16) | ((a as u32) << 24)
|
||||
}
|
||||
|
||||
/// WGSL source shared by the three pipelines of this module.
///
/// It declares one uniform block (`viewport`, `line_width`), three vertex
/// entry points — `vs_tris` (plain vertices), `vs_rects` and `vs_lines`
/// (instances expanded to a 6-vertex quad) — and a single fragment entry
/// point `fs` that outputs the interpolated vertex color.
///
/// Both quad lookup tables are declared with `var`, not `let`, because they
/// are indexed by the runtime `vertex_index`; keeping them as variables
/// avoids depending on shader-compiler support for dynamically indexing a
/// by-value array.
const WGSL: &str = r#"
struct Uniforms {
    viewport: vec2<f32>,
    line_width: f32,
    _pad: f32,
};

@group(0) @binding(0) var<uniform> u: Uniforms;

struct V2F {
    @builtin(position) pos: vec4<f32>,
    @location(0) color: vec4<f32>,
};

fn unpack_rgba(c: u32) -> vec4<f32> {
    let r = f32( c & 0xFFu) / 255.0;
    let g = f32((c >> 8u) & 0xFFu) / 255.0;
    let b = f32((c >> 16u) & 0xFFu) / 255.0;
    let a = f32((c >> 24u) & 0xFFu) / 255.0;
    return vec4<f32>(r, g, b, a);
}

fn px_to_ndc(p: vec2<f32>) -> vec2<f32> {
    return vec2<f32>(p.x / u.viewport.x * 2.0 - 1.0, 1.0 - p.y / u.viewport.y * 2.0);
}

// -------- triángulos: 1 vértice = (xy, rgba) --------

@vertex
fn vs_tris(@location(0) xy: vec2<f32>, @location(1) rgba: u32) -> V2F {
    var out: V2F;
    out.pos = vec4<f32>(px_to_ndc(xy), 0.0, 1.0);
    out.color = unpack_rgba(rgba);
    return out;
}

// -------- rects: 1 instancia = (xy, wh, rgba), 6 vértices/quad --------

@vertex
fn vs_rects(
    @builtin(vertex_index) vid: u32,
    @location(0) inst_xy: vec2<f32>,
    @location(1) inst_wh: vec2<f32>,
    @location(2) inst_rgba: u32,
) -> V2F {
    var corners = array<vec2<f32>, 6>(
        vec2<f32>(0.0, 0.0),
        vec2<f32>(1.0, 0.0),
        vec2<f32>(1.0, 1.0),
        vec2<f32>(0.0, 0.0),
        vec2<f32>(1.0, 1.0),
        vec2<f32>(0.0, 1.0),
    );
    let local = corners[vid];
    let px = inst_xy + local * inst_wh;
    var out: V2F;
    out.pos = vec4<f32>(px_to_ndc(px), 0.0, 1.0);
    out.color = unpack_rgba(inst_rgba);
    return out;
}

// -------- líneas: 1 instancia = (p0xy, p1xy, rgba), expandida a quad ----

@vertex
fn vs_lines(
    @builtin(vertex_index) vid: u32,
    @location(0) seg: vec4<f32>,
    @location(1) rgba: u32,
) -> V2F {
    // Quad perpendicular al segmento, grosor uniforme `u.line_width` px.
    // vid 0..5 mapea a los 6 vértices del quad (2 tris).
    let p0 = seg.xy;
    let p1 = seg.zw;
    let dir = normalize(p1 - p0);
    let n = vec2<f32>(-dir.y, dir.x);
    let half_w = u.line_width * 0.5;
    var offsets = array<vec2<f32>, 6>(
        vec2<f32>(0.0, -half_w), // p0 -n
        vec2<f32>(0.0, half_w), // p0 +n
        vec2<f32>(1.0, half_w), // p1 +n
        vec2<f32>(0.0, -half_w), // p0 -n
        vec2<f32>(1.0, half_w), // p1 +n
        vec2<f32>(1.0, -half_w), // p1 -n
    );
    let o = offsets[vid];
    let along = mix(p0, p1, o.x);
    let across = n * o.y;
    let px = along + across;
    var out: V2F;
    out.pos = vec4<f32>(px_to_ndc(px), 0.0, 1.0);
    out.color = unpack_rgba(rgba);
    return out;
}

@fragment
fn fs(in: V2F) -> @location(0) vec4<f32> {
    return in.color;
}
"#;
|
||||
@@ -0,0 +1,120 @@
|
||||
//! llimphi-raster — Brocha Matemática.
|
||||
//!
|
||||
//! Traduce primitivas vectoriales (líneas, curvas de Bézier, texto) a
|
||||
//! píxeles via Compute Shaders. Backend: `vello`.
|
||||
//!
|
||||
//! Punto de entrada: [`Renderer`]. Recibe una [`vello::Scene`] y la pinta
|
||||
//! sobre un [`llimphi_hal::Frame`].
|
||||
|
||||
use llimphi_hal::{Frame, Hal};
|
||||
pub use vello;
|
||||
pub use vello::kurbo;
|
||||
pub use vello::peniko;
|
||||
|
||||
pub mod gpu;
|
||||
pub use gpu::{GpuBatch, GpuPipelines};
|
||||
|
||||
/// Errors produced by the rasterizer.
#[derive(Debug)]
pub enum RasterError {
    /// Creating the underlying vello renderer failed; carries its message.
    Init(String),
    /// Rendering a scene failed; carries the vello error message.
    Render(String),
}

impl std::fmt::Display for RasterError {
    /// Formats the error as `vello <stage>: <message>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (stage, detail) = match self {
            Self::Init(detail) => ("init", detail),
            Self::Render(detail) => ("render", detail),
        };
        write!(f, "vello {stage}: {detail}")
    }
}

impl std::error::Error for RasterError {}
|
||||
|
||||
/// Rasterizador vectorial. Una instancia por surface (porque vello cachea
|
||||
/// resources contra un `surface_format` específico).
|
||||
pub struct Renderer {
|
||||
inner: vello::Renderer,
|
||||
}
|
||||
|
||||
impl Renderer {
|
||||
/// Inicializa el rasterizador. Vello acepta cualquier textura compatible
|
||||
/// (Rgba8Unorm / Bgra8Unorm) en `render`, así que no se fija un formato
|
||||
/// en construcción.
|
||||
///
|
||||
/// **`antialiasing_support`**: pedimos `area` solamente, no `all()`.
|
||||
/// `area` es el único método que `render()` usa (`AaConfig::Area`
|
||||
/// fijo). Pedir `all()` haría a vello compilar también pipelines
|
||||
/// para `msaa8` y `msaa16` que nunca se invocan — en Mali-G57 eso
|
||||
/// triplica el cold-start (medido: 3.7s vs ~1.2s). Si alguna app
|
||||
/// futura necesita MSAA, agregamos un constructor explícito.
|
||||
///
|
||||
/// **`num_init_threads: None`**: vello paraleliza la compilación
|
||||
/// de shaders en `None` → todos los CPU cores. Mali-G57 viene en
|
||||
/// SoCs octa-core ARM; con 1 thread tardamos 2.0s, con 8 esperamos
|
||||
/// ~400-600ms. La compilación de shaders es 100% CPU (Rust →
|
||||
/// SPIR-V), el GPU no participa, así que multi-thread escala
|
||||
/// casi linealmente hasta saturar el queue del Naga compiler.
|
||||
pub fn new(hal: &Hal) -> Result<Self, RasterError> {
|
||||
let inner = vello::Renderer::new(
|
||||
&hal.device,
|
||||
vello::RendererOptions {
|
||||
use_cpu: false,
|
||||
antialiasing_support: vello::AaSupport {
|
||||
area: true,
|
||||
msaa8: false,
|
||||
msaa16: false,
|
||||
},
|
||||
num_init_threads: None,
|
||||
pipeline_cache: None,
|
||||
},
|
||||
)
|
||||
.map_err(|e| RasterError::Init(e.to_string()))?;
|
||||
Ok(Self { inner })
|
||||
}
|
||||
|
||||
/// Renderiza `scene` sobre `frame` limpiando con `base_color`. AA fija
|
||||
/// en area-sampling (precisión Δ < 10⁻⁹ rad del SDD).
|
||||
pub fn render(
|
||||
&mut self,
|
||||
hal: &Hal,
|
||||
scene: &vello::Scene,
|
||||
frame: &Frame,
|
||||
base_color: peniko::Color,
|
||||
) -> Result<(), RasterError> {
|
||||
let (width, height) = frame.size();
|
||||
self.render_to_view(hal, scene, frame.view(), width, height, base_color)
|
||||
}
|
||||
|
||||
/// Como [`render`](Self::render) pero contra una vista de textura
|
||||
/// explícita (mismo formato/tamaño que la intermedia). Lo usa el
|
||||
/// compositor de overlay de `llimphi-ui` para rasterizar la capa de
|
||||
/// overlay sobre fondo transparente en su propia textura. Ojo:
|
||||
/// `render_to_texture` **limpia** el target con `base_color` y escribe
|
||||
/// todos los píxeles — no compone sobre contenido previo.
|
||||
pub fn render_to_view(
|
||||
&mut self,
|
||||
hal: &Hal,
|
||||
scene: &vello::Scene,
|
||||
view: &llimphi_hal::wgpu::TextureView,
|
||||
width: u32,
|
||||
height: u32,
|
||||
base_color: peniko::Color,
|
||||
) -> Result<(), RasterError> {
|
||||
self.inner
|
||||
.render_to_texture(
|
||||
&hal.device,
|
||||
&hal.queue,
|
||||
scene,
|
||||
view,
|
||||
&vello::RenderParams {
|
||||
base_color,
|
||||
width,
|
||||
height,
|
||||
antialiasing_method: vello::AaConfig::Area,
|
||||
},
|
||||
)
|
||||
.map_err(|e| RasterError::Render(e.to_string()))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,128 @@
|
||||
//! Smoke test del backend GPU directo (`llimphi_raster::gpu`).
|
||||
//!
|
||||
//! No verifica píxeles — eso requiere AA y un patrón conocido, y por
|
||||
//! ahora el módulo no garantiza pixel-exactness. Sí verifica que:
|
||||
//!
|
||||
//! - `GpuPipelines::new` compila los 3 shaders WGSL sin errores de naga.
|
||||
//! - `GpuBatch` acepta líneas, triángulos y rects mezclados sin pánico.
|
||||
//! - `flush` ejecuta sin errores wgpu y la `Maintain::Wait` retorna
|
||||
//! (= la GPU/llvmpipe terminó las pasadas).
|
||||
//!
|
||||
//! Corre en cualquier adapter wgpu disponible — en CI sin GPU usa
|
||||
//! llvmpipe, donde igual valida el ensamblado y la sintaxis WGSL.
|
||||
|
||||
use llimphi_hal::{wgpu, Hal};
|
||||
use llimphi_raster::gpu::{GpuBatch, GpuPipelines};
|
||||
use llimphi_raster::peniko::Color;
|
||||
|
||||
const W: u32 = 256;
|
||||
const H: u32 = 256;
|
||||
const FMT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm;
|
||||
|
||||
fn make_target(device: &wgpu::Device) -> (wgpu::Texture, wgpu::TextureView) {
|
||||
let tex = device.create_texture(&wgpu::TextureDescriptor {
|
||||
label: Some("smoke-target"),
|
||||
size: wgpu::Extent3d {
|
||||
width: W,
|
||||
height: H,
|
||||
depth_or_array_layers: 1,
|
||||
},
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: wgpu::TextureDimension::D2,
|
||||
format: FMT,
|
||||
usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
|
||||
view_formats: &[],
|
||||
});
|
||||
let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
|
||||
(tex, view)
|
||||
}
|
||||
|
||||
/// Mixes rects, lines and a triangle in a single batch and checks that
/// building, flushing and submitting it completes without panicking.
#[test]
fn batch_with_rects_lines_tris_does_not_panic() {
    let hal = pollster::block_on(Hal::new(None)).expect("hal");
    let pipelines = GpuPipelines::new(&hal.device, FMT);
    let (_tex, view) = make_target(&hal.device);

    let mut batch = GpuBatch::new(&pipelines);
    batch.line_width(2.0);

    // 8×8 grid of rects whose color varies with the cell position.
    for row in 0..8u8 {
        for col in 0..8u8 {
            let left = 8.0 + f32::from(col) * 30.0;
            let top = 8.0 + f32::from(row) * 30.0;
            let tint = Color::from_rgba8(col * 32, row * 32, 100, 255);
            batch.add_rect(left, top, 24.0, 24.0, tint);
        }
    }

    // Fan of slightly slanted lines spanning the full width.
    let stroke = Color::from_rgba8(220, 220, 250, 180);
    for step in 0..16u8 {
        let y_start = f32::from(step) * 16.0;
        let y_end = f32::from(step + 1) * 16.0;
        batch.add_line((0.0, y_start), (W as f32, y_end), stroke);
    }

    // One large triangle with a different color on each vertex.
    let apex = (128.0, 32.0);
    let bottom_left = (64.0, 220.0);
    let bottom_right = (220.0, 220.0);
    batch.add_tri(
        apex,
        bottom_left,
        bottom_right,
        Color::from_rgba8(255, 80, 80, 200),
        Color::from_rgba8(80, 255, 80, 200),
        Color::from_rgba8(80, 80, 255, 200),
    );

    assert!(batch.primitive_count() > 0, "batch debería tener primitivas");

    let encoder_desc = wgpu::CommandEncoderDescriptor {
        label: Some("smoke-enc"),
    };
    let mut encoder = hal.device.create_command_encoder(&encoder_desc);
    let viewport = (W as f32, H as f32);
    batch.flush(
        &hal.device,
        &hal.queue,
        &mut encoder,
        &view,
        viewport,
        wgpu::LoadOp::Clear(wgpu::Color::BLACK),
    );
    hal.queue.submit(std::iter::once(encoder.finish()));
    // Block until the submitted work has finished.
    hal.device.poll(wgpu::Maintain::Wait);
}
|
||||
|
||||
/// Flushing a batch with no primitives must complete without panicking and
/// leave a submittable (empty) command encoder.
#[test]
fn empty_batch_flush_is_no_op() {
    let hal = pollster::block_on(Hal::new(None)).expect("hal");
    let pipelines = GpuPipelines::new(&hal.device, FMT);
    let (_tex, view) = make_target(&hal.device);

    let batch = GpuBatch::new(&pipelines);
    assert_eq!(batch.primitive_count(), 0);

    let encoder_desc = wgpu::CommandEncoderDescriptor {
        label: Some("smoke-empty-enc"),
    };
    let mut encoder = hal.device.create_command_encoder(&encoder_desc);

    // With an empty batch, flush is expected to create neither a render
    // pass nor any buffers.
    let viewport = (W as f32, H as f32);
    batch.flush(
        &hal.device,
        &hal.queue,
        &mut encoder,
        &view,
        viewport,
        wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
    );
    hal.queue.submit(std::iter::once(encoder.finish()));
    hal.device.poll(wgpu::Maintain::Wait);
}
|
||||
Reference in New Issue
Block a user