//! Demo headless del HAL GPU directo — Fase 6 del SDD //! `02_ruway/llimphi/SDD.md` §"GPU directo wgpu". //! //! A diferencia de `spike_gpu_directo` (que compara vello vs un pipeline //! mock para tomar la decisión arquitectónica), este ejemplo usa //! directamente la API pública `GpuPipelines` + `GpuBatch` sobre N //! puntos (rects 1.2×1.2 px) sintéticos. Su rol es: //! //! - Documentar el uso mínimo: 8 líneas de código + uso de Color. //! - Ejercitar el HAL sin ninguna app (sin winit, sin runtime Elm). //! - Servir de benchmark de referencia post-implementación: tiempo //! total CPU+GPU para 100K / 500K / 1M / 5M rects. //! //! Corre con: `cargo run -p llimphi-raster --example gpu_million_points --release`. use std::io::Write; use std::time::Instant; use llimphi_hal::{wgpu, Hal}; use llimphi_raster::peniko::Color; use llimphi_raster::{GpuBatch, GpuPipelines}; const W: u32 = 1024; const H: u32 = 1024; const FMT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba8Unorm; const WARMUP: usize = 5; const MEASURED: usize = 15; const SIZES: &[u32] = &[100_000, 500_000, 1_000_000, 5_000_000]; fn main() { let hal = pollster::block_on(Hal::new(None)).expect("hal"); let pipelines = GpuPipelines::new(&hal.device, FMT); let (_tex, view) = make_target(&hal.device); println!(); println!("gpu_million_points — GpuBatch + 3 pipelines · target {W}×{H} Rgba8Unorm"); println!("warmup {WARMUP}, measured {MEASURED}"); println!(" {:>10} | {:>14} | {:>14}", "N", "ms / frame", "Mprim/s"); println!(" {:->10} + {:->14} + {:->14}", "", "", ""); for &n in SIZES { let ms = bench(&hal, &pipelines, &view, n); let throughput = (n as f64 / 1_000_000.0) / (ms / 1000.0); println!(" {:>10} | {:>14.3} | {:>14.2}", n, ms, throughput); let _ = std::io::stdout().flush(); } println!(); println!("(en llvmpipe estos números son CPU-bound — ver Fase 0 del SDD)"); println!(); } fn make_target(device: &wgpu::Device) -> (wgpu::Texture, wgpu::TextureView) { let tex = device.create_texture(&wgpu::TextureDescriptor { label: Some("gpu_million_points-target"), size: wgpu::Extent3d { width: W, height: H, depth_or_array_layers: 1, }, mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, format: FMT, usage: wgpu::TextureUsages::RENDER_ATTACHMENT, view_formats: &[], }); let view = tex.create_view(&wgpu::TextureViewDescriptor::default()); (tex, view) } fn bench(hal: &Hal, pipelines: &GpuPipelines, view: &wgpu::TextureView, n: u32) -> f64 { let mut samples: Vec = Vec::with_capacity(MEASURED); for frame in 0..(WARMUP + MEASURED) { let t0 = Instant::now(); let mut batch = GpuBatch::new(pipelines); let mut state: u32 = 0x1234_5678; for _ in 0..n { state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223); let x = (state % W) as f32; state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223); let y = (state % H) as f32; state = state.wrapping_mul(1_664_525).wrapping_add(1_013_904_223); let r = ((state >> 0) & 0xFF) as f32 / 255.0; let g = ((state >> 8) & 0xFF) as f32 / 255.0; let b = ((state >> 16) & 0xFF) as f32 / 255.0; batch.add_rect(x, y, 1.2, 1.2, Color::new([r, g, b, 1.0])); } let mut encoder = hal.device.create_command_encoder( &wgpu::CommandEncoderDescriptor { label: Some("gpu_million_points-enc"), }, ); batch.flush( &hal.device, &hal.queue, &mut encoder, view, (W as f32, H as f32), wgpu::LoadOp::Clear(wgpu::Color::BLACK), ); hal.queue.submit(std::iter::once(encoder.finish())); hal.device.poll(wgpu::PollType::wait_indefinitely()); let dt = t0.elapsed().as_secs_f64() * 1000.0; if frame >= WARMUP { samples.push(dt); } } samples.sort_by(|a, b| a.partial_cmp(b).unwrap()); samples[samples.len() / 2] }