diff --git a/crates/apps/charka/src/main.rs b/crates/apps/charka/src/main.rs index 4108cdd..9d0d864 100644 --- a/crates/apps/charka/src/main.rs +++ b/crates/apps/charka/src/main.rs @@ -290,13 +290,13 @@ mod tests { let ir = ir_of( "PROCEDURE DIVISION.\n\ MAIN.\n\ - INSPECT WS-X TALLYING WS-N FOR ALL ' '.\n", + INITIALIZE WS-X.\n", ); let mut verbs = Vec::new(); for proc in &ir.procedures { collect_unknowns(&proc.body, &mut verbs); } - assert_eq!(verbs, vec!["INSPECT".to_string()]); + assert_eq!(verbs, vec!["INITIALIZE".to_string()]); } #[test] diff --git a/crates/modules/charka/SDD.md b/crates/modules/charka/SDD.md index e5b2455..77ace3d 100644 --- a/crates/modules/charka/SDD.md +++ b/crates/modules/charka/SDD.md @@ -85,8 +85,8 @@ Tercera etapa: `Program` → `Ir`. Aquí se parsea cada `Sentence` cruda reimplementar la clasificación. - `Procedure { name, body: Vec }`. `Stmt` cubre `Move`, `Display`, `Accept`, `Compute`, `Add`/`Subtract`/`Multiply`/`Divide`, - `If`, `Evaluate`, `StringConcat`, `Unstring`, `Perform`, `GoTo`, - `StopRun`, `Goback`, `Exit`, `Continue`. + `If`, `Evaluate`, `StringConcat`, `Unstring`, `Inspect`, `Perform`, + `GoTo`, `StopRun`, `Goback`, `Exit`, `Continue`. - `Expr` — expresiones aritméticas con precedencia y paréntesis (Pratt: `+ -` < `* /` < `**` der.). `Cond` — comparaciones (símbolo o forma palabra) unidas por `AND`/`OR`/`NOT`, más nombres de condición @@ -102,7 +102,8 @@ Tercera etapa: `Program` → `Ir`. Aquí se parsea cada `Sentence` cruda - `EVALUATE subject WHEN ... WHEN OTHER` — el `case` de COBOL, por igualdad de valor (no la forma `EVALUATE TRUE` con condiciones). - `STRING` (concatenación) y `UNSTRING` (partición por delimitador) — - el manejo de cadenas. + el manejo de cadenas. `INSPECT` — contar (`TALLYING FOR ALL`) y + reemplazar (`REPLACING ALL`). - Fuera de alcance v1: E/S de ficheros, CICS, SQL embebido. ## charka-runtime @@ -174,8 +175,8 @@ que corre el `Ir` directamente sobre `charka-runtime`, sin compilar. 
## El corpus -`crates/modules/charka/corpus/` — 12 programas COBOL graduados -(`01-hola` … `12-cadenas`), cada uno con su `.expected`. Ejercita el +`crates/modules/charka/corpus/` — 13 programas COBOL graduados +(`01-hola` … `13-inspeccion`), cada uno con su `.expected`. Ejercita el pipeline completo de punta a punta. Ver su `README.md`. ## La CLI diff --git a/crates/modules/charka/charka-codegen/src/lib.rs b/crates/modules/charka/charka-codegen/src/lib.rs index c5f9e4a..82331b2 100644 --- a/crates/modules/charka/charka-codegen/src/lib.rs +++ b/crates/modules/charka/charka-codegen/src/lib.rs @@ -280,8 +280,8 @@ mod tests { fn unknown_verb_becomes_a_comment() { let out = gen("PROCEDURE DIVISION.\n\ MAIN.\n\ - INSPECT WS-X TALLYING WS-N FOR ALL ' '.\n"); - assert!(out.contains("// charka: verbo no transpilado — INSPECT")); + INITIALIZE WS-X.\n"); + assert!(out.contains("// charka: verbo no transpilado — INITIALIZE")); } #[test] @@ -384,6 +384,21 @@ mod tests { assert!(out.contains("__it.next().unwrap_or(\"\")")); } + #[test] + fn inspect_emits_tally_and_replace() { + let out = gen("DATA DIVISION.\n\ + WORKING-STORAGE SECTION.\n\ + 01 WS-T PIC X(10).\n\ + 01 WS-N PIC 9(3).\n\ + PROCEDURE DIVISION.\n\ + MAIN.\n\ + INSPECT WS-T TALLYING WS-N FOR ALL 'X'.\n\ + INSPECT WS-T REPLACING ALL 'X' BY 'Y'.\n"); + assert!(out.contains(".matches(")); + assert!(out.contains("Decimal::from_integer(__n)")); + assert!(out.contains(".replace(")); + } + #[test] fn empty_program_still_compiles_shape() { let out = gen(""); diff --git a/crates/modules/charka/charka-codegen/src/stmt.rs b/crates/modules/charka/charka-codegen/src/stmt.rs index 100761a..8e51c14 100644 --- a/crates/modules/charka/charka-codegen/src/stmt.rs +++ b/crates/modules/charka/charka-codegen/src/stmt.rs @@ -1,7 +1,9 @@ //! Emisión de los statements del PROCEDURE: cada [`Stmt`] se traduce a //! una o varias líneas de código Rust sobre `charka-runtime`. 
-use charka_ir::{CmpOp, Cond, Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch}; +use charka_ir::{ + CmpOp, Cond, InspectOp, Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch, +}; use crate::emit::Emitter; use crate::expr::{ @@ -82,6 +84,7 @@ pub(crate) fn emit_stmt(em: &mut Emitter, sym: &Symbols, stmt: &Stmt) { delimiter, into, } => emit_unstring(em, sym, source, delimiter, into), + Stmt::Inspect { target, op } => emit_inspect(em, sym, target, op), Stmt::Perform(p) => emit_perform(em, sym, p), Stmt::GoTo { target } => { em.line(&format!( @@ -423,6 +426,38 @@ fn emit_unstring( em.line("}"); } +/// `INSPECT` — cuenta (`TALLYING`) o reemplaza (`REPLACING`). +fn emit_inspect(em: &mut Emitter, sym: &Symbols, target: &Operand, op: &InspectOp) { + match op { + InspectOp::TallyingForAll { counter, search } => { + em.line("{"); + em.indent(); + em.line(&format!( + "let __n = ({}).matches({}).count() as i128;", + operand_display(sym, target), + operand_str(sym, search) + )); + match field_ref(sym, counter) { + Some((lref, FieldKind::Num { .. })) => em.line(&format!( + "{lref}.store({lref}.value().add(&Decimal::from_integer(__n)));" + )), + _ => em.line("// charka: contador INSPECT no resuelto"), + } + em.dedent(); + em.line("}"); + } + InspectOp::ReplacingAll { from, to } => { + let replaced = format!( + "({}).replace({}, {})", + operand_display(sym, target), + operand_str(sym, from), + operand_str(sym, to) + ); + emit_store_text(em, sym, target, &format!("{replaced}.as_str()")); + } + } +} + fn emit_perform(em: &mut Emitter, sym: &Symbols, p: &Perform) { // Emite el "cuerpo": la llamada al párrafo o el bloque en línea. 
let emit_body = |em: &mut Emitter, sym: &Symbols| match &p.target { diff --git a/crates/modules/charka/charka-ir/src/ast.rs b/crates/modules/charka/charka-ir/src/ast.rs index c3f50d9..e186521 100644 --- a/crates/modules/charka/charka-ir/src/ast.rs +++ b/crates/modules/charka/charka-ir/src/ast.rs @@ -183,6 +183,8 @@ pub enum Stmt { delimiter: Operand, into: Vec, }, + /// `INSPECT target ...` — cuenta o reemplaza caracteres. + Inspect { target: Operand, op: InspectOp }, /// `PERFORM ...` — ver [`Perform`]. Perform(Perform), /// `GO TO target` @@ -200,6 +202,16 @@ pub enum Stmt { Unknown { verb: String, tokens: Vec }, } +/// La operación de un `INSPECT`. +#[derive(Debug, Clone, PartialEq)] +pub enum InspectOp { + /// `TALLYING counter FOR ALL search` — suma a `counter` la cantidad + /// de apariciones de `search` en el destino. + TallyingForAll { counter: Operand, search: Operand }, + /// `REPLACING ALL from BY to` — reemplaza las apariciones de `from`. + ReplacingAll { from: Operand, to: Operand }, +} + /// Una rama `WHEN` de un `EVALUATE`: los valores que la disparan /// (varios `WHEN` apilados comparten cuerpo) y el cuerpo a ejecutar. #[derive(Debug, Clone, PartialEq)] diff --git a/crates/modules/charka/charka-ir/src/lib.rs b/crates/modules/charka/charka-ir/src/lib.rs index 2e1a445..a93e490 100644 --- a/crates/modules/charka/charka-ir/src/lib.rs +++ b/crates/modules/charka/charka-ir/src/lib.rs @@ -17,9 +17,9 @@ //! `ACCEPT`, `COMPUTE` (con expresiones con precedencia), `ADD`, //! `SUBTRACT`, `MULTIPLY`, `DIVIDE`, `IF`/`ELSE`/`END-IF` (con //! condiciones `AND`/`OR`/`NOT`), `EVALUATE`/`WHEN`, `STRING`, -//! `UNSTRING`, `PERFORM` (fuera de línea, en línea, `TIMES`, `UNTIL`, -//! `VARYING`), `GO TO`, `STOP RUN`, `GOBACK`, `EXIT`, `CONTINUE`. -//! Fuera de alcance: E/S de ficheros, CICS y SQL embebido. +//! `UNSTRING`, `INSPECT`, `PERFORM` (fuera de línea, en línea, +//! `TIMES`, `UNTIL`, `VARYING`), `GO TO`, `STOP RUN`, `GOBACK`, +//! `EXIT`, `CONTINUE`. 
Fuera de alcance: E/S de ficheros, CICS y SQL embebido. #![forbid(unsafe_code)] @@ -383,6 +383,33 @@ mod tests { } } + #[test] + fn inspect_tallying_and_replacing_parse() { + let b = body("INSPECT WS-T TALLYING WS-N FOR ALL 'A'."); + match &b[0] { + Stmt::Inspect { + target, + op: InspectOp::TallyingForAll { counter, search }, + } => { + assert_eq!(target, &Operand::Data("WS-T".into())); + assert_eq!(counter, &Operand::Data("WS-N".into())); + assert_eq!(search, &Operand::Str("A".into())); + } + other => panic!("se esperaba INSPECT TALLYING, vino {other:?}"), + } + let b = body("INSPECT WS-T REPLACING ALL 'A' BY 'O'."); + match &b[0] { + Stmt::Inspect { + op: InspectOp::ReplacingAll { from, to }, + .. + } => { + assert_eq!(from, &Operand::Str("A".into())); + assert_eq!(to, &Operand::Str("O".into())); + } + other => panic!("se esperaba INSPECT REPLACING, vino {other:?}"), + } + } + #[test] fn several_statements_in_one_sentence() { let b = body("MOVE 1 TO X DISPLAY X STOP RUN."); @@ -394,10 +421,10 @@ mod tests { #[test] fn unrecognized_verb_becomes_unknown() { - let b = body("INSPECT WS-X TALLYING WS-N FOR ALL ' '."); + let b = body("INITIALIZE WS-X WS-Y."); match &b[0] { Stmt::Unknown { verb, tokens } => { - assert_eq!(verb, "INSPECT"); + assert_eq!(verb, "INITIALIZE"); assert!(!tokens.is_empty()); } other => panic!("se esperaba Unknown, vino {other:?}"), diff --git a/crates/modules/charka/charka-ir/src/stmt.rs b/crates/modules/charka/charka-ir/src/stmt.rs index 6ec13a9..73107be 100644 --- a/crates/modules/charka/charka-ir/src/stmt.rs +++ b/crates/modules/charka/charka-ir/src/stmt.rs @@ -5,7 +5,7 @@ use charka_parser::TokenKind; -use crate::ast::{Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch}; +use crate::ast::{InspectOp, Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch}; use crate::cursor::{parse_operand, Cursor}; use crate::expr::{parse_cond, parse_expr}; use crate::kw::{is_boundary, is_terminator, is_verb}; @@ -41,6 +41,7 @@ fn 
parse_one_stmt(c: &mut Cursor, stops: &[&str]) -> Stmt { "EVALUATE" => parse_evaluate(c), "STRING" => parse_string(c), "UNSTRING" => parse_unstring(c), + "INSPECT" => parse_inspect(c), "PERFORM" => parse_perform(c), "GO" => parse_goto(c), "STOP" => parse_stop(c), @@ -378,6 +379,39 @@ fn parse_unstring(c: &mut Cursor) -> Stmt { } } +fn parse_inspect(c: &mut Cursor) -> Stmt { + c.bump(); // INSPECT + let target = parse_operand(c); + if c.eat_word("TALLYING") { + let counter = parse_operand(c); + c.eat_word("FOR"); + c.eat_word("ALL"); + let search = parse_operand(c); + skip_to_stmt_boundary(c); + Stmt::Inspect { + target, + op: InspectOp::TallyingForAll { counter, search }, + } + } else if c.eat_word("REPLACING") { + c.eat_word("ALL"); + let from = parse_operand(c); + c.eat_word("BY"); + let to = parse_operand(c); + skip_to_stmt_boundary(c); + Stmt::Inspect { + target, + op: InspectOp::ReplacingAll { from, to }, + } + } else { + // Forma de INSPECT que la v1 no modela. + skip_to_stmt_boundary(c); + Stmt::Unknown { + verb: "INSPECT".to_string(), + tokens: Vec::new(), + } + } +} + fn parse_perform(c: &mut Cursor) -> Stmt { c.bump(); // PERFORM diff --git a/crates/modules/charka/charka-shadow/src/interp.rs b/crates/modules/charka/charka-shadow/src/interp.rs index ddde260..ccbd439 100644 --- a/crates/modules/charka/charka-shadow/src/interp.rs +++ b/crates/modules/charka/charka-shadow/src/interp.rs @@ -8,8 +8,8 @@ use std::collections::HashMap; use charka_ir::{ - BinOp, CmpOp, Cond, ConditionName, Expr, Figurative, Ir, Operand, Perform, PerformControl, - PerformTarget, Stmt, + BinOp, CmpOp, Cond, ConditionName, Expr, Figurative, InspectOp, Ir, Operand, Perform, + PerformControl, PerformTarget, Stmt, }; use charka_runtime::{cobol_text_cmp, Decimal, Rounding}; @@ -270,6 +270,33 @@ impl<'a> Machine<'a> { } Flow::Normal } + Stmt::Inspect { target, op } => { + match op { + InspectOp::TallyingForAll { counter, search } => { + let hay = self.eval_text(target); + let needle = 
self.eval_text(search); + let n = if needle.is_empty() { + 0 + } else { + hay.matches(needle.as_str()).count() + }; + let cur = self.eval_decimal(counter); + self.store(counter, cur.add(&Decimal::from_integer(n as i128)), false); + } + InspectOp::ReplacingAll { from, to } => { + let hay = self.eval_text(target); + let f = self.eval_text(from); + let t = self.eval_text(to); + let new = if f.is_empty() { + hay + } else { + hay.replace(f.as_str(), t.as_str()) + }; + self.store_text(target, &new); + } + } + Flow::Normal + } Stmt::Perform(p) => self.exec_perform(p), Stmt::GoTo { target } => { // Aproximación: ejecuta el destino y sale del párrafo. diff --git a/crates/modules/charka/charka-shadow/src/lib.rs b/crates/modules/charka/charka-shadow/src/lib.rs index 2d1b901..767d621 100644 --- a/crates/modules/charka/charka-shadow/src/lib.rs +++ b/crates/modules/charka/charka-shadow/src/lib.rs @@ -121,6 +121,7 @@ mod tests { corpus_test!(corpus_10_condicion, "10-condicion"); corpus_test!(corpus_11_tabla, "11-tabla"); corpus_test!(corpus_12_cadenas, "12-cadenas"); + corpus_test!(corpus_13_inspeccion, "13-inspeccion"); #[test] fn empty_source_runs_clean() { diff --git a/crates/modules/charka/corpus/13-inspeccion.cob b/crates/modules/charka/corpus/13-inspeccion.cob new file mode 100644 index 0000000..5693862 --- /dev/null +++ b/crates/modules/charka/corpus/13-inspeccion.cob @@ -0,0 +1,14 @@ +* corpus charka — nivel 6: INSPECT (contar y reemplazar) +IDENTIFICATION DIVISION. +PROGRAM-ID. INSPECCION. +DATA DIVISION. +WORKING-STORAGE SECTION. +01 WS-TEXTO PIC X(15) VALUE 'BANANA-MANZANA'. +01 WS-CUENTA PIC 9(3) VALUE 0. +PROCEDURE DIVISION. +MAIN. + INSPECT WS-TEXTO TALLYING WS-CUENTA FOR ALL 'A'. + DISPLAY 'LETRAS A = ' WS-CUENTA. + INSPECT WS-TEXTO REPLACING ALL 'A' BY 'O'. + DISPLAY 'TEXTO = ' WS-TEXTO. + STOP RUN. 
diff --git a/crates/modules/charka/corpus/13-inspeccion.expected b/crates/modules/charka/corpus/13-inspeccion.expected new file mode 100644 index 0000000..3dd5e53 --- /dev/null +++ b/crates/modules/charka/corpus/13-inspeccion.expected @@ -0,0 +1,2 @@ +LETRAS A = 006 +TEXTO = BONONO-MONZONO diff --git a/crates/modules/charka/corpus/README.md b/crates/modules/charka/corpus/README.md index cb7e1a8..69834d5 100644 --- a/crates/modules/charka/corpus/README.md +++ b/crates/modules/charka/corpus/README.md @@ -21,6 +21,7 @@ salida correcta, una línea por `DISPLAY`. | `10-condicion` | 5 | nombres de condición (nivel 88) en `IF` | | `11-tabla` | 6 | tablas (`OCCURS`) y referencias con subíndice | | `12-cadenas` | 6 | `STRING` (concatenar) y `UNSTRING` (partir) | +| `13-inspeccion` | 6 | `INSPECT` — contar (`TALLYING`) y reemplazar | ## Formato diff --git a/docs/changelog/charka.md b/docs/changelog/charka.md index 5f71d1d..546324b 100644 --- a/docs/changelog/charka.md +++ b/docs/changelog/charka.md @@ -3,6 +3,24 @@ Transpilador COBOL → Rust. El módulo más grande del ecosistema (Fase D del plan macro) — el parser COBOL completo es un esfuerzo multi-mes. +### feat(charka): INSPECT — contar y reemplazar caracteres + +El verbo de COBOL para analizar y limpiar campos de texto. + +- IR: `Stmt::Inspect { target, op }` con `InspectOp::TallyingForAll` + (cuenta apariciones y las suma a un contador) y + `InspectOp::ReplacingAll` (reemplaza apariciones). +- Parser: `INSPECT t TALLYING n FOR ALL lit` y + `INSPECT t REPLACING ALL a BY b`. Una forma no soportada cae a + `Stmt::Unknown`. +- Codegen: `TALLYING` → `str::matches(..).count()`; `REPLACING` → + `str::replace`. +- Shadow: el intérprete cuenta / reemplaza el texto. +- Corpus: programa nuevo `13-inspeccion`. Verificado: el intérprete + sombra y el crate compilado dan la misma salida. +- Alcance v1: `TALLYING FOR ALL` y `REPLACING ALL`; sin `LEADING`, + `FIRST`, `CHARACTERS`, `BEFORE`/`AFTER`. 
+ ### feat(charka): STRING y UNSTRING — manejo de cadenas Dos verbos comunes de COBOL para construir y partir cadenas.