feat(charka): INSPECT — contar y reemplazar caracteres
El verbo de COBOL para analizar y limpiar campos de texto.
- IR: Stmt::Inspect { target, op } con InspectOp::TallyingForAll
(cuenta apariciones y las suma a un contador) y
InspectOp::ReplacingAll (reemplaza apariciones).
- Parser: INSPECT t TALLYING n FOR ALL lit y
INSPECT t REPLACING ALL a BY b. Una forma no soportada cae a
Stmt::Unknown.
- Codegen: TALLYING -> str::matches(..).count(); REPLACING ->
str::replace.
- Shadow: el intérprete cuenta / reemplaza el texto.
- Corpus: programa nuevo 13-inspeccion. Verificado: el intérprete
sombra y el crate compilado por scaffold dan la misma salida.
Alcance v1: TALLYING FOR ALL y REPLACING ALL; sin LEADING, FIRST,
CHARACTERS, BEFORE/AFTER.
Tests: charka-ir 26, charka-codegen 20, charka-shadow 18. fmt +
clippy limpios.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -85,8 +85,8 @@ Tercera etapa: `Program` → `Ir`. Aquí se parsea cada `Sentence` cruda
|
||||
reimplementar la clasificación.
|
||||
- `Procedure { name, body: Vec<Stmt> }`. `Stmt` cubre `Move`,
|
||||
`Display`, `Accept`, `Compute`, `Add`/`Subtract`/`Multiply`/`Divide`,
|
||||
`If`, `Evaluate`, `StringConcat`, `Unstring`, `Perform`, `GoTo`,
|
||||
`StopRun`, `Goback`, `Exit`, `Continue`.
|
||||
`If`, `Evaluate`, `StringConcat`, `Unstring`, `Inspect`, `Perform`,
|
||||
`GoTo`, `StopRun`, `Goback`, `Exit`, `Continue`.
|
||||
- `Expr` — expresiones aritméticas con precedencia y paréntesis (Pratt:
|
||||
`+ -` < `* /` < `**` der.). `Cond` — comparaciones (símbolo o forma
|
||||
palabra) unidas por `AND`/`OR`/`NOT`, más nombres de condición
|
||||
@@ -102,7 +102,8 @@ Tercera etapa: `Program` → `Ir`. Aquí se parsea cada `Sentence` cruda
|
||||
- `EVALUATE subject WHEN ... WHEN OTHER` — el `case` de COBOL, por
|
||||
igualdad de valor (no la forma `EVALUATE TRUE` con condiciones).
|
||||
- `STRING` (concatenación) y `UNSTRING` (partición por delimitador) —
|
||||
el manejo de cadenas.
|
||||
el manejo de cadenas. `INSPECT` — contar (`TALLYING FOR ALL`) y
|
||||
reemplazar (`REPLACING ALL`).
|
||||
- Fuera de alcance v1: E/S de ficheros, CICS, SQL embebido.
|
||||
|
||||
## charka-runtime
|
||||
@@ -174,8 +175,8 @@ que corre el `Ir` directamente sobre `charka-runtime`, sin compilar.
|
||||
|
||||
## El corpus
|
||||
|
||||
`crates/modules/charka/corpus/` — 12 programas COBOL graduados
|
||||
(`01-hola` … `12-cadenas`), cada uno con su `.expected`. Ejercita el
|
||||
`crates/modules/charka/corpus/` — 13 programas COBOL graduados
|
||||
(`01-hola` … `13-inspeccion`), cada uno con su `.expected`. Ejercita el
|
||||
pipeline completo de punta a punta. Ver su `README.md`.
|
||||
|
||||
## La CLI
|
||||
|
||||
@@ -280,8 +280,8 @@ mod tests {
|
||||
fn unknown_verb_becomes_a_comment() {
|
||||
let out = gen("PROCEDURE DIVISION.\n\
|
||||
MAIN.\n\
|
||||
INSPECT WS-X TALLYING WS-N FOR ALL ' '.\n");
|
||||
assert!(out.contains("// charka: verbo no transpilado — INSPECT"));
|
||||
INITIALIZE WS-X.\n");
|
||||
assert!(out.contains("// charka: verbo no transpilado — INITIALIZE"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -384,6 +384,21 @@ mod tests {
|
||||
assert!(out.contains("__it.next().unwrap_or(\"\")"));
|
||||
}
|
||||
|
||||
#[test]
fn inspect_emits_tally_and_replace() {
    // One TALLYING and one REPLACING statement over declared fields, so the
    // counter resolves to a numeric field and the store is really emitted.
    let program = concat!(
        "DATA DIVISION.\n",
        "WORKING-STORAGE SECTION.\n",
        "01 WS-T PIC X(10).\n",
        "01 WS-N PIC 9(3).\n",
        "PROCEDURE DIVISION.\n",
        "MAIN.\n",
        "INSPECT WS-T TALLYING WS-N FOR ALL 'X'.\n",
        "INSPECT WS-T REPLACING ALL 'X' BY 'Y'.\n",
    );
    let rust = gen(program);
    let emitted = |fragment: &str| rust.contains(fragment);

    // TALLYING: occurrences are counted and added to the counter.
    assert!(emitted(".matches("));
    assert!(emitted("Decimal::from_integer(__n)"));
    // REPLACING: the target text is rewritten.
    assert!(emitted(".replace("));
}
|
||||
|
||||
#[test]
|
||||
fn empty_program_still_compiles_shape() {
|
||||
let out = gen("");
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
//! Emisión de los statements del PROCEDURE: cada [`Stmt`] se traduce a
|
||||
//! una o varias líneas de código Rust sobre `charka-runtime`.
|
||||
|
||||
use charka_ir::{CmpOp, Cond, Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch};
|
||||
use charka_ir::{
|
||||
CmpOp, Cond, InspectOp, Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch,
|
||||
};
|
||||
|
||||
use crate::emit::Emitter;
|
||||
use crate::expr::{
|
||||
@@ -82,6 +84,7 @@ pub(crate) fn emit_stmt(em: &mut Emitter, sym: &Symbols, stmt: &Stmt) {
|
||||
delimiter,
|
||||
into,
|
||||
} => emit_unstring(em, sym, source, delimiter, into),
|
||||
Stmt::Inspect { target, op } => emit_inspect(em, sym, target, op),
|
||||
Stmt::Perform(p) => emit_perform(em, sym, p),
|
||||
Stmt::GoTo { target } => {
|
||||
em.line(&format!(
|
||||
@@ -423,6 +426,38 @@ fn emit_unstring(
|
||||
em.line("}");
|
||||
}
|
||||
|
||||
/// `INSPECT` — cuenta (`TALLYING`) o reemplaza (`REPLACING`).
|
||||
fn emit_inspect(em: &mut Emitter, sym: &Symbols, target: &Operand, op: &InspectOp) {
|
||||
match op {
|
||||
InspectOp::TallyingForAll { counter, search } => {
|
||||
em.line("{");
|
||||
em.indent();
|
||||
em.line(&format!(
|
||||
"let __n = ({}).matches({}).count() as i128;",
|
||||
operand_display(sym, target),
|
||||
operand_str(sym, search)
|
||||
));
|
||||
match field_ref(sym, counter) {
|
||||
Some((lref, FieldKind::Num { .. })) => em.line(&format!(
|
||||
"{lref}.store({lref}.value().add(&Decimal::from_integer(__n)));"
|
||||
)),
|
||||
_ => em.line("// charka: contador INSPECT no resuelto"),
|
||||
}
|
||||
em.dedent();
|
||||
em.line("}");
|
||||
}
|
||||
InspectOp::ReplacingAll { from, to } => {
|
||||
let replaced = format!(
|
||||
"({}).replace({}, {})",
|
||||
operand_display(sym, target),
|
||||
operand_str(sym, from),
|
||||
operand_str(sym, to)
|
||||
);
|
||||
emit_store_text(em, sym, target, &format!("{replaced}.as_str()"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn emit_perform(em: &mut Emitter, sym: &Symbols, p: &Perform) {
|
||||
// Emite el "cuerpo": la llamada al párrafo o el bloque en línea.
|
||||
let emit_body = |em: &mut Emitter, sym: &Symbols| match &p.target {
|
||||
|
||||
@@ -183,6 +183,8 @@ pub enum Stmt {
|
||||
delimiter: Operand,
|
||||
into: Vec<Operand>,
|
||||
},
|
||||
/// `INSPECT target ...` — cuenta o reemplaza caracteres.
|
||||
Inspect { target: Operand, op: InspectOp },
|
||||
/// `PERFORM ...` — ver [`Perform`].
|
||||
Perform(Perform),
|
||||
/// `GO TO target`
|
||||
@@ -200,6 +202,16 @@ pub enum Stmt {
|
||||
Unknown { verb: String, tokens: Vec<Token> },
|
||||
}
|
||||
|
||||
/// The operation carried by an `INSPECT` statement.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum InspectOp {
|
||||
/// `TALLYING counter FOR ALL search` — adds to `counter` the number
|
||||
/// of occurrences of `search` in the target.
|
||||
TallyingForAll { counter: Operand, search: Operand },
|
||||
/// `REPLACING ALL from BY to` — replaces the occurrences of `from`.
|
||||
ReplacingAll { from: Operand, to: Operand },
|
||||
}
|
||||
|
||||
/// Una rama `WHEN` de un `EVALUATE`: los valores que la disparan
|
||||
/// (varios `WHEN` apilados comparten cuerpo) y el cuerpo a ejecutar.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
//! `ACCEPT`, `COMPUTE` (con expresiones con precedencia), `ADD`,
|
||||
//! `SUBTRACT`, `MULTIPLY`, `DIVIDE`, `IF`/`ELSE`/`END-IF` (con
|
||||
//! condiciones `AND`/`OR`/`NOT`), `EVALUATE`/`WHEN`, `STRING`,
|
||||
//! `UNSTRING`, `PERFORM` (fuera de línea, en línea, `TIMES`, `UNTIL`,
|
||||
//! `VARYING`), `GO TO`, `STOP RUN`, `GOBACK`, `EXIT`, `CONTINUE`.
|
||||
//! Fuera de alcance: E/S de ficheros, CICS y SQL embebido.
|
||||
//! `UNSTRING`, `INSPECT`, `PERFORM` (fuera de línea, en línea,
|
||||
//! `TIMES`, `UNTIL`, `VARYING`), `GO TO`, `STOP RUN`, `GOBACK`,
|
||||
//! `EXIT`, `CONTINUE`. Fuera de alcance: E/S de ficheros, CICS y SQL.
|
||||
|
||||
#![forbid(unsafe_code)]
|
||||
|
||||
@@ -383,6 +383,33 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn inspect_tallying_and_replacing_parse() {
    // TALLYING form: target, counter and search text are all captured.
    let tally = body("INSPECT WS-T TALLYING WS-N FOR ALL 'A'.");
    if let Stmt::Inspect {
        target,
        op: InspectOp::TallyingForAll { counter, search },
    } = &tally[0]
    {
        assert_eq!(target, &Operand::Data("WS-T".into()));
        assert_eq!(counter, &Operand::Data("WS-N".into()));
        assert_eq!(search, &Operand::Str("A".into()));
    } else {
        panic!("se esperaba INSPECT TALLYING, vino {:?}", &tally[0]);
    }

    // REPLACING form: only the from/to pair is checked here.
    let replace = body("INSPECT WS-T REPLACING ALL 'A' BY 'O'.");
    if let Stmt::Inspect {
        op: InspectOp::ReplacingAll { from, to },
        ..
    } = &replace[0]
    {
        assert_eq!(from, &Operand::Str("A".into()));
        assert_eq!(to, &Operand::Str("O".into()));
    } else {
        panic!("se esperaba INSPECT REPLACING, vino {:?}", &replace[0]);
    }
}
|
||||
|
||||
#[test]
|
||||
fn several_statements_in_one_sentence() {
|
||||
let b = body("MOVE 1 TO X DISPLAY X STOP RUN.");
|
||||
@@ -394,10 +421,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn unrecognized_verb_becomes_unknown() {
|
||||
let b = body("INSPECT WS-X TALLYING WS-N FOR ALL ' '.");
|
||||
let b = body("INITIALIZE WS-X WS-Y.");
|
||||
match &b[0] {
|
||||
Stmt::Unknown { verb, tokens } => {
|
||||
assert_eq!(verb, "INSPECT");
|
||||
assert_eq!(verb, "INITIALIZE");
|
||||
assert!(!tokens.is_empty());
|
||||
}
|
||||
other => panic!("se esperaba Unknown, vino {other:?}"),
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
|
||||
use charka_parser::TokenKind;
|
||||
|
||||
use crate::ast::{Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch};
|
||||
use crate::ast::{InspectOp, Operand, Perform, PerformControl, PerformTarget, Stmt, WhenBranch};
|
||||
use crate::cursor::{parse_operand, Cursor};
|
||||
use crate::expr::{parse_cond, parse_expr};
|
||||
use crate::kw::{is_boundary, is_terminator, is_verb};
|
||||
@@ -41,6 +41,7 @@ fn parse_one_stmt(c: &mut Cursor, stops: &[&str]) -> Stmt {
|
||||
"EVALUATE" => parse_evaluate(c),
|
||||
"STRING" => parse_string(c),
|
||||
"UNSTRING" => parse_unstring(c),
|
||||
"INSPECT" => parse_inspect(c),
|
||||
"PERFORM" => parse_perform(c),
|
||||
"GO" => parse_goto(c),
|
||||
"STOP" => parse_stop(c),
|
||||
@@ -378,6 +379,39 @@ fn parse_unstring(c: &mut Cursor) -> Stmt {
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_inspect(c: &mut Cursor) -> Stmt {
|
||||
c.bump(); // INSPECT
|
||||
let target = parse_operand(c);
|
||||
if c.eat_word("TALLYING") {
|
||||
let counter = parse_operand(c);
|
||||
c.eat_word("FOR");
|
||||
c.eat_word("ALL");
|
||||
let search = parse_operand(c);
|
||||
skip_to_stmt_boundary(c);
|
||||
Stmt::Inspect {
|
||||
target,
|
||||
op: InspectOp::TallyingForAll { counter, search },
|
||||
}
|
||||
} else if c.eat_word("REPLACING") {
|
||||
c.eat_word("ALL");
|
||||
let from = parse_operand(c);
|
||||
c.eat_word("BY");
|
||||
let to = parse_operand(c);
|
||||
skip_to_stmt_boundary(c);
|
||||
Stmt::Inspect {
|
||||
target,
|
||||
op: InspectOp::ReplacingAll { from, to },
|
||||
}
|
||||
} else {
|
||||
// Forma de INSPECT que la v1 no modela.
|
||||
skip_to_stmt_boundary(c);
|
||||
Stmt::Unknown {
|
||||
verb: "INSPECT".to_string(),
|
||||
tokens: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_perform(c: &mut Cursor) -> Stmt {
|
||||
c.bump(); // PERFORM
|
||||
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use charka_ir::{
|
||||
BinOp, CmpOp, Cond, ConditionName, Expr, Figurative, Ir, Operand, Perform, PerformControl,
|
||||
PerformTarget, Stmt,
|
||||
BinOp, CmpOp, Cond, ConditionName, Expr, Figurative, InspectOp, Ir, Operand, Perform,
|
||||
PerformControl, PerformTarget, Stmt,
|
||||
};
|
||||
use charka_runtime::{cobol_text_cmp, Decimal, Rounding};
|
||||
|
||||
@@ -270,6 +270,33 @@ impl<'a> Machine<'a> {
|
||||
}
|
||||
Flow::Normal
|
||||
}
|
||||
Stmt::Inspect { target, op } => {
|
||||
match op {
|
||||
InspectOp::TallyingForAll { counter, search } => {
|
||||
let hay = self.eval_text(target);
|
||||
let needle = self.eval_text(search);
|
||||
let n = if needle.is_empty() {
|
||||
0
|
||||
} else {
|
||||
hay.matches(needle.as_str()).count()
|
||||
};
|
||||
let cur = self.eval_decimal(counter);
|
||||
self.store(counter, cur.add(&Decimal::from_integer(n as i128)), false);
|
||||
}
|
||||
InspectOp::ReplacingAll { from, to } => {
|
||||
let hay = self.eval_text(target);
|
||||
let f = self.eval_text(from);
|
||||
let t = self.eval_text(to);
|
||||
let new = if f.is_empty() {
|
||||
hay
|
||||
} else {
|
||||
hay.replace(f.as_str(), t.as_str())
|
||||
};
|
||||
self.store_text(target, &new);
|
||||
}
|
||||
}
|
||||
Flow::Normal
|
||||
}
|
||||
Stmt::Perform(p) => self.exec_perform(p),
|
||||
Stmt::GoTo { target } => {
|
||||
// Aproximación: ejecuta el destino y sale del párrafo.
|
||||
|
||||
@@ -121,6 +121,7 @@ mod tests {
|
||||
corpus_test!(corpus_10_condicion, "10-condicion");
|
||||
corpus_test!(corpus_11_tabla, "11-tabla");
|
||||
corpus_test!(corpus_12_cadenas, "12-cadenas");
|
||||
corpus_test!(corpus_13_inspeccion, "13-inspeccion");
|
||||
|
||||
#[test]
|
||||
fn empty_source_runs_clean() {
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
* corpus charka — nivel 6: INSPECT (contar y reemplazar)
|
||||
IDENTIFICATION DIVISION.
|
||||
PROGRAM-ID. INSPECCION.
|
||||
DATA DIVISION.
|
||||
WORKING-STORAGE SECTION.
|
||||
01 WS-TEXTO PIC X(15) VALUE 'BANANA-MANZANA'.
|
||||
01 WS-CUENTA PIC 9(3) VALUE 0.
|
||||
PROCEDURE DIVISION.
|
||||
MAIN.
|
||||
INSPECT WS-TEXTO TALLYING WS-CUENTA FOR ALL 'A'.
|
||||
DISPLAY 'LETRAS A = ' WS-CUENTA.
|
||||
INSPECT WS-TEXTO REPLACING ALL 'A' BY 'O'.
|
||||
DISPLAY 'TEXTO = ' WS-TEXTO.
|
||||
STOP RUN.
|
||||
@@ -0,0 +1,2 @@
|
||||
LETRAS A = 006
|
||||
TEXTO = BONONO-MONZONO
|
||||
@@ -21,6 +21,7 @@ salida correcta, una línea por `DISPLAY`.
|
||||
| `10-condicion` | 5 | nombres de condición (nivel 88) en `IF` |
|
||||
| `11-tabla` | 6 | tablas (`OCCURS`) y referencias con subíndice |
|
||||
| `12-cadenas` | 6 | `STRING` (concatenar) y `UNSTRING` (partir) |
|
||||
| `13-inspeccion` | 6 | `INSPECT` — contar (`TALLYING`) y reemplazar |
|
||||
|
||||
## Formato
|
||||
|
||||
|
||||
Reference in New Issue
Block a user