feat(charka): STRING y UNSTRING — manejo de cadenas

Dos verbos comunes de COBOL para construir y partir cadenas.

- IR: Stmt::StringConcat { sources, into } y
  Stmt::Unstring { source, delimiter, into }.
- Parser: STRING a b DELIMITED BY SIZE INTO t END-STRING y
  UNSTRING s DELIMITED BY d INTO a b c END-UNSTRING.
- Codegen: STRING -> format! concatenado; UNSTRING -> un bloque que
  parte con str::split y reparte los trozos a los destinos.
- Shadow: el intérprete concatena / parte el texto y lo reparte.
- Corpus: programa nuevo 12-cadenas. Verificado: el intérprete sombra
  y el crate compilado por scaffold dan la misma salida.

Alcance v1: STRING con DELIMITED BY SIZE (otros delimitadores se
ignoran); sin WITH POINTER ni ON OVERFLOW.

Tests: charka-ir 25, charka-codegen 19, charka-shadow 17. fmt +
clippy limpios.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
sergio
2026-05-21 22:09:10 +00:00
parent 3902763daa
commit 47c49acd47
12 changed files with 269 additions and 10 deletions
@@ -367,6 +367,23 @@ mod tests {
assert!(out.contains(".saturating_sub(1)]"));
}
#[test]
fn string_concatenates_and_unstring_splits() {
    // Minimal program: one STRING joining two text fields and one UNSTRING
    // splitting a text field on a comma literal.
    let cobol = "DATA DIVISION.\n\
                 WORKING-STORAGE SECTION.\n\
                 01 WS-A PIC X(4).\n\
                 01 WS-B PIC X(4).\n\
                 01 WS-OUT PIC X(10).\n\
                 01 WS-SRC PIC X(10).\n\
                 PROCEDURE DIVISION.\n\
                 MAIN.\n\
                 STRING WS-A WS-B DELIMITED BY SIZE INTO WS-OUT END-STRING.\n\
                 UNSTRING WS-SRC DELIMITED BY ',' INTO WS-A WS-B END-UNSTRING.\n";
    let out = gen(cobol);

    // STRING stores a `format!` concatenation into the destination field.
    assert!(out.contains("self.ws_out.store(&format!("));
    // UNSTRING splits the source on the delimiter string...
    assert!(out.contains("__src.split(__delim.as_str())"));
    // ...and a missing piece falls back to the empty string.
    assert!(out.contains("__it.next().unwrap_or(\"\")"));
}
#[test]
fn empty_program_still_compiles_shape() {
let out = gen("");
@@ -76,6 +76,12 @@ pub(crate) fn emit_stmt(em: &mut Emitter, sym: &Symbols, stmt: &Stmt) {
whens,
other,
} => emit_evaluate(em, sym, subject, whens, other),
Stmt::StringConcat { sources, into } => emit_string(em, sym, sources, into),
Stmt::Unstring {
source,
delimiter,
into,
} => emit_unstring(em, sym, source, delimiter, into),
Stmt::Perform(p) => emit_perform(em, sym, p),
Stmt::GoTo { target } => {
em.line(&format!(
@@ -366,6 +372,57 @@ fn branch_condition(sym: &Symbols, subject: &Operand, branch: &WhenBranch) -> St
.join(" || ")
}
/// Emits a statement that stores a `&str` expression into a destination.
///
/// `text` is spliced verbatim into the generated line. For a text field it is
/// passed straight to `store`; for a numeric field it is trimmed and run
/// through `Decimal::parse`, with `Decimal::zero()` as the fallback when the
/// parse returns an error. When `field_ref` cannot resolve `target`, only a
/// marker comment is emitted.
fn emit_store_text(em: &mut Emitter, sym: &Symbols, target: &Operand, text: &str) {
    // Build the single output line first, then emit it once.
    let stmt = match field_ref(sym, target) {
        None => String::from("// charka: destino no resuelto"),
        Some((lref, FieldKind::Num { .. })) => format!(
            "{lref}.store(Decimal::parse(({text}).trim())\
             .unwrap_or_else(|_| Decimal::zero()));"
        ),
        Some((lref, FieldKind::Text { .. })) => format!("{lref}.store({text});"),
    };
    em.line(&stmt);
}
/// `STRING` — concatenates the text of every source into the destination.
///
/// Generates a borrowed `format!` call with one `{}` placeholder per source,
/// fed with each source's `operand_display` expression, and hands that
/// expression to `emit_store_text` for the actual store.
fn emit_string(em: &mut Emitter, sym: &Symbols, sources: &[Operand], into: &Operand) {
    // One `{}` per source, back to back, so the pieces are joined with no separator.
    let placeholders: String = sources.iter().map(|_| "{}").collect();
    let rendered: Vec<String> = sources
        .iter()
        .map(|src| operand_display(sym, src))
        .collect();
    let arg_list = rendered.join(", ");
    let expr = format!("&format!(\"{placeholders}\", {arg_list})");
    emit_store_text(em, sym, into, &expr);
}
/// `UNSTRING` — splits the source text on the delimiter and hands the pieces
/// out to the destinations in order.
///
/// The generated code is wrapped in its own `{ ... }` block so the temporaries
/// (`__src`, `__delim`, `__it`, `__part`) do not leak into the surrounding
/// scope. Source and delimiter are copied to owned strings, the source is
/// split with `str::split`, and each destination receives the next piece, or
/// `""` once the pieces run out.
///
/// Every destination consumes exactly one piece: the piece is bound to
/// `__part` *before* the store is emitted. This way a destination that
/// `emit_store_text` cannot resolve (and therefore emits only as a comment)
/// still advances the iterator, and the destinations after it keep the piece
/// that matches their position instead of being shifted by one.
fn emit_unstring(
    em: &mut Emitter,
    sym: &Symbols,
    source: &Operand,
    delimiter: &Operand,
    into: &[Operand],
) {
    em.line("{");
    em.indent();
    em.line(&format!(
        "let __src = ({}).to_string();",
        operand_display(sym, source)
    ));
    em.line(&format!(
        "let __delim = ({}).to_string();",
        operand_display(sym, delimiter)
    ));
    em.line("let mut __it = __src.split(__delim.as_str());");
    for t in into {
        // Pull the piece unconditionally; the leading underscore keeps the
        // generated code warning-free when the store below is only a comment.
        em.line("let __part = __it.next().unwrap_or(\"\");");
        emit_store_text(em, sym, t, "__part");
    }
    em.dedent();
    em.line("}");
}
fn emit_perform(em: &mut Emitter, sym: &Symbols, p: &Perform) {
// Emite el "cuerpo": la llamada al párrafo o el bloque en línea.
let emit_body = |em: &mut Emitter, sym: &Symbols| match &p.target {