diff --git a/interactive/examples/ddir_col.rs b/interactive/examples/ddir_col.rs index bf822a13f..d6d8d574f 100644 --- a/interactive/examples/ddir_col.rs +++ b/interactive/examples/ddir_col.rs @@ -61,14 +61,12 @@ mod types { } pub type Diff = i64; - pub type Id = usize; pub type Time = timely::order::Product>; } use types::*; use interactive::parse; use interactive::lower; -use interactive::ir::Program; use differential_dataflow::columnar as columnar_support; @@ -88,7 +86,6 @@ mod columnar { } mod render { - use std::collections::HashMap; use std::sync::Arc; use timely::order::Product; use timely::dataflow::Scope; @@ -99,7 +96,7 @@ mod render { use differential_dataflow::operators::arrange::{Arranged, TraceAgent}; use columnar::Columnar; use super::types::*; - use interactive::ir::{Node, LinearOp, Program, RowLike, eval_fields, eval_field_into, eval_condition}; + use interactive::ir::{LinearOp, RowLike, eval_fields, eval_field_into, eval_condition}; use super::columnar::{DdirUpdate, DdirRecordedUpdates}; use super::columnar::{ColValSpine, ColValBuilder}; @@ -353,67 +350,6 @@ mod render { s.exports.iter().map(|e| resolve(&items, &imports, &var_cols, &e.value).collection()).collect() } - pub fn render_program<'scope>(program: &Program, scope: Scope<'scope, ConcreteTime>, inputs: &[Col<'scope>]) -> HashMap> - { - let mut nodes: HashMap> = HashMap::new(); - let mut level: usize = 0; - let mut variables: HashMap, usize)> = HashMap::new(); - let mut var_levels: HashMap = HashMap::new(); - - for (&id, node) in program.nodes.iter() { - match node { - Node::Input(i) => { - nodes.insert(id, Rendered::Collection(inputs[*i].clone())); - }, - Node::Import { name } => panic!("ddir_col: Import {:?} not supported in this harness (no trace registry).", name), - Node::Linear { input, ops } => { - let c = nodes[input].collection(); - nodes.insert(id, Rendered::Collection(render_linear(c, ops.clone(), level))); - }, - Node::Concat(ids) => { - let mut r = nodes[&ids[0]].collection(); - 
for i in &ids[1..] { r = r.concat(nodes[i].collection()); } - nodes.insert(id, Rendered::Collection(r)); - }, - Node::Arrange(input) => { - nodes.insert(id, Rendered::Arrangement(nodes[input].arrange())); - }, - Node::Join { left, right, projection } => { - let Rendered::Arrangement(l) = &nodes[left] else { panic!("Join: left input must be an Arrangement") }; - let Rendered::Arrangement(r) = &nodes[right] else { panic!("Join: right input must be an Arrangement") }; - nodes.insert(id, Rendered::Collection(render_join(l.clone(), r.clone(), projection))); - }, - Node::Reduce { input, reducer } => { - let Rendered::Arrangement(a) = &nodes[input] else { panic!("Reduce: input must be an Arrangement") }; - nodes.insert(id, Rendered::Arrangement(render_reduce(a.clone(), reducer))); - }, - Node::Variable => { - let step: Product> = Product::new(0, feedback_summary::(level, 1)); - let (var, col) = Variable::new(scope, step); - nodes.insert(id, Rendered::Collection(col)); - variables.insert(id, (var, level)); - var_levels.insert(id, level); - }, - Node::Inspect { input, label } => { - let col = nodes[input].collection(); - nodes.insert(id, Rendered::Collection(render_inspect(col, label.clone()))); - }, - Node::Leave(inner_id, scope_level) => { - let c = nodes[inner_id].collection(); - nodes.insert(id, Rendered::Collection(super::columnar::leave_dynamic(c, *scope_level))); - }, - Node::Scope => { level += 1; }, - Node::EndScope => { level -= 1; }, - Node::Bind { variable, value } => { - let c = nodes[value].collection(); - let (var, _) = variables.remove(variable).expect("Bind: variable not found"); - var.set(c); - }, - } - } - - nodes.into_iter().filter_map(|(id, r)| match r { Rendered::Collection(c) => Some((id, c)), _ => None }).collect() - } } use differential_dataflow::dynamic::pointstamp::PointStamp; @@ -421,40 +357,12 @@ use differential_dataflow::dynamic::pointstamp::PointStamp; type DdirOuterUpdate = (Row, Row, u64, Diff); fn run(name: &str, stmts: Vec, n_inputs: 
usize, nodes: u64, edges: u64, arity: usize, batch: u64, rounds: Option) { - // The scope-tree IR (lower_tree + render_tree, one timely region per scope) - // is the default path; FLAT=1 forces the flat path for A/B comparison. - let tree_mode = std::env::var("FLAT").is_err(); - let (tree, compiled, result_id, tree_export_idx); - if tree_mode { - let mut t = lower::lower_tree(stmts); - let ops_before = t.op_count(); - t.optimize(); - tree_export_idx = t.root.exports.iter().position(|e| e.name == "result").unwrap_or(0); - println!("{}: tree mode; {} ops before optimize, {} after; driving export {:?}", - name, ops_before, t.op_count(), t.root.exports[tree_export_idx].name); - tree = Some(t); - compiled = Program { nodes: std::collections::BTreeMap::new(), export: vec![] }; - result_id = 0; - } else { - let mut c: Program = lower::lower(stmts); - println!("{}: {} IR nodes (before optimize)", name, c.nodes.len()); - c.optimize(); - println!("{}: {} IR nodes (after optimize), exports = {:?}", - name, c.nodes.len(), - c.export.iter().map(|(n, id)| (n.as_str(), *id)).collect::>()); - c.dump(); - let (driven_name, id) = { - let pick = c.export.iter().find(|(n, _)| n == "result") - .or_else(|| c.export.first()) - .expect("ddir_col: program declares no exports"); - (pick.0.clone(), pick.1) - }; - println!("{}: driving export {:?} (id {})", name, driven_name, id); - tree = None; - compiled = c; - result_id = id; - tree_export_idx = 0; - } + let mut tree = lower::lower_tree(stmts); + let ops_before = tree.op_count(); + tree.optimize(); + let tree_export_idx = tree.root.exports.iter().position(|e| e.name == "result").unwrap_or(0); + println!("{}: {} ops before optimize, {} after; driving export {:?}", + name, ops_before, tree.op_count(), tree.root.exports[tree_export_idx].name); let name = name.to_string(); timely::execute_from_args(std::env::args().skip(4), move |worker| { @@ -476,18 +384,13 @@ fn run(name: &str, stmts: Vec, n_inputs: usize, nodes: u64, edges: let mut probe = 
timely::dataflow::ProbeHandle::new(); let output = scope.iterative::, _, _>(|inner| { let entered: Vec<_> = collections.iter().map(|c| c.clone().enter(inner)).collect(); - if let Some(tree) = &tree { - let root_imports: Vec<_> = tree.root.imports.iter().map(|imp| match &imp.from { - interactive::scope_ir::Source::Input(n) => entered[*n].clone(), - interactive::scope_ir::Source::Trace(name) => panic!("ddir_col: Import {:?} not supported in this harness (no trace registry).", name), - interactive::scope_ir::Source::Parent(_) => unreachable!("root scope cannot import from a parent"), - }).collect(); - let exports = render::render_tree(&tree.root, inner, 0, root_imports); - exports[tree_export_idx].clone().leave(scope) - } else { - let rendered = render::render_program(&compiled, inner, &entered); - rendered[&result_id].clone().leave(scope) - } + let root_imports: Vec<_> = tree.root.imports.iter().map(|imp| match &imp.from { + interactive::scope_ir::Source::Input(n) => entered[*n].clone(), + interactive::scope_ir::Source::Trace(name) => panic!("ddir_col: Import {:?} not supported in this harness (no trace registry).", name), + interactive::scope_ir::Source::Parent(_) => unreachable!("root scope cannot import from a parent"), + }).collect(); + let exports = render::render_tree(&tree.root, inner, 0, root_imports); + exports[tree_export_idx].clone().leave(scope) }); output.probe_with(&mut probe); (handles, probe) diff --git a/interactive/examples/ddir_vec.rs b/interactive/examples/ddir_vec.rs index 535e36c9d..008aae128 100644 --- a/interactive/examples/ddir_vec.rs +++ b/interactive/examples/ddir_vec.rs @@ -1,6 +1,6 @@ //! DD IR vec-backed backend: parse, lower, render, execute. //! -//! With `--explain`, applies `interactive::explain::explain` after lowering +//! With `--explain`, applies the explanation rewrite (explain_tree) after lowering //! and treats the last input handle as the query input (seeded from the //! `QUERY` env var, format `"key_fields:val_fields"`). 
@@ -9,7 +9,6 @@ use mimalloc::MiMalloc; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; -use std::collections::HashMap; use std::sync::Arc; use timely::order::Product; use timely::dataflow::Scope; @@ -26,7 +25,7 @@ use smallvec::smallvec as svec; use interactive::parse; use interactive::lower; use interactive::scope_ir as st; -use interactive::ir::{Node, LinearOp, Program, Diff, Id, Time, eval_fields, eval_field_into, eval_condition}; +use interactive::ir::{LinearOp, Diff, Time, eval_fields, eval_field_into, eval_condition}; type Row = SmallVec<[i64; 2]>; type DdirTime = Product>; @@ -222,70 +221,6 @@ fn render_tree<'scope>( s.exports.iter().map(|e| resolve(&items, &imports, &var_cols, &e.value).collection()).collect() } -fn render_program<'scope>(program: &Program, scope: Scope<'scope, DdirTime>, inputs: &[Col<'scope, DdirTime>]) -> HashMap> -{ - let mut nodes: HashMap> = HashMap::new(); - let mut level: usize = 0; - let mut variables: HashMap, usize)> = HashMap::new(); - let mut var_levels: HashMap = HashMap::new(); - - for (&id, node) in program.nodes.iter() { - match node { - Node::Input(i) => { nodes.insert(id, Rendered::Collection(inputs[*i].clone())); }, - Node::Import { name } => panic!("ddir_vec: Import {:?} not supported in this harness (no trace registry).", name), - Node::Linear { input, ops } => { - let c = nodes[input].collection(); - let r = render_linear(c, ops.clone(), level); - nodes.insert(id, Rendered::Collection(r)); - }, - Node::Concat(ids) => { let mut r = nodes[&ids[0]].collection(); for i in &ids[1..] 
{ r = r.concat(nodes[i].collection()); } nodes.insert(id, Rendered::Collection(r)); }, - Node::Arrange(input) => { nodes.insert(id, Rendered::Arrangement(nodes[input].arrange())); }, - Node::Join { left, right, projection } => { - let Rendered::Arrangement(l) = &nodes[left] else { panic!("Join: left input must be an Arrangement") }; - let Rendered::Arrangement(r) = &nodes[right] else { panic!("Join: right input must be an Arrangement") }; - let l = l.clone(); - let r = r.clone(); - let proj = projection.clone(); - let f: Arc smallvec::SmallVec<[(Row, Row); 2]> + Send + Sync> = - Arc::new(move |key, left, right| { let i = [key.as_slice(), left.as_slice(), right.as_slice()]; svec![(eval_fields(&proj.key, &i), eval_fields(&proj.val, &i))] }); - let result = l.join_core(r, move |k, v1, v2| f(k, v1, v2)); - nodes.insert(id, Rendered::Collection(result)); - }, - Node::Reduce { input, reducer } => { - let Rendered::Arrangement(a) = &nodes[input] else { panic!("Reduce: input must be an Arrangement") }; - let a = a.clone(); - let f: Arc) + Send + Sync> = match reducer { - parse::Reducer::Min => Arc::new(|_key, vals, output| { if let Some(min) = vals.iter().map(|(v, _)| *v).min() { output.push((min.clone(), 1)); } }), - parse::Reducer::Distinct => Arc::new(|_key, _vals, output| { output.push((Row::new(), 1)); }), - parse::Reducer::Count => Arc::new(|_key, vals, output| { let count: Diff = vals.iter().map(|(_, d)| *d).sum(); if count > 0 { let mut r = Row::new(); r.push(count); output.push((r, 1)); } }), - }; - let reduced = a.reduce_abelian::<_, differential_dataflow::trace::implementations::ValBuilder<_,_,_,_>, ValSpine<_,_,_,_>, _>( - "Reduce", - move |k, v, o| f(k, v, o), - |vec, key, upds| { vec.clear(); vec.extend(upds.drain(..).map(|(v,t,r)| ((key.clone(), v),t,r))); }, - ); - nodes.insert(id, Rendered::Arrangement(reduced)); - }, - Node::Variable => { - let step: Product> = Product::new(0, feedback_summary::(level, 1)); - let (var, col) = VecVariable::new(scope, 
step); - nodes.insert(id, Rendered::Collection(col)); variables.insert(id, (var, level)); var_levels.insert(id, level); - }, - Node::Inspect { input, label } => { - let col = nodes[input].collection(); - let label = label.clone(); - nodes.insert(id, Rendered::Collection(col.inspect(move |x| eprintln!(" [{}] {:?}", label, x.clone())))); - }, - Node::Leave(inner_id, scope_level) => { nodes.insert(id, Rendered::Collection(nodes[inner_id].collection().leave_dynamic(*scope_level))); }, - Node::Scope => { level += 1; }, - Node::EndScope => { level -= 1; }, - Node::Bind { variable, value } => { let c = nodes[value].collection(); let (var, _) = variables.remove(variable).expect("Bind: variable not found"); var.set(c); }, - } - } - - nodes.into_iter().filter_map(|(id, r)| match r { Rendered::Collection(c) => Some((id, c)), _ => None }).collect() -} - fn run( name: &str, stmts: Vec, @@ -298,51 +233,30 @@ fn run( explain: bool, ) { // The scope-tree IR (lower_tree + render_tree, one timely region per scope) - // is the default path. `--explain` still goes through the flat IR (the - // explanation rewrite operates on it), and FLAT=1 forces the flat path - // for A/B comparison; outputs must match either way. - let tree_mode = !explain && std::env::var("FLAT").is_err(); - let (tree, compiled, result_id, tree_export_idx); - if tree_mode { - let mut t = lower::lower_tree(stmts); - let ops_before = t.op_count(); - t.optimize(); - tree_export_idx = t.root.exports.iter().position(|e| e.name == "result").unwrap_or(0); - println!("{}: tree mode; {} ops before optimize, {} after; driving export {:?}", - name, ops_before, t.op_count(), t.root.exports[tree_export_idx].name); - tree = Some(t); - compiled = Program { nodes: std::collections::BTreeMap::new(), export: vec![] }; - result_id = 0; - } else { - let mut c: Program = lower::lower(stmts); - // When --explain is set, rewrite the program for self-explanation - // before optimization. 
The transformed program declares one extra - // input (the query); the last handle below is reserved for it and - // seeded from `QUERY=`. - if explain { - let input_arities = vec![(arity, 0usize); n_inputs]; - let import_arities = std::collections::BTreeMap::new(); - c = interactive::explain::explain(&c, &input_arities, &import_arities); - } - println!("{}: {} IR nodes (before optimize)", name, c.nodes.len()); - c.optimize(); - println!("{}: {} IR nodes (after optimize), exports = {:?}", - name, c.nodes.len(), - c.export.iter().map(|(n, id)| (n.as_str(), *id)).collect::>()); - c.dump(); - // Drive one export: prefer `$result`, else the first declared. - let (driven_name, id) = { - let pick = c.export.iter().find(|(n, _)| n == "result") - .or_else(|| c.export.first()) - .expect("ddir_vec: program declares no exports"); - (pick.0.clone(), pick.1) - }; - println!("{}: driving export {:?} (id {})", name, driven_name, id); - tree = None; - compiled = c; - result_id = id; - tree_export_idx = 0; + // is the only path, including for `--explain` (explain_tree); the flat + // IR path and its FLAT=1 override no longer exist. + let mut query_shape: Option<(usize, usize)> = None; + let mut tree = lower::lower_tree(stmts); + // CLONE_RT=1 routes the program through the explain rewrite's + // clone-with-lifts as an identity check: outputs must be unchanged. + if std::env::var("CLONE_RT").is_ok() { + tree = interactive::explain_tree::clone_identity(&tree); + } + // --explain: rewrite for self-explanation before optimization (the + // rules assume single-op Linears). Sources are the root's imports. 
+ if explain { + let source_shapes: Vec<(usize, usize)> = tree.root.imports.iter().map(|imp| match &imp.from { + interactive::scope_ir::Source::Input(_) => (arity, 0usize), + other => panic!("ddir_vec --explain: unsupported source {:?}", other), + }).collect(); + query_shape = Some(interactive::explain_tree::export_shape(&tree, &source_shapes)); + tree = interactive::explain_tree::explain_tree(&tree, &source_shapes); } + let ops_before = tree.op_count(); + tree.optimize(); + let tree_export_idx = tree.root.exports.iter().position(|e| e.name == "result").unwrap_or(0); + println!("{}: {} ops before optimize, {} after; driving export {:?}", + name, ops_before, tree.op_count(), tree.root.exports[tree_export_idx].name); let name = name.to_string(); let total_inputs = if explain { n_inputs + 1 } else { n_inputs }; let query_input_idx = if explain { Some(n_inputs) } else { None }; @@ -371,18 +285,13 @@ fn run( let mut probe = timely::dataflow::ProbeHandle::new(); let output = scope.iterative::, _, _>(|inner| { let entered: Vec<_> = collections.iter().map(|c| c.clone().enter(inner)).collect(); - if let Some(tree) = &tree { - let root_imports: Vec<_> = tree.root.imports.iter().map(|imp| match &imp.from { - st::Source::Input(n) => entered[*n].clone(), - st::Source::Trace(name) => panic!("ddir_vec: Import {:?} not supported in this harness (no trace registry).", name), - st::Source::Parent(_) => unreachable!("root scope cannot import from a parent"), - }).collect(); - let exports = render_tree(&tree.root, inner, 0, root_imports); - exports[tree_export_idx].clone().leave(scope) - } else { - let rendered = render_program(&compiled, inner, &entered); - rendered[&result_id].clone().leave(scope) - } + let root_imports: Vec<_> = tree.root.imports.iter().map(|imp| match &imp.from { + st::Source::Input(n) => entered[*n].clone(), + st::Source::Trace(name) => panic!("ddir_vec: Import {:?} not supported in this harness (no trace registry).", name), + st::Source::Parent(_) => 
unreachable!("root scope cannot import from a parent"), + }).collect(); + let exports = render_tree(&tree.root, inner, 0, root_imports); + exports[tree_export_idx].clone().leave(scope) }); output.probe_with(&mut probe); (handles, probe) @@ -416,6 +325,11 @@ fn run( let q_key: Row = parse_row(k_str); let mut q_val: Row = parse_row(vq_str); if q_val.is_empty() { q_val.push(0); } + if let Some((qk, qv)) = query_shape { + assert!(q_key.len() == qk && q_val.len() == qv + 1, + "QUERY shape mismatch: export is (k={}, v={}), so QUERY needs {} key field(s) and {} val field(s) + q; got key={:?} val_with_q={:?}", + qk, qv, qk, qv, q_key, q_val); + } eprintln!("seeding query: key={:?} val_with_q={:?}", q_key, q_val); inputs[q_idx].update((q_key, q_val), 1); } diff --git a/interactive/examples/dump_explain.rs b/interactive/examples/dump_explain.rs index 800e27f92..867e7f654 100644 --- a/interactive/examples/dump_explain.rs +++ b/interactive/examples/dump_explain.rs @@ -1,17 +1,12 @@ -//! Print the original and explain-rewritten programs as `.ddp`-ish source. +//! Print a program's scope-tree IR, before and after the explanation rewrite. //! //! Usage: `dump_explain `. //! -//! Goal is readability, not strict parseability — the IR loses original -//! names so we invent `nID` names. Scope/EndScope become `scope_N: { ... }` -//! blocks; Variable + Bind become `var nID = body;` at the Bind point. +//! Output is the structural tree dump (readability, not parseability): +//! per scope, imports and vars first, items in order with `Sub`s nested, +//! then binds and exports. 
-use std::collections::BTreeMap; -use std::collections::BTreeSet; - -use interactive::ir::{Id, LinearOp, Node, Program}; -use interactive::parse::{Condition, FieldExpr, Projection, Reducer}; -use interactive::{explain, lower, parse}; +use interactive::{lower, parse, scope_ir}; fn main() { let path = std::env::args().nth(1).expect("usage: dump_explain "); @@ -23,180 +18,22 @@ fn main() { } else { parse::applicative::parse(&source) }; - let (n_inputs, imports) = interactive::survey_sources(&stmts); - let original = lower::lower(stmts); + let original = lower::lower_tree(stmts); println!("-- ===================================================="); - println!("-- ORIGINAL ({} nodes)", original.nodes.len()); + println!("-- ORIGINAL ({} ops)", original.op_count()); println!("-- ===================================================="); - print_ddp(&original); + original.dump(); - let input_arities = vec![(arity, 0usize); n_inputs]; - let import_arities: BTreeMap = imports - .iter() - .map(|n| (n.clone(), (arity, 0usize))) - .collect(); - let rewritten = explain::explain(&original, &input_arities, &import_arities); + let source_shapes: Vec<(usize, usize)> = original.root.imports.iter().map(|imp| match &imp.from { + scope_ir::Source::Input(_) | scope_ir::Source::Trace(_) => (arity, 0usize), + scope_ir::Source::Parent(_) => unreachable!("root scope cannot import from a parent"), + }).collect(); + let rewritten = interactive::explain_tree::explain_tree(&original, &source_shapes); println!(); println!("-- ===================================================="); - println!("-- AFTER explain rewrite ({} nodes)", rewritten.nodes.len()); + println!("-- EXPLAIN ({} ops)", rewritten.op_count()); println!("-- ===================================================="); - print_ddp(&rewritten); -} - -/// Walk a Program's nodes in id order, emitting `.ddp`-ish let / var / -/// scope statements. Generated names are `nID`. 
-fn print_ddp(p: &Program) { - // Pre-scan: which Variables are bound to which values? - let var_body: BTreeMap = p.nodes.iter().filter_map(|(_, n)| { - if let Node::Bind { variable, value } = n { - Some((*variable, *value)) - } else { - None - } - }).collect(); - // Mark which nodes appear inside a Reduce's Arrange-then-Reduce pair, - // so we can fold them into a single `| distinct` / `| min` / `| count`. - let mut reduce_arrange_inputs: BTreeSet = BTreeSet::new(); - for (_, node) in &p.nodes { - if let Node::Reduce { input, .. } = node { - reduce_arrange_inputs.insert(*input); - } - } - // Same for Join's Arrange-Arrange pair. - let mut join_arrange_inputs: BTreeSet = BTreeSet::new(); - for (_, node) in &p.nodes { - if let Node::Join { left, right, .. } = node { - join_arrange_inputs.insert(*left); - join_arrange_inputs.insert(*right); - } - } - - let mut indent: usize = 0; - for (&id, node) in &p.nodes { - let pad = " ".repeat(indent); - match node { - Node::Scope => { - println!("{}scope_{}: {{", pad, id); - indent += 1; - } - Node::EndScope => { - indent = indent.saturating_sub(1); - let pad = " ".repeat(indent); - println!("{}}}", pad); - } - Node::Input(i) => { - println!("{}let n{} = input {};", pad, id, i); - } - Node::Import { name } => { - println!("{}let n{} = import {:?};", pad, id, name); - } - Node::Linear { input, ops } => { - println!("{}let n{} = n{} | {};", pad, id, input, fmt_linear_ops(ops)); - } - Node::Concat(ids) => { - let names: Vec = ids.iter().map(|i| format!("n{}", i)).collect(); - println!("{}let n{} = {};", pad, id, names.join(" + ")); - } - Node::Arrange(input) => { - // Fold into the Reduce / Join that wraps it: skip emitting - // a separate name. (Otherwise just say `| arrange`.) 
- if reduce_arrange_inputs.contains(&id) || join_arrange_inputs.contains(&id) { - println!("{}-- (n{}: arrange of n{} — folded into next op)", pad, id, input); - } else { - println!("{}let n{} = n{} | arrange;", pad, id, input); - } - } - Node::Join { left, right, projection } => { - println!( - "{}let n{} = n{} | join(n{}, {});", - pad, id, left, right, fmt_projection(projection) - ); - } - Node::Reduce { input, reducer } => { - let op = match reducer { - Reducer::Min => "min", - Reducer::Distinct => "distinct", - Reducer::Count => "count", - }; - // `input` is the wrapping Arrange; show the un-arranged source. - let source = match p.nodes.get(input) { - Some(Node::Arrange(inner)) => *inner, - _ => *input, - }; - println!("{}let n{} = n{} | {};", pad, id, source, op); - } - Node::Variable => { - // Defer to the matching Bind. Note the placeholder. - match var_body.get(&id) { - Some(body_id) => println!("{}-- (n{}: Variable, bound to n{} below)", pad, id, body_id), - None => println!("{}-- (n{}: Variable, never bound)", pad, id), - } - } - Node::Inspect { input, label } => { - println!("{}let n{} = n{} | inspect({});", pad, id, input, label); - } - Node::Leave(inner, level) => { - println!("{}let n{} = leave(n{}, level={});", pad, id, inner, level); - } - Node::Bind { variable, value } => { - println!("{}var n{} = n{};", pad, variable, value); - } - } - } - let pad = " ".repeat(indent); - for (name, id) in &p.export { - if name == "result" { - println!("{}result n{};", pad, id); - } else { - println!("{}export {:?} = n{};", pad, name, id); - } - } -} - -fn fmt_linear_ops(ops: &[LinearOp]) -> String { - ops.iter().map(fmt_linear_op).collect::>().join(" | ") -} - -fn fmt_linear_op(op: &LinearOp) -> String { - match op { - LinearOp::Project(p) => format!("key({})", fmt_projection_body(p)), - LinearOp::Filter(c) => format!("filter({})", fmt_condition(c)), - LinearOp::Negate => "negate".into(), - LinearOp::EnterAt(f) => format!("enter_at({})", fmt_field(f)), - 
LinearOp::LiftIter => "lift_iter".into(), - } -} - -fn fmt_projection(p: &Projection) -> String { - format!("({})", fmt_projection_body(p)) -} - -fn fmt_projection_body(p: &Projection) -> String { - let key: Vec = p.key.iter().map(fmt_field).collect(); - let val: Vec = p.val.iter().map(fmt_field).collect(); - format!("{} ; {}", key.join(", "), val.join(", ")) -} - -fn fmt_field(f: &FieldExpr) -> String { - match f { - FieldExpr::Pos(i) => format!("$ {}", i), - FieldExpr::Index(r, c) => format!("${}[{}]", r, c), - FieldExpr::Const(v) => format!("{}", v), - FieldExpr::Neg(inner) => format!("-{}", fmt_field(inner)), - FieldExpr::Sub(a, b) => format!("({} - {})", fmt_field(a), fmt_field(b)), - } -} - -fn fmt_condition(c: &Condition) -> String { - match c { - Condition::Eq(a, b) => format!("{} == {}", fmt_field(a), fmt_field(b)), - Condition::Ne(a, b) => format!("{} != {}", fmt_field(a), fmt_field(b)), - Condition::Lt(a, b) => format!("{} < {}", fmt_field(a), fmt_field(b)), - Condition::Le(a, b) => format!("{} <= {}", fmt_field(a), fmt_field(b)), - Condition::Gt(a, b) => format!("{} > {}", fmt_field(a), fmt_field(b)), - Condition::Ge(a, b) => format!("{} >= {}", fmt_field(a), fmt_field(b)), - Condition::And(a, b) => format!("({}) && ({})", fmt_condition(a), fmt_condition(b)), - } + rewritten.dump(); } diff --git a/interactive/src/explain.rs b/interactive/src/explain.rs deleted file mode 100644 index 52801b4fb..000000000 --- a/interactive/src/explain.rs +++ /dev/null @@ -1,1422 +0,0 @@ -//! DDIR-to-DDIR explanation rewrite. -//! -//! `explain(p)` transforms a Program into one whose execution produces -//! per-input demand-set explanations for queries against the original program's result. -//! -//! ### Architecture (per design notes) -//! -//! ```text -//! -- INPUT: -//! -- a. a user dataflow, -//! -- b. live input sources, -//! -- c. live output queries. -//! -- -//! -- This module is a pure IR-to-IR transform: it returns a Program. Any DDIR -//! 
-- backend (`ddir_vec`, `ddir_col`) then executes that Program; running it on -//! -- the inputs + a stream of output queries produces, for each output query, -//! -- a subset of the data sources that "explains" the query — running the user -//! -- dataflow on that subset reproduces the queried output. The explanation -//! -- updates live as the data sources and queries change. -//! -- -//! -- The returned Program contains two clones of the user dataflow plus a -//! -- reverse-tracing dataflow: -//! -- -//! -- The *witness* clone runs on the actual data sources and produces the -//! -- reference output the explanation must reproduce. -//! -- -//! -- Alongside, in an iterative scope `explain`, the *forward* clone runs the -//! -- user dataflow on the data sources restricted to per-input *demand-sets*. -//! -- Demand-sets start empty and grow until the forward clone reproduces every -//! -- queried output. -//! -- -//! -- The demand-sets are populated by a reverse-tracing dataflow. Each output -//! -- query enters the reverse dataflow as an "input" to it; it traverses each -//! -- operator in turn, asking that operator's inputs for the updates that -//! -- could play a role in forming the queried output. -//! -- -//! -- IN0 \ d_IN0 <-\ -//! -- OP -> OUT becomes OP^d <- d_OUT -//! -- IN1 / d_IN1 <-/ -//! -- -//! -- The unit of demand is `(data, time)`, where `time` is the user dataflow's -//! -- iteration coordinate(s) nested under the ambient (host) time. Time is -//! -- load-bearing: an input may only "explain" an output if it came before it. -//! -- -//! -- Both clones use `clone_with_lifts`, which inserts `lift_iter` at every -//! -- scope exit so that every collection inside a user scope has a "host- -//! -- visible" form at the outer (explain) scope with its user-iter coords -//! -- folded into the value. The `CloneResult.host[id]` map exposes this -//! -- outer-scope id for every node; the reverse rules read from it to build -//! -- their witness pair tables. 
-//! -- -//! -- The result is one demand-set per input — the subset of input rows that, -//! -- taken together, reproduce every queried output. -//! -//! witness: { } -//! -//! explain: { -//! -- per-input demand-set variables; grown by `reverse` below -//! -- to whatever proves sufficient to reproduce the demanded outputs. -//! -//! forward: { } -//! reverse: { backwards trace from output queries to demand-sets } -//! } -//! ``` -//! -//! ### Vocabulary -//! -//! Three related but distinct concepts share the "demand" word: -//! -//! - **demand-set** (`demand_set_`, one per Input): the accumulating -//! per-input subset — the *result* of explanation. -//! - **demand variable** (`demand_`, one per non-trivial IR node): an -//! intermediate feedback variable through which the reverse-tracing -//! dataflow propagates demand from outputs back to inputs. -//! - **demand row**: a single record `(data, time, q)` flowing through -//! the reverse dataflow — "output query `q` needs `data` produced at -//! `time` from this point." -//! -//! For an IR node `N` at user-scope depth `D`, a demand row in `demand_` -//! has shape: -//! -//! ```text -//! (K_N ; V_N ++ user_chain[D] ++ [q]) -//! ``` -//! -//! where `user_chain[D]` is one i64 per enclosing user scope (innermost -//! first; the position lifted by the innermost scope's `lift_iter` comes -//! first). `q` is the query id. -//! -//! ### Out of scope for this pass -//! -//! - `Reduce::Count` — handled by the same keyed lookup as Min/Distinct, -//! but without value-narrowing. A real Count-flavored rule would need to -//! reason about which input rows summed to the demanded count. -//! - Multi-op `Linear` chains. `emit_reverse` currently panics on these; -//! the rewrite assumes the optimizer leaves Linear nodes single-op. -//! - Pre-optimization. The optimizer can clean up dead lifts and unused -//! nodes after the rewrite emits them. -//! -//! ### Future investigation -//! -//! 
**Self-protection at non-pass-through ops.** Today, Concat is the one -//! pass-through-eligible op that *isn't* pass-through, because its per-input -//! SP lookup against `host[input]` is what filters spurious demand routed -//! to consumers downstream of the Concat — specifically, demand at user_chain -//! values that the consumer never actually emitted. The Join's right contrib -//! is one example: it filters on `u_R ≤ u_out` (its own pair's right side), -//! but with static edges where `u_R = 0`, that filter admits demand at any -//! `u_out ≥ 0` — including u_out values the Join never produced output at. -//! The Concat's per-input SP catches this externally. -//! -//! A cleaner model would shift the responsibility inward: every -//! non-pass-through op (Reduce, Join, Linear[Project]) self-filters its -//! incoming `dep_y` against `host[]` at the start of its bward, -//! asking "is this dep row consistent with what I actually emitted?" -//! Concat would then become pass-through. Each op becomes responsible for -//! receiving only valid demand, no externality on its consumers. -//! -//! Trade-off: a self-filter adds ~5 IR nodes per non-pass-through op vs. -//! the current ~5 nodes per Concat input. Programs with many ops behind one -//! Concat win; programs with few Concats and many ops lose. The diagnostic -//! value is the bigger gain — fewer "action at a distance" relationships -//! to reason about. -//! -//! ## Extending the IR — touchpoints -//! -//! When adding a new variant to one of the three IR enums, the type system -//! will catch most omissions because the semantic dispatch sites (those -//! enumerated below as "compile-error sites") are exhaustive matches. The -//! list still helps a contributor know *why* each site needs an update. -//! -//! ### New `Node` variant -//! -//! - `ir.rs`: define the variant; update `Program::depths()` if the node -//! enters/leaves a scope; update the IR printer (`Display`/`dump`). -//! 
- `lower.rs`, `parse/`: lowering + parser support, if user-facing. -//! - `explain.rs::arities::compute_arities`: produce the new node's -//! `(k, v)` shape. *Compile-error site.* -//! - `explain.rs::clone::clone_with_lifts`: emit the variant in the -//! witness/forward clones; decide if `host[N]` aliases an input (like -//! `Arrange`/`Leave`) or stands alone. *Two compile-error sites: the -//! control-flow vs data-node dispatch above, and the emission match.* -//! - `explain.rs::reverse::emit_reverse`: *the backward rule.* Decide: -//! - pass-through (data and scope unchanged — `Arrange`, `Inspect`)? -//! - host-lookup against the input's pair table (`Reduce`-style, -//! `Linear[Project]`-style, `Concat`-per-input)? -//! - special-purpose (`Leave` injects user_chain; `Bind` advances iter)? -//! - Does it cross a depth boundary? Only `Leave` does today; if your -//! node does too, mirror its structure (and look at the host vs dep -//! user-chain split on `Side`). -//! *Compile-error site.* -//! -//! ### New `LinearOp` variant -//! -//! - `ir.rs`: define; printer. -//! - `parse/`, `lower.rs`: parser + lowering if user-facing. -//! - `explain.rs::arities::apply_ops_arity`: how the op changes `(k, v)`. -//! *Compile-error site.* -//! - `explain.rs::reverse::emit_reverse`'s `Linear` arm: backward rule. -//! *Compile-error site.* -//! -//! ### New `FieldExpr` variant -//! -//! - `ir.rs`: define; `eval_field_into`, `eval_field_raw` (forward eval). -//! *Compile-error sites.* -//! - `parse/`: syntax, optional. (`Sub` is rewrite-emitted only today — no -//! parser entry; users can't write `$1[0] - 1`.) -//! - `explain.rs::analyze_lossy_invertibility`: classify as recoverable -//! (drives the pure-map shortcut) or opaque (falls back to the -//! pair-table path — sound but lossier). *Compile-error site.* -//! - `explain.rs::expand_pos_one` (used inside `emit_lookup_join`): -//! propagate the variant through `Pos` expansion. 
*Compile-error site.* - -use std::collections::BTreeMap; - -use crate::ir::{Id, Node, Program}; -use crate::parse::{FieldExpr, Projection, Reducer}; - -use builder::Builder; -use arities::compute_arities; -use clone::CloneResult; - -/// Transform a `Program` into one whose execution produces per-input -/// demand-set explanations for queries against the original's result. -/// See the module doc for the architecture. -/// -/// `input_arities` gives `(key_arity, val_arity)` per positional input; -/// `import_arities` gives the same per named import (entries needed for -/// every distinct `Import { name }` referenced in `p`). Both are -/// necessary because data-source shapes aren't recoverable from the IR -/// alone (Projections only invert with known input arity). -pub fn explain( - p: &Program, - input_arities: &[(usize, usize)], - import_arities: &BTreeMap, -) -> Program { - let mut b = Builder::new(); - let arities = compute_arities(p, input_arities, import_arities); - let depths = p.depths(); - // The two user-chain lengths we track at each node: - // - // * `host_user_lens[N]` — how many user-iter coords the lifted, host- - // visible collection at `N` carries in val. For most nodes this is - // `depths[N]`; for `Leave(inner, _)`, host[Leave] aliases host[inner], - // so the host length equals `depths[inner]` (one greater than - // `Leave`'s own depth). Used only for *lookups* against `host[N]` — - // i.e. via `Side`'s pair construction. - // - // * `dep_user_lens[N]` — how many user-iter coords the *demand* at `N` - // carries in val. This is always `depths[N]` — no override. - // - // The split matters at `Leave`: dep[Leave] lives in the outer scope, - // so it carries the outer scope's user_chain length, even though - // host[Leave] carries the (longer) inner one. `Leave`'s bward is the - // place that *injects* the extra inner user_chain coord (via an SP - // lookup against host[inner]) — no other node has to know about the - // depth boundary. 
- let host_user_lens: BTreeMap = p.nodes.iter().map(|(&id, node)| { - let len = if let Node::Leave(inner, _) = node { - *depths.get(inner).unwrap_or(&0) - } else { - *depths.get(&id).unwrap_or(&0) - }; - (id, len) - }).collect(); - let dep_user_lens: &BTreeMap = &depths; - let n_inputs = input_arities.len(); - - // Distinct import names referenced by `p`, in deterministic order. - let import_names: Vec = { - let mut s = std::collections::BTreeSet::new(); - for node in p.nodes.values() { - if let Node::Import { name } = node { s.insert(name.clone()); } - } - s.into_iter().collect() - }; - - // ---- outer scope ---- - // Original inputs of `p`, one outer-scope Import per referenced name, - // plus one extra "query" input. - let original_inputs: Vec = (0..n_inputs).map(|i| b.input(i)).collect(); - let original_imports: BTreeMap = import_names.iter() - .map(|n| (n.clone(), b.push(Node::Import { name: n.clone() }))) - .collect(); - let query_input = b.input(n_inputs); - - // witness: a clone of `p`, with lift_iter chains so every witness - // collection has a host-visible `(data, user)` form via auto-leave at - // each enclosing user scope's exit. - let witness = b.clone_with_lifts(p, &original_inputs, &original_imports, 0); - - // ---- explain scope ---- - b.scope_open(); - - // Demand-set Variables (one per input, one per import). - let demand_sets: Vec = (0..n_inputs).map(|_| b.variable()).collect(); - let import_demand_sets: BTreeMap = import_names.iter() - .map(|n| (n.clone(), b.variable())) - .collect(); - - // forward inputs: demand_set_ | semijoin(actual_input_). - // Enter actual inputs into explain scope implicitly; semijoin restricts to - // demanded rows. 
- let forward_inputs: Vec = (0..n_inputs) - .map(|i| { - let (k, v) = input_arities[i]; - b.semijoin_data(demand_sets[i], original_inputs[i], k, v) - }) - .collect(); - let forward_imports: BTreeMap = import_names.iter() - .map(|n| { - let (k, v) = import_arities[n]; - let semi = b.semijoin_data(import_demand_sets[n], original_imports[n], k, v); - (n.clone(), semi) - }) - .collect(); - - // forward: same clone procedure as witness, with substituted inputs. - // Offset = 1 because this clone lives INSIDE the explain scope: its real - // PointStamp depth at any point is one more than its local user_level. - let forward = b.clone_with_lifts(p, &forward_inputs, &forward_imports, 1); - - // Demand Variables are pre-allocated *only* for user-program `var` IR - // nodes (`Node::Variable`). These are the only places where the demand - // chain has a structurally-required feedback (forward cycles around - // user `var`s induce backward cycles that need a Variable to close). - // - // For every other node, demand is a closed-form expression computed - // in the reverse walk below. - let mut demand_var: BTreeMap = BTreeMap::new(); - for (&id, node) in &p.nodes { - if matches!(node, Node::Variable) { - demand_var.insert(id, b.variable()); - } - } - - // reverse rules. Walk nodes in *reverse* id order so each node's - // `contribs[id]` is already populated by its higher-id consumers by - // the time we reach it. For each node we compute its demand expression - // from `contribs[id]`, store it in `demand_var`, then dispatch the - // node's bward rule (which pushes onto its inputs' contribs). - // - // Query input directly seeds `contribs[first_export]` — the demand - // starts with the query rows against the first export. v0 only - // explains a single output; multi-export programs would need one - // query input per export and per-output dispatch in the seeding. 
- let primary_export = p.export.first() - .expect("explain: program has no export to seed query input against").1; - let mut contribs: BTreeMap> = BTreeMap::new(); - contribs.entry(primary_export).or_default().push(query_input); - - for (&id, node) in p.nodes.iter().rev() { - // Scope / EndScope carry no demand and have no bward action. - if matches!(node, Node::Scope | Node::EndScope) { continue; } - // Bind has no demand of its own; its handler in `emit_reverse` - // routes the variable's pre-allocated demand into the value's - // contribs. - if matches!(node, Node::Bind { .. }) { - b.emit_reverse(id, node, &witness, &forward, &demand_var, &arities, &host_user_lens, dep_user_lens, &mut contribs); - continue; - } - let cs = contribs.remove(&id).unwrap_or_default(); - if cs.is_empty() { - // No upstream demand. For a user `var`, still bind the - // pre-allocated Variable to itself so the feedback edge is - // structurally valid (it stays empty in practice). - if matches!(node, Node::Variable) { - let var = demand_var[&id]; - b.bind(var, var); - } - continue; - } - let combined = if cs.len() == 1 { cs[0] } else { b.concat(cs) }; - let (k, v) = arities[&id]; - let user_len = dep_user_lens[&id]; - let val_arity = v + user_len + 1; // V + user_chain + [q] - let dist = b.distinct_full(combined, k, val_arity); - b.debug_inspect(dist, format!("demand_{}", id)); - if matches!(node, Node::Variable) { - // User `var`: bind the pre-allocated Variable to its demand - // expression. `demand_var[id]` already holds the Variable id - // (set above); leave it as-is so consumers reference the - // Variable, not the body expression directly. - let var = demand_var[&id]; - b.bind(var, dist); - } else { - // Non-user-var: demand is the expression itself, no Variable - // wrapping. Inputs that consume this node read the expression - // id directly. 
- demand_var.insert(id, dist); - } - b.emit_reverse(id, node, &witness, &forward, &demand_var, &arities, &host_user_lens, dep_user_lens, &mut contribs); - } - - // Bind demand-set variables for inputs and imports symmetrically: - // demand_set_X := distinct(demand_set_X + (demand_ | strip | semijoin actual)). - // Build a Vec mapping input index `i` to its IR id in `p`, so the - // per-input loop below is O(n) total instead of O(n^2). Imports are - // looked up by name; multiple `Import { name }` nodes in `p` share - // demand via the dedup pass — we route from any one of them. - let mut input_ids: Vec> = vec![None; n_inputs]; - let mut import_ids: BTreeMap = BTreeMap::new(); - for (&id, node) in &p.nodes { - match node { - Node::Input(i) => { input_ids[*i] = Some(id); } - Node::Import { name } => { import_ids.entry(name.clone()).or_insert(id); } - _ => {} - } - } - for i in 0..n_inputs { - let in_id = input_ids[i].expect("input not found in program"); - let (kx, vx) = arities[&in_id]; - // Inputs are always at depth 0 → user_chain is empty. - let stripped = b.project(demand_var[&in_id], strip_user_and_q(kx, vx)); - let semi = b.semijoin_data(stripped, original_inputs[i], kx, vx); - let combined = b.concat(vec![demand_sets[i], semi]); - let dist = b.distinct_full(combined, kx, vx); - b.bind(demand_sets[i], dist); - } - for name in &import_names { - let imp_id = import_ids[name]; - let (kx, vx) = arities[&imp_id]; - let stripped = b.project(demand_var[&imp_id], strip_user_and_q(kx, vx)); - let semi = b.semijoin_data(stripped, original_imports[name], kx, vx); - let combined = b.concat(vec![import_demand_sets[name], semi]); - let dist = b.distinct_full(combined, kx, vx); - b.bind(import_demand_sets[name], dist); - } - - // Per-source demand-set ids inside the explain scope; we leave each - // out and register as a named export after closing the scope. 
- let inputs_leaves: Vec<(String, Id)> = demand_sets.iter().enumerate() - .map(|(i, &mv)| (format!("demand:input{}", i), mv)) - .collect(); - let imports_leaves: Vec<(String, Id)> = import_names.iter() - .map(|n| (format!("demand:{}", n), import_demand_sets[n])) - .collect(); - - b.scope_close(); - for (name, inner) in inputs_leaves.into_iter().chain(imports_leaves) { - let outer = b.leave(inner, 1); - b.add_export(name, outer); - } - b.into_program() -} - -/// Thin builder wrapper around `Program` for incremental IR construction. -/// -/// Each push method appends a node and returns its fresh id. Composite -/// constructors (`reduce`, `join`) emit the implicit `Arrange` wrappers the -/// IR requires of those ops, so callers can think in terms of collections. -mod builder { - use std::collections::BTreeMap; - - use crate::ir::{Id, LinearOp, Node, Program}; - use crate::parse::{Condition, Projection, Reducer}; - - pub struct Builder { - program: Program, - next_id: Id, - } - - impl Builder { - pub(super) fn new() -> Self { - Builder { - program: Program { nodes: BTreeMap::new(), export: Vec::new() }, - next_id: 0, - } - } - pub(super) fn push(&mut self, n: Node) -> Id { - let id = self.next_id; - self.next_id += 1; - self.program.nodes.insert(id, n); - id - } - pub(super) fn input(&mut self, n: usize) -> Id { self.push(Node::Input(n)) } - pub(super) fn variable(&mut self) -> Id { self.push(Node::Variable) } - pub(super) fn arrange(&mut self, input: Id) -> Id { self.push(Node::Arrange(input)) } - pub(super) fn linear(&mut self, input: Id, ops: Vec) -> Id { - self.push(Node::Linear { input, ops }) - } - pub(super) fn project(&mut self, input: Id, p: Projection) -> Id { - self.linear(input, vec![LinearOp::Project(p)]) - } - pub(super) fn filter(&mut self, input: Id, c: Condition) -> Id { - self.linear(input, vec![LinearOp::Filter(c)]) - } - pub(super) fn concat(&mut self, ids: Vec) -> Id { self.push(Node::Concat(ids)) } - pub(super) fn reduce(&mut self, input: Id, r: 
Reducer) -> Id { - let arr = self.arrange(input); - self.push(Node::Reduce { input: arr, reducer: r }) - } - pub(super) fn join(&mut self, left: Id, right: Id, p: Projection) -> Id { - let l = self.arrange(left); - let r = self.arrange(right); - self.push(Node::Join { left: l, right: r, projection: p }) - } - pub(super) fn inspect(&mut self, input: Id, label: String) -> Id { - self.push(Node::Inspect { input, label }) - } - pub(super) fn leave(&mut self, inner: Id, scope_level: usize) -> Id { - self.push(Node::Leave(inner, scope_level)) - } - pub(super) fn bind(&mut self, variable: Id, value: Id) { - self.push(Node::Bind { variable, value }); - } - pub(super) fn scope_open(&mut self) { self.push(Node::Scope); } - pub(super) fn scope_close(&mut self) { self.push(Node::EndScope); } - pub(super) fn add_export(&mut self, name: String, id: Id) { self.program.export.push((name, id)); } - pub(super) fn into_program(self) -> Program { self.program } - } -} - -/// Per-IR-node (key_arity, val_arity) inference. -/// -/// Walks the program to a fixed point, deriving each node's data-shape from -/// its inputs and the op-specific rule. Needed because input shapes aren't -/// recoverable from the IR alone — `Projection`s only invert with known -/// input arity, and lift_iter sites need to know how many user-iter coords -/// already sit in the val. -pub mod arities { - use std::collections::BTreeMap; - - use crate::ir::{Id, LinearOp, Node, Program}; - use crate::parse::{FieldExpr, Reducer}; - - pub fn compute_arities( - p: &Program, - input_arities: &[(usize, usize)], - import_arities: &BTreeMap, - ) -> BTreeMap { - // Variables are referenced before their Binds appear in id order; - // resolve a Variable's shape via its body. 
- let var_body: BTreeMap = p.nodes.iter().filter_map(|(_, n)| { - if let Node::Bind { variable, value } = n { - Some((*variable, *value)) - } else { None } - }).collect(); - - let mut out: BTreeMap = BTreeMap::new(); - loop { - let before = out.len(); - for (&id, node) in &p.nodes { - if out.contains_key(&id) { continue; } - let shape = match node { - Node::Input(i) => Some(input_arities[*i]), - Node::Import { name } => Some( - *import_arities.get(name) - .unwrap_or_else(|| panic!("explain: no arity registered for import {:?}", name)) - ), - Node::Linear { input, ops } => out.get(input).map(|s| apply_ops_arity(*s, ops)), - // Try each input — for self-recursive Variables that appear - // as `Concat([var, ...])`, the first input's shape isn't - // known on early passes; pick any input that has a known - // shape and let fixed-point iteration propagate. - Node::Concat(ids) => ids.iter().find_map(|i| out.get(i).copied()), - Node::Arrange(input) => out.get(input).copied(), - // A projection's arity is the sum of each field's *width*, - // not the count of field-exprs: `Pos(r)` is a whole-row - // reference that expands to input row `r`'s arity. The - // join's input rows are [key, left_val, right_val]. - Node::Join { left, right, projection } => match (out.get(left), out.get(right)) { - (Some(&(kl, vl)), Some(&(_kr, vr))) => { - let rows = [kl, vl, vr]; - Some((proj_arity(&projection.key, &rows), proj_arity(&projection.val, &rows))) - } - _ => None, - }, - Node::Reduce { input, reducer } => out.get(input).map(|s| match reducer { - Reducer::Distinct => (s.0, 0), - Reducer::Min => (s.0, s.1), - Reducer::Count => (s.0, 1), - }), - Node::Inspect { input, .. } => out.get(input).copied(), - Node::Leave(inner, _) => out.get(inner).copied(), - Node::Variable => var_body.get(&id).and_then(|v| out.get(v).copied()), - Node::Scope | Node::EndScope | Node::Bind { .. 
} => None, - }; - if let Some(s) = shape { out.insert(id, s); } - } - if out.len() == before { break; } - } - out - } - - fn apply_ops_arity((mut k, mut v): (usize, usize), ops: &[LinearOp]) -> (usize, usize) { - for op in ops { - match op { - // Project's input rows are [key, val]; expand `Pos` refs to - // their row arities rather than counting field-exprs. - LinearOp::Project(p) => { - let rows = [k, v]; - k = proj_arity(&p.key, &rows); - v = proj_arity(&p.val, &rows); - } - LinearOp::Filter(_) | LinearOp::Negate | LinearOp::EnterAt(_) => {} - LinearOp::LiftIter => { v += 1; } - } - } - (k, v) - } - - /// Width (output columns) a single `FieldExpr` expands to, given the - /// arities of the input rows it may reference. `Pos(r)` is a whole-row - /// reference of width `rows[r]`; index/const are single columns. - fn field_width(f: &FieldExpr, rows: &[usize]) -> usize { - match f { - FieldExpr::Pos(r) => rows.get(*r).copied().unwrap_or(0), - FieldExpr::Index(_, _) | FieldExpr::Const(_) => 1, - FieldExpr::Neg(inner) => field_width(inner, rows), - FieldExpr::Sub(a, _) => field_width(a, rows), - } - } - - /// Total arity of one projection side (`key`/`val`): the sum of its - /// fields' widths. - fn proj_arity(fields: &[FieldExpr], rows: &[usize]) -> usize { - fields.iter().map(|f| field_width(f, rows)).sum() - } -} - -/// IR cloning with implicit lift_iter at scope exits. -/// -/// `clone_with_lifts` emits `p` into `b` with a `lift_iter` chain at every -/// scope exit, so every non-Input data node has a host-visible version -/// reachable as `host[id]`. Inputs are aliased to `input_subst`. -/// -/// Convention: the lifted user-iter coords are appended innermost-first. A -/// node at depth D has D fields of user time appended to val in its host- -/// visible form. `enclosing_scope_depth` adjusts every emitted `Leave` to -/// account for an enclosing scope the clone itself sits inside. 
-mod clone { - use std::collections::BTreeMap; - - use crate::ir::{Id, LinearOp, Node, Program}; - - use super::Builder; - - pub(super) struct CloneResult { - /// The id reachable at explain scope, with user-iter coords appended to val. - pub(super) host: BTreeMap, - } - - impl Builder { - pub(super) fn clone_with_lifts( - &mut self, - p: &Program, - input_subst: &[Id], - import_subst: &BTreeMap, - enclosing_scope_depth: usize, - ) -> CloneResult { - let mut in_scope: BTreeMap = BTreeMap::new(); - let mut host: BTreeMap = BTreeMap::new(); - let mut user_level: usize = 0; - // Pending pile per enclosing user scope: (orig_id, current_cloned_id). - // On EndScope: lift_iter each (capturing this scope's iter coord into - // val), close scope, leave each to next-outer scope. If we've reached - // outer/explain scope, record in `host`; otherwise push onto the next- - // outer pile. - let mut pending: Vec> = Vec::new(); - - for (&id, node) in &p.nodes { - // Exhaustive emission per node. Control-flow nodes handle - // their own bookkeeping in-arm and yield `None`; data nodes - // yield `Some(cloned_id)`, which then shares the standard - // in_scope/host/pending registration below. - let cloned: Option = match node { - Node::Scope => { - self.scope_open(); - user_level += 1; - pending.push(Vec::new()); - None - } - Node::EndScope => { - let pile = pending.pop().expect("EndScope without Scope"); - let scope_lvl = user_level; // level inside the scope - // Lift each pending node BEFORE closing the scope. - let lifted: Vec<(Id, Id)> = pile - .into_iter() - .map(|(orig, cur)| (orig, self.linear(cur, vec![LinearOp::LiftIter]))) - .collect(); - self.scope_close(); - user_level -= 1; - // Leave each to the outer scope. `leave_dynamic(k)` truncates - // the PointStamp to length `k - 1`, so we must pass the - // *real* depth at this point, which is local `scope_lvl` - // plus the outer offset from the enclosing explain scope. 
- for (orig, lifted_id) in lifted { - let leaved = self.leave(lifted_id, scope_lvl + enclosing_scope_depth); - if user_level == 0 { - host.insert(orig, leaved); - } else { - pending.last_mut().unwrap().push((orig, leaved)); - } - } - None - } - Node::Bind { variable, value } => { - self.bind(in_scope[variable], in_scope[value]); - None - } - Node::Input(i) => { - // Inputs are at depth 0, host-visible directly. - in_scope.insert(id, input_subst[*i]); - host.insert(id, input_subst[*i]); - None - } - Node::Import { name } => { - // Imports are at depth 0, host-visible directly. - let sub = *import_subst.get(name) - .unwrap_or_else(|| panic!("clone: no substitution for import {:?}", name)); - in_scope.insert(id, sub); - host.insert(id, sub); - None - } - Node::Linear { input, ops } => { - Some(self.linear(in_scope[input], ops.clone())) - } - Node::Concat(ids) => { - let mapped: Vec = ids.iter().map(|i| in_scope[i]).collect(); - Some(self.concat(mapped)) - } - Node::Arrange(input) => Some(self.arrange(in_scope[input])), - Node::Join { left, right, projection } => Some(self.push(Node::Join { - left: in_scope[left], - right: in_scope[right], - projection: projection.clone(), - })), - Node::Reduce { input, reducer } => Some(self.push(Node::Reduce { - input: in_scope[input], - reducer: reducer.clone(), - })), - Node::Variable => Some(self.variable()), - Node::Inspect { input, label } => { - Some(self.inspect(in_scope[input], label.clone())) - } - Node::Leave(inner, scope_level) => { - Some(self.leave(in_scope[inner], *scope_level + enclosing_scope_depth)) - } - }; - - if let Some(cloned) = cloned { - in_scope.insert(id, cloned); - if user_level == 0 { - // For Arrange nodes at outer scope: alias host[N] to - // the underlying Collection (= host[input]) so backward - // rules never refer to an Arrangement across scope - // boundaries. 
- let recorded = match node { - Node::Arrange(input) => host.get(input).copied().unwrap_or(cloned), - _ => cloned, - }; - host.insert(id, recorded); - } else { - pending.last_mut().unwrap().push((id, cloned)); - } - } - } - assert!(pending.is_empty(), "Scope/EndScope imbalance in clone"); - - // Second pass: rewrite host[Leave_id] to host[inner_id] for every - // Leave in the program. host[inner] is the lifted form backward - // rules expect; the inline Leave clone above is the un-lifted form - // (right for `in_scope` references, wrong for `host`). Has to be a - // second pass because Leaves *inside* nested scopes don't have - // their inner's host entry populated until all enclosing scopes - // have closed. - for (&id, node) in &p.nodes { - if let Node::Leave(inner, _) = node { - if let Some(&host_inner) = host.get(inner) { - host.insert(id, host_inner); - } - } - } - - CloneResult { host } - } - } -} - -/// Backward (demand-propagation) rule emission. -/// -/// Per-op reverse rules all share the same skeleton: join `demand_` -/// with `witness + forward` of the inputs (possibly projected) on the op's -/// natural key, filter `user_in ≤ user_out` element-wise for soundness, -/// project to the demanded-input shape. The four `emit_lookup_*` helpers -/// differ only in how they construct the pair table and how they map fields -/// after the join; `filter_time_and_strip` is shared. -mod reverse { - use std::collections::BTreeMap; - - use crate::ir::{Id, LinearOp, Node}; - use crate::parse::{Condition, FieldExpr, Projection, Reducer}; - - use super::Builder; - use super::CloneResult; - - /// One upstream edge into a backward rule: its host-side `(data, user)` - /// collections from both clones, its data shape, and the two user-chain - /// lengths described in the top-of-module split (host vs dep). For - /// non-Leave nodes the two are equal; they only diverge at a `Leave`, - /// where the host length is the inner depth and the dep length is the - /// outer depth. 
- pub(super) struct Side { - pub(super) witness: Id, // witness.host[input] - pub(super) forward: Id, // forward.host[input] - pub(super) shape: (usize, usize), // (k_arity, v_arity) - pub(super) host_user_len: usize, - pub(super) dep_user_len: usize, - } - - impl Side { - pub(super) fn for_input( - id: Id, - witness: &CloneResult, - forward: &CloneResult, - arities: &BTreeMap, - host_user_lens: &BTreeMap, - dep_user_lens: &BTreeMap, - ) -> Self { - Self { - witness: witness.host[&id], - forward: forward.host[&id], - shape: arities[&id], - host_user_len: host_user_lens[&id], - dep_user_len: dep_user_lens[&id], - } - } - } - - impl Builder { - pub(super) fn emit_reverse( - &mut self, - id: Id, - node: &Node, - witness: &CloneResult, - forward: &CloneResult, - demand: &BTreeMap, - arities: &BTreeMap, - host_user_lens: &BTreeMap, - dep_user_lens: &BTreeMap, - contribs: &mut BTreeMap>, - ) { - // Bind has no demand entry of its own; it routes the *variable's* - // demand into the *value's* contribs as a pure map. - // - // The user `var`'s forward feedback advances `user_chain[0]` (the - // innermost user-iter coord, which is the var's own scope iter) - // by 1. Inverting that: dep at `var` with `user_chain[0] = K` - // maps to dep at body with `user_chain[0] = K - 1`. Rows with - // `user_chain[0] = 0` are filtered out — they represent a demand - // at iter 0, which has no body-side source (the body hadn't - // emitted anything by then). - // - // Replacing the prior `emit_lookup_shape_preserving` (which - // joined against host[body] and picked up *any* body emission of - // the same data, regardless of iter) tightens demand to the - // single iter that actually sourced the var's view. Sound only - // because the demand variables are non-monotone — see the - // earlier note on the bind loop. 
- if let Node::Bind { variable, value } = node { - if let Some(&dv) = demand.get(variable) { - let (kx, vx) = arities[variable]; - let var_user_len = dep_user_lens[variable]; - // Position of user_chain[0] in the dep row's val. - let chain_pos = vx; - // Filter: user_chain[0] > 0. - let filtered = self.filter( - dv, - Condition::Gt( - FieldExpr::Index(1, chain_pos), - FieldExpr::Const(0), - ), - ); - // Project: subtract 1 from user_chain[0]; leave all - // other fields (key, V_data, user_chain[1..], q) intact. - let key: Vec = - (0..kx).map(|i| FieldExpr::Index(0, i)).collect(); - let mut val: Vec = Vec::new(); - for i in 0..vx { val.push(FieldExpr::Index(1, i)); } - val.push(FieldExpr::Sub( - Box::new(FieldExpr::Index(1, chain_pos)), - Box::new(FieldExpr::Const(1)), - )); - for i in 1..var_user_len { - val.push(FieldExpr::Index(1, chain_pos + i)); - } - val.push(FieldExpr::Index(1, chain_pos + var_user_len)); - let contrib = self.project(filtered, Projection { key, val }); - contribs.entry(*value).or_default().push(contrib); - } - return; - } - let dep_this = match demand.get(&id) { - Some(&v) => v, - None => return, - }; - // `out_user_len` = number of user-iter coords in dep_'s val (after V). - let out_user_len = dep_user_lens[&id]; - let out_shape = arities[&id]; - let side = |inp: Id| Side::for_input(inp, witness, forward, arities, host_user_lens, dep_user_lens); - - match node { - Node::Input(_) | Node::Import { .. } => { /* terminal; feeds demand-set seeding. */ } - - Node::Linear { input, ops } => { - let op = match ops.as_slice() { - [single] => single, - _ => panic!("explain: multi-op Linear chain at node {}", id), - }; - match op { - LinearOp::Project(proj) => { - let input_side = side(*input); - let contrib = self.emit_lookup_lossy(dep_this, &input_side, out_shape, out_user_len, proj); - contribs.entry(*input).or_default().push(contrib); - } - LinearOp::Filter(cond) => { - // Re-apply the filter to dep_y. 
dep_y can carry - // demand for rows the filter would have rejected - // (e.g., when this Filter feeds a Concat whose - // other input could have supplied those rows); - // keeping only rows the filter would admit blocks - // that leak. Pass-through is sound here because - // input and output share a scope (Filter doesn't - // cross a depth boundary; only `Leave` does). - let contrib = self.filter(dep_this, cond.clone()); - contribs.entry(*input).or_default().push(contrib); - } - LinearOp::Negate => { - // Pure pass-through: data unchanged, scope unchanged. - contribs.entry(*input).or_default().push(dep_this); - } - LinearOp::EnterAt(_) => { - // Sound but over-broad. EnterAt is a data→time lift: it sets a new - // innermost user_chain coord `t_in = delay($field)` from the value, - // so its output is one scope deeper than its input. The 1:1 reverse - // would DROP that coord (recoverable as `delay($field)` from the - // preserved value): out ((k,v),[t_in,t_out..]) -> in ((k,v),[t_out..]). - // We instead pass demand through unchanged — tenable only because - // `depths()` is positional and treats EnterAt as depth-neutral, so - // the coord is stripped *unconstrained* by the neighboring Project - // that crosses the scope boundary. The result is a superset (kept - // sound by the `semijoin(actual_input)` at seeding); it never drops a - // needed edge. Tight fix: let `depths()` give EnterAt its own level - // and make this arm drop the innermost coord — see `depths()` (ir.rs). - contribs.entry(*input).or_default().push(dep_this); - } - LinearOp::LiftIter => { - // LiftIter is rewrite-emitted only; it should never - // appear in a user program that reaches `explain`. - panic!("explain: LiftIter in user program at node {}", id); - } - } - } - - // Concat: per-input SP lookup. Each input has its own host - // table; the filter against `host[input]` discriminates which - // input "sourced" a given dep row at a given iter. 
Demand for - // (K=4, u=0) might originate from input A (host[A] has it at - // u=0) but not from input B (host[B] has it at u=1). Pass- - // through would feed demand to both, admitting B's path - // spuriously. - Node::Concat(ids) => { - for inp in ids { - let input_side = side(*inp); - let contrib = self.emit_lookup_shape_preserving(dep_this, &input_side, out_user_len); - contribs.entry(*inp).or_default().push(contrib); - } - } - - // Pure pass-through: data unchanged, scope unchanged. - // (Both input and output live at the same depth, so they - // carry the same `dep_user_len` — no depth boundary to - // worry about. That's `Leave`'s job, below.) - Node::Arrange(input) | Node::Inspect { input, .. } => { - contribs.entry(*input).or_default().push(dep_this); - } - - // Leave is the sole depth-boundary op. dep[Leave] lives - // in the outer scope and carries `depths[Leave]` user_chain - // coords, but contribs[inner] needs `depths[inner]` (one - // more). The SP lookup against host[inner] (which carries - // user_chain via `lift_iter`) injects the extra coord — - // and only Leave needs to know about this; no other op - // has to special-case "input is a Leave." 
- Node::Leave(inner, _) => { - let inner_side = side(*inner); - let contrib = self.emit_lookup_shape_preserving(dep_this, &inner_side, out_user_len); - contribs.entry(*inner).or_default().push(contrib); - } - - Node::Reduce { input, reducer } => { - let input_side = side(*input); - let contrib = self.emit_lookup_keyed(dep_this, &input_side, out_shape, out_user_len, reducer); - contribs.entry(*input).or_default().push(contrib); - } - - Node::Join { left, right, projection } => { - let left_side = side(*left); - let right_side = side(*right); - let (left_contrib, right_contrib) = self.emit_lookup_join( - dep_this, &left_side, &right_side, - out_shape, out_user_len, projection, - ); - contribs.entry(*left).or_default().push(left_contrib); - contribs.entry(*right).or_default().push(right_contrib); - } - - Node::Variable => { /* handled by Bind. */ } - - Node::Bind { .. } | Node::Scope | Node::EndScope => {} - } - } - - /// Shape-preserving lookup: input and output have the same (k, v) shape. - /// Used by Concat — each input gets its own host lookup so the per-input - /// time filter can discriminate which input sourced a given dep row. - /// - /// pair = witness + forward of input. Shape (K; V ++ user_chain_in). - /// Repack to (K+V; user_chain_in). dep repacks to (K+V; user_chain_out ++ [q]). - /// Join on (K+V); produce (K; V ++ user_chain_in ++ user_chain_out ++ [q]), - /// then `filter_time_and_strip` enforces `user_in ≤ user_out` and drops - /// `user_chain_out` to yield `(K; V ++ user_chain_in ++ [q])`. - fn emit_lookup_shape_preserving( - &mut self, - dep_y: Id, - side: &Side, - output_depth: usize, - ) -> Id { - let (k, v) = side.shape; - // `host_user_len` is the user_chain length in host[input] — what the - // pair table actually carries. `dep_user_len` is what contribs[input] - // should carry (= depths[input]). They differ only when `input` is a - // `Leave`; the lookup truncates user_in down to `dep_user_len` on the - // way out. 
- let host_user_len = side.host_user_len; - let dep_user_len = side.dep_user_len; - let pair = self.concat(vec![side.witness, side.forward]); - let pair_keyed = self.project(pair, Projection { - key: pack_kv(k, v), - val: (0..host_user_len).map(|i| FieldExpr::Index(1, v + i)).collect(), - }); - let dep_keyed = self.project(dep_y, Projection { - key: pack_kv(k, v), - val: (0..output_depth + 1).map(|i| FieldExpr::Index(1, v + i)).collect(), - }); - // After arrange-join on (K+V): $0 = K+V, $1 = dep val, $2 = pair val. - // Keep user_out in val for the time filter, then strip. - let key: Vec = (0..k).map(|i| FieldExpr::Index(0, i)).collect(); - let mut val: Vec = Vec::new(); - for i in 0..v { val.push(FieldExpr::Index(0, k + i)); } - for i in 0..host_user_len { val.push(FieldExpr::Index(2, i)); } - for i in 0..output_depth { val.push(FieldExpr::Index(1, i)); } - val.push(FieldExpr::Index(1, output_depth)); // q - let joined = self.join(dep_keyed, pair_keyed, Projection { key, val }); - self.filter_time_and_strip(joined, k, v, host_user_len, output_depth, dep_user_len) - } - - /// Apply the soundness filter (`user_in[i] ≤ user_out[i]` element-wise) and - /// strip `user_out` from a collection whose row shape is - /// `(K[k_out]; V_pre[v_pre] ++ user_in[in_len] ++ user_out[out_len] ++ [q])`. - /// Result shape: `(K[k_out]; V_pre[v_pre] ++ user_in[0..keep_in_len] ++ [q])`. - /// - /// The filter excludes contributions whose witness input row was produced at - /// a strictly-later user-iter than the demanded output — an output cannot be - /// "explained by" an input that came after it. When `in_len` and `out_len` - /// differ, we compare only the common prefix (innermost-first ordering). - /// - /// `keep_in_len` lets callers ask for fewer user_in coords in the output - /// than the pair table carries. 
This is how a `Leave`-as-input collapses - /// host[Leave]'s inner user_chain (length `depths[inner]`) down to - /// contribs[Leave]'s outer user_chain (length `depths[Leave]`). - fn filter_time_and_strip( - &mut self, - coll: Id, - k_out: usize, - v_pre: usize, - in_len: usize, - out_len: usize, - keep_in_len: usize, - ) -> Id { - // The user_chain index arithmetic (the outer-end alignment whose - // off-by-one made SCC explain unsound) lives in `folded::Joined`, the - // single home shared with the scope-builder ports. - let layout = crate::folded::Joined { v_pre, in_len, out_len }; - let mut cur = coll; - if let Some(cond) = layout.time_le() { - cur = self.filter(cur, cond); - } - self.project(cur, layout.strip(k_out, keep_in_len)) - } - - /// Keyed lookup (Reduce-style): demand on `(K; V_out ++ user_out ++ q)` - /// maps to every input row at the same K, time-filtered against user_out. - /// Output: `(K; V_in ++ user_in ++ [q])`. - /// - /// For `Reducer::Min`, also applies the §3.2.2 value-narrowing: only - /// keep input rows whose `V_in` equals the queried `V_out` (the min). - /// Other inputs at the same key did not contribute to that min and are - /// not needed in the explanation. - fn emit_lookup_keyed( - &mut self, - dep_y: Id, - side: &Side, - output_shape: (usize, usize), - out_user_len: usize, - reducer: &Reducer, - ) -> Id { - let (k_in, v_in) = side.shape; - let in_user_len = side.host_user_len; - let keep_in_len = side.dep_user_len; - let (_, v_out) = output_shape; - let pair = self.concat(vec![side.witness, side.forward]); - // Min narrowing: include V_out in the val layout so we can filter - // V_in == V_out element-wise below. Only applies when arities match - // (true for Min: forward preserves arity) and there's a value to compare. - let include_v_out = matches!(reducer, Reducer::Min) && v_in == v_out && v_in > 0; - // After arrange-join on K: $0 = K, $1 = dep val (V_out + user_out + q), - // $2 = pair val (V_in + user_in). 
- // val layout: V_in [+ V_out for Min] + user_in + user_out + q. - let mut val: Vec = Vec::new(); - for i in 0..v_in { val.push(FieldExpr::Index(2, i)); } // V_in - if include_v_out { - for i in 0..v_out { val.push(FieldExpr::Index(1, i)); } // V_out - } - for i in 0..in_user_len { val.push(FieldExpr::Index(2, v_in + i)); } // user_in - for i in 0..out_user_len { val.push(FieldExpr::Index(1, v_out + i)); } // user_out - val.push(FieldExpr::Index(1, v_out + out_user_len)); // q - let proj = Projection { - key: (0..k_in).map(|i| FieldExpr::Index(0, i)).collect(), - val, - }; - let joined = self.join(dep_y, pair, proj); - - // For Min: filter V_in[i] == V_out[i] element-wise, then strip V_out - // back out of val so the downstream time-filter sees its expected - // (K; V_in + user_in + user_out + q) layout. - let after_min = if include_v_out { - let mut acc: Option = None; - for i in 0..v_in { - let cond = Condition::Eq( - FieldExpr::Index(1, i), // V_in[i] - FieldExpr::Index(1, v_in + i), // V_out[i] - ); - acc = Some(match acc { - None => cond, - Some(prev) => Condition::And(Box::new(prev), Box::new(cond)), - }); - } - let filtered = self.filter(joined, acc.unwrap()); - // Re-project to drop V_out, restoring (K; V_in + user_in + user_out + q). 
- let key: Vec = (0..k_in).map(|i| FieldExpr::Index(0, i)).collect(); - let mut new_val: Vec = Vec::new(); - for i in 0..v_in { new_val.push(FieldExpr::Index(1, i)); } // V_in - // skip V_out at [v_in..v_in+v_out] - let after_vout = v_in + v_out; - for i in 0..in_user_len { new_val.push(FieldExpr::Index(1, after_vout + i)); } // user_in - for i in 0..out_user_len { new_val.push(FieldExpr::Index(1, after_vout + in_user_len + i)); } // user_out - new_val.push(FieldExpr::Index(1, after_vout + in_user_len + out_user_len)); // q - self.project(filtered, Projection { key, val: new_val }) - } else { - joined - }; - - self.filter_time_and_strip(after_min, k_in, v_in, in_user_len, out_user_len, keep_in_len) - } - - /// Lossy lookup (Linear[Project]). - /// - /// Two emission paths depending on `proj`'s invertibility: - /// - /// **Pure-map shortcut.** If every input field is recoverable from the - /// output (every `(r, c)` appears as `Index(r, c)` or via `Pos(r)` in - /// `proj.key` or `proj.val`) AND the input lives at outer scope (no - /// `user_chain` to source), bypass the pair table entirely: emit a - /// single Linear that maps each `dep_y` row directly to its synthetic - /// `(K_in, V_in, q)` form. Cost is `O(|dep|)`, independent of `|input|` - /// — the algorithmic win that lets explanations stay bounded as inputs - /// grow. Any synthetic input row that doesn't actually exist gets - /// filtered out later by the `semijoin(actual_input_)` at must-set - /// seeding. - /// - /// **Fallback (pair-table).** Otherwise, the original construction: - /// `pair = (witness_input + forward_input)` projected to - /// `(chained_K_out; K_in ++ V_in ++ user_in)`. dep_y has shape - /// `(K_out; V_out ++ user_out ++ q)`. After join on K_out, output - /// `(K_in; V_in ++ user_in ++ [q])`, time-filtered. 
- fn emit_lookup_lossy( - &mut self, - dep_y: Id, - side: &Side, - output_shape: (usize, usize), - out_user_len: usize, - proj: &Projection, - ) -> Id { - let (k_in, v_in) = side.shape; - let in_user_len = side.host_user_len; - let keep_in_len = side.dep_user_len; - let (k_out, v_out) = output_shape; - - let known = analyze_lossy_invertibility(proj, k_in, v_in); - let total = (0..k_in).all(|c| known.contains_key(&(0, c))) - && (0..v_in).all(|c| known.contains_key(&(1, c))); - - // Pure-map shortcut, extended to same-scope projects of any depth. - // A Linear[Project] doesn't cross a scope boundary, so the input's - // user_chain equals the output's (in_user_len == out_user_len == - // keep_in_len when the input isn't a Leave). When every input field is - // also recoverable from the output, the whole reverse is a direct map - // from dep_y, narrowing by *all* demanded fields — including value - // fields the pair-table join (keyed on K_out only) ignores. That - // key-only match is what let sibling rows leak through a re-key that - // drops a field from the key (e.g. by_b = tc | key($0[1]; …), keyed by - // tc's destination only → fallback recovered every same-destination - // pair regardless of source). - if total && in_user_len == out_user_len && keep_in_len == in_user_len { - // Map flat output position p (into [K_out ++ V_out]) to an access - // expression against dep_y's (key, val) layout: - // p < k_out → $0[p] (key) - // p >= k_out → $1[p - k_out] (val, since dep_y.val starts with V_out) - let access = |p: usize| -> FieldExpr { - if p < k_out { FieldExpr::Index(0, p) } - else { FieldExpr::Index(1, p - k_out) } - }; - let key: Vec = (0..k_in).map(|c| access(known[&(0, c)])).collect(); - let mut val: Vec = Vec::with_capacity(v_in + in_user_len + 1); - for c in 0..v_in { val.push(access(known[&(1, c)])); } - // user_in == user_out (same scope), so copy it through. The filter - // user_in ≤ user_out then holds with equality. q at $1[v_out+out_user_len]. 
- for i in 0..out_user_len { val.push(FieldExpr::Index(1, v_out + i)); } - val.push(FieldExpr::Index(1, v_out + out_user_len)); - return self.project(dep_y, Projection { key, val }); - } - - let pair_src = self.concat(vec![side.witness, side.forward]); - // pair_src shape: (K_in; V_in ++ user_in[0..in_user_len]). - // Build pair: key = proj.key (computes chained K_out), val = K_in ++ V_in ++ user_in. - let mut pair_val: Vec = Vec::with_capacity(k_in + v_in + in_user_len); - for i in 0..k_in { pair_val.push(FieldExpr::Index(0, i)); } - for i in 0..v_in + in_user_len { pair_val.push(FieldExpr::Index(1, i)); } - let pair = self.project(pair_src, Projection { key: proj.key.clone(), val: pair_val }); - // After arrange-join on K_out: $0 = K_out, $1 = dep val (V_out + user_out + q), - // $2 = pair val (K_in + V_in + user_in). - // Keep user_out in val for the time filter, then strip. - let key: Vec = (0..k_in).map(|i| FieldExpr::Index(2, i)).collect(); - let mut val: Vec = Vec::new(); - for i in 0..v_in { val.push(FieldExpr::Index(2, k_in + i)); } - for i in 0..in_user_len { val.push(FieldExpr::Index(2, k_in + v_in + i)); } - for i in 0..out_user_len { val.push(FieldExpr::Index(1, v_out + i)); } - val.push(FieldExpr::Index(1, v_out + out_user_len)); - let joined = self.join(dep_y, pair, Projection { key, val }); - self.filter_time_and_strip(joined, k_in, v_in, in_user_len, out_user_len, keep_in_len) - } - - /// Join's backward rule: produces two contribs (left and right). 
- fn emit_lookup_join( - &mut self, - dep_y: Id, - left: &Side, - right: &Side, - output_shape: (usize, usize), - out_user_len: usize, - projection: &Projection, - ) -> (Id, Id) { - let (k_arity, v_l) = left.shape; - let (_, v_r) = right.shape; - let (_, v_out) = output_shape; - let left_user_len = left.host_user_len; - let right_user_len = right.host_user_len; - let left_keep_in_len = left.dep_user_len; - let right_keep_in_len = right.dep_user_len; - // Left and right inputs have shape (K; V_L/R ++ user_L/R[user_len]). - let left_pair_src = self.concat(vec![left.witness, left.forward]); - let right_pair_src = self.concat(vec![right.witness, right.forward]); - // Forward-join them on K, projecting to - // (chained_K_out; K ++ V_L ++ V_R ++ user_L ++ user_R). - // In the join's projection $0 = K, $1 = left val (V_L + user_L), $2 = right val. - let mut pair_val: Vec = Vec::new(); - for i in 0..k_arity { pair_val.push(FieldExpr::Index(0, i)); } - for i in 0..v_l { pair_val.push(FieldExpr::Index(1, i)); } - for i in 0..v_r { pair_val.push(FieldExpr::Index(2, i)); } - for i in 0..left_user_len { pair_val.push(FieldExpr::Index(1, v_l + i)); } - for i in 0..right_user_len { pair_val.push(FieldExpr::Index(2, v_r + i)); } - // The user's projection.key may reference `$1` / `$2` as whole-row - // expansions (FieldExpr::Pos), which is correct against the *original* - // V_L / V_R but wrong against the lift-extended host V's (which append - // user-iter coords). Rewrite Pos(i) -> bounded Index(i, 0..arity_i) so - // the projection only sees the original-shape fields and yields the - // same K_out as the user's forward join. - let pos_arities = [k_arity, v_l, v_r]; - let key_expanded = expand_pos_bounded(&projection.key, &pos_arities); - let pair = self.join( - left_pair_src, - right_pair_src, - Projection { key: key_expanded, val: pair_val }, - ); - // pair val arity: k_arity + v_l + v_r + left_user_len + right_user_len. 
- // After arrange-join with dep_y on K_out: - // $0 = K_out, $1 = dep val (v_out + out_user_len + 1), - // $2 = pair val (k_arity + v_l + v_r + left_user_len + right_user_len). - let q_pair_pos = v_out + out_user_len; - let k_pair_start = 0; - let vl_pair_start = k_pair_start + k_arity; - let vr_pair_start = vl_pair_start + v_l; - let ul_pair_start = vr_pair_start + v_r; - let ur_pair_start = ul_pair_start + left_user_len; - // Left contrib: (K; V_L + user_L + user_out + [q]), then filter+strip. - let key_left: Vec = - (0..k_arity).map(|i| FieldExpr::Index(2, k_pair_start + i)).collect(); - let mut val_left: Vec = Vec::new(); - for i in 0..v_l { val_left.push(FieldExpr::Index(2, vl_pair_start + i)); } - for i in 0..left_user_len { val_left.push(FieldExpr::Index(2, ul_pair_start + i)); } - for i in 0..out_user_len { val_left.push(FieldExpr::Index(1, v_out + i)); } - val_left.push(FieldExpr::Index(1, q_pair_pos)); - let left_joined = self.join(dep_y, pair, Projection { key: key_left, val: val_left }); - let left_contrib = self.filter_time_and_strip(left_joined, k_arity, v_l, left_user_len, out_user_len, left_keep_in_len); - // Right contrib: (K; V_R + user_R + user_out + [q]), then filter+strip. 
- let key_right: Vec = - (0..k_arity).map(|i| FieldExpr::Index(2, k_pair_start + i)).collect(); - let mut val_right: Vec = Vec::new(); - for i in 0..v_r { val_right.push(FieldExpr::Index(2, vr_pair_start + i)); } - for i in 0..right_user_len { val_right.push(FieldExpr::Index(2, ur_pair_start + i)); } - for i in 0..out_user_len { val_right.push(FieldExpr::Index(1, v_out + i)); } - val_right.push(FieldExpr::Index(1, q_pair_pos)); - let right_joined = self.join(dep_y, pair, Projection { key: key_right, val: val_right }); - let right_contrib = self.filter_time_and_strip(right_joined, k_arity, v_r, right_user_len, out_user_len, right_keep_in_len); - (left_contrib, right_contrib) - } - } - - /// Rewrite `FieldExpr::Pos(i)` in a key/val list to a bounded expansion - /// `[Index(i, 0), Index(i, 1), ..., Index(i, arities[i]-1)]`. Used in - /// `emit_lookup_join` where the user's projection is applied to host-side - /// (lift-extended) inputs — `Pos(i)` against the extended row would also - /// pick up the trailing user-iter coords, which we explicitly do not want - /// inside the key. 
- fn expand_pos_bounded(fields: &[FieldExpr], arities: &[usize]) -> Vec { - let mut out = Vec::with_capacity(fields.len()); - for f in fields { expand_pos_one(f, arities, &mut out); } - out - } - - fn expand_pos_one(f: &FieldExpr, arities: &[usize], out: &mut Vec) { - match f { - FieldExpr::Pos(i) => { - for c in 0..arities[*i] { out.push(FieldExpr::Index(*i, c)); } - } - FieldExpr::Index(_, _) | FieldExpr::Const(_) => out.push(f.clone()), - FieldExpr::Neg(inner) => { - let mut tmp = Vec::new(); - expand_pos_one(inner, arities, &mut tmp); - for t in tmp { out.push(FieldExpr::Neg(Box::new(t))); } - } - FieldExpr::Sub(a, b) => { - let mut ta = Vec::new(); - let mut tb = Vec::new(); - expand_pos_one(a, arities, &mut ta); - expand_pos_one(b, arities, &mut tb); - for (x, y) in ta.into_iter().zip(tb.into_iter()) { - out.push(FieldExpr::Sub(Box::new(x), Box::new(y))); - } - } - } - } - - fn pack_kv(k: usize, v: usize) -> Vec { - let mut out: Vec = Vec::with_capacity(k + v); - for i in 0..k { out.push(FieldExpr::Index(0, i)); } - for i in 0..v { out.push(FieldExpr::Index(1, i)); } - out - } - - /// Identify which input fields a `Linear[Project]` projection makes - /// recoverable from its output. Returns a map `(r, c) → output_position` - /// where `output_position` indexes into the flattened `[K_out, V_out]` - /// row and `(r, c)` is the input field — `r=0` for `K_in[c]`, `r=1` for - /// `V_in[c]`. - /// - /// Direct cases (`Index(r, c)` and `Pos(r)`) are recognized. Computed - /// fields (`Const`, `Neg`, `Sub`) are not analyzed — the input field - /// they reference, if any, is left "unknown" and falls back to the - /// pair-table path in `emit_lookup_lossy`. This is sound but loose. - /// - /// Tightening opportunities for whoever adds a parser entry for these: - /// - `Neg(Index(r, c))` recovers input via `-output_at_p`. - /// - `Sub(Index(r, c), Const(k))` recovers via `output_at_p + k`. - /// - `Sub(Const(k), Index(r, c))` recovers via `k - output_at_p`. 
- /// Recording these as recoverable would let the pure-map shortcut fire - /// for projections that shift fields by a constant. Today none of the - /// rewrite's own emissions or user programs reach this site with such - /// expressions, so it's purely latent looseness. - fn analyze_lossy_invertibility( - proj: &Projection, - k_in: usize, - v_in: usize, - ) -> BTreeMap<(usize, usize), usize> { - let mut known: BTreeMap<(usize, usize), usize> = BTreeMap::new(); - let mut p: usize = 0; - for fe in proj.key.iter().chain(proj.val.iter()) { - match fe { - FieldExpr::Index(r, c) => { - known.entry((*r, *c)).or_insert(p); - p += 1; - } - FieldExpr::Pos(r) => { - let arity = if *r == 0 { k_in } else { v_in }; - for c in 0..arity { - known.entry((*r, c)).or_insert(p + c); - } - p += arity; - } - FieldExpr::Const(_) | FieldExpr::Neg(_) | FieldExpr::Sub(_, _) => { - p += 1; - } - } - } - known - } -} - -// Projection helpers (used by `explain` itself, not by reverse rules). - -impl Builder { - /// Insert an `Inspect` node for diagnostic output, but only when the - /// `EXPLAIN_DEBUG_DEP` env var is set. Used to surface dep / pair - /// tables at construction sites without bloating the IR in normal runs. - pub fn debug_inspect(&mut self, input: Id, label: String) { - if std::env::var("EXPLAIN_DEBUG_DEP").is_ok() { - self.inspect(input, label); - } - } - - /// Semijoin `left (K; V)` with `right (K; V)` by `(K)`, keep left's rows. - /// (Used at demand-set seeding.) - pub fn semijoin_data(&mut self, left: Id, right: Id, k_arity: usize, v_arity: usize) -> Id { - let proj_key: Vec = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect(); - let proj_val: Vec = (0..v_arity).map(|i| FieldExpr::Index(1, i)).collect(); - self.join(left, right, Projection { key: proj_key, val: proj_val }) - } - - /// Set-level distinct on `(K; V)` rows (DDIR `distinct` is per-key only; - /// pack-distinct-unpack preserves the val). 
- pub fn distinct_full(&mut self, input: Id, k_arity: usize, v_arity: usize) -> Id { - let mut pack_key: Vec = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect(); - for i in 0..v_arity { - pack_key.push(FieldExpr::Index(1, i)); - } - let packed = self.project(input, Projection { key: pack_key, val: vec![] }); - let dist = self.reduce(packed, Reducer::Distinct); - let unpack_key: Vec = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect(); - let unpack_val: Vec = (0..v_arity).map(|i| FieldExpr::Index(0, k_arity + i)).collect(); - self.project(dist, Projection { key: unpack_key, val: unpack_val }) - } -} - -/// Strip user-chain and `[q]` from a dep row's val: `(K; V ++ user ++ [q])` -> `(K; V)`. -fn strip_user_and_q(k_arity: usize, v_arity: usize) -> Projection { - let key: Vec = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect(); - let val: Vec = (0..v_arity).map(|i| FieldExpr::Index(1, i)).collect(); - Projection { key, val } -} - diff --git a/interactive/src/explain_tree.rs b/interactive/src/explain_tree.rs new file mode 100644 index 000000000..b98e0b707 --- /dev/null +++ b/interactive/src/explain_tree.rs @@ -0,0 +1,998 @@ +//! The explanation rewrite on the scope-tree IR. +//! +//! `explain_tree(p)` transforms a program into one whose execution produces +//! per-source demand-set explanations for queries against `p`'s first +//! export: root { sources, query input, witness clone } plus an iterative +//! `explain` scope { demand-set vars, forward clone on demanded rows, +//! reverse-tracing ops, demand exports }. See the section comments below. +//! +//! The clone-with-lifts is the foundation the rewrite's witness and +//! forward copies are built from. `clone_into` clones an original +//! program's scopes into an output scope under construction; every nested +//! scope additionally `lift_iter`s and exports each internal collection, so +//! every value-producing site in the subtree has a "host-visible" form — its +//! 
user-iter coordinates folded into the value, innermost first — at the +//! embedding level. +//! +//! This is the tree form of the flat rewrite's `host` map. There it required +//! positional scope tracking, a pending pile per scope, depth-offset +//! arithmetic for each `leave`, and a fix-up pass for `Leave` aliasing; here +//! it is "a scope exports its lifted internals", and the cascade through +//! enclosing scopes is the recursion. The embedding depth is not a parameter: +//! the renderer derives depth structurally, so a clone needn't know where it +//! will sit. + +use crate::ir::LinearOp; +use crate::scope_ir::{Bind, Export, Import, Item, Node, Program, Ref, Scope, Source, Var}; + +/// A value-producing site in an original tree: the `path` of `Sub` item +/// indices from the root, then the site within that scope. Sites are ops and +/// feedback variables; imports are substituted, not sites. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Addr { + pub path: Vec, + pub site: Site, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Site { + Op(usize), + Var(usize), +} + +/// Clone `orig`'s contents into `out`, splicing (no extra scope around the +/// root level). `import_map[k]` is the ref in `out` standing in for `orig`'s +/// import `k`. Original exports are cloned in order (so `ChildExport` indices +/// keep meaning); nested scopes gain lifted `$host:` exports after them. +/// Returns the host-visible ref at the `out` level for every site in the +/// subtree. +pub fn clone_into(orig: &Scope, out: &mut Scope, import_map: &[Ref]) -> Vec<(Addr, Ref)> { + clone_rec(orig, out, import_map, &[]) +} + +/// The identity check: a program cloned into a fresh root computes the same +/// named exports as the original (the extra `$host:` exports ride along, +/// unconsumed). Backends hook this behind `CLONE_RT=1` for A/B verification. 
+pub fn clone_identity(p: &Program) -> Program { + let mut out = Scope { + name: p.root.name.clone(), + imports: p.root.imports.clone(), + ..Scope::default() + }; + let import_map: Vec = (0..out.imports.len()).map(Ref::Import).collect(); + let _visible = clone_into(&p.root, &mut out, &import_map); + Program { root: out } +} + +fn map_ref(r: &Ref, locals: &[Option], subs: &[Option], import_map: &[Ref], var_base: usize) -> Ref { + match r { + Ref::Local(i) => locals[*i].clone().expect("clone: reference to a later item"), + Ref::Import(k) => import_map[*k].clone(), + Ref::Var(v) => Ref::Var(var_base + v), + Ref::ChildExport(i, j) => Ref::ChildExport(subs[*i].expect("clone: reference to a later sub"), *j), + } +} + +fn clone_node(node: &Node, m: impl Fn(&Ref) -> Ref) -> Node { + match node { + Node::Linear { input, ops } => Node::Linear { input: m(input), ops: ops.clone() }, + Node::Concat(refs) => Node::Concat(refs.iter().map(&m).collect()), + Node::Arrange(r) => Node::Arrange(m(r)), + Node::Join { left, right, projection } => Node::Join { left: m(left), right: m(right), projection: projection.clone() }, + Node::Reduce { input, reducer } => Node::Reduce { input: m(input), reducer: reducer.clone() }, + Node::Inspect { input, label } => Node::Inspect { input: m(input), label: label.clone() }, + } +} + +fn clone_rec(orig: &Scope, out: &mut Scope, import_map: &[Ref], path: &[usize]) -> Vec<(Addr, Ref)> { + let addr = |site: Site| Addr { path: path.to_vec(), site }; + let mut visible: Vec<(Addr, Ref)> = Vec::new(); + + // Feedback variables first: anything in the scope may reference them. + let var_base = out.vars.len(); + for (vi, v) in orig.vars.iter().enumerate() { + out.vars.push(Var { name: v.name.clone() }); + visible.push((addr(Site::Var(vi)), Ref::Var(var_base + vi))); + } + + // orig-ref -> out-ref tables for this scope's content. 
+ let mut locals: Vec> = vec![None; orig.items.len()]; + let mut subs: Vec> = vec![None; orig.items.len()]; + + for (i, item) in orig.items.iter().enumerate() { + match item { + Item::Op(node) => { + let cloned = clone_node(node, |r| map_ref(r, &locals, &subs, import_map, var_base)); + let out_idx = out.items.len(); + out.items.push(Item::Op(cloned)); + locals[i] = Some(Ref::Local(out_idx)); + // The host form of an Arrange is its underlying collection + // (reverse rules build pair tables from collections). + let vis = match node { + Node::Arrange(input) => map_ref(input, &locals, &subs, import_map, var_base), + _ => Ref::Local(out_idx), + }; + visible.push((addr(Site::Op(i)), vis)); + } + Item::Sub(child) => { + // The cloned child declares the same imports, with the parent- + // side refs mapped into the output parent. + let cloned_imports: Vec = child.imports.iter().map(|imp| Import { + name: imp.name.clone(), + from: match &imp.from { + Source::Parent(r) => Source::Parent(map_ref(r, &locals, &subs, import_map, var_base)), + other => panic!("clone: nested scope with external source {:?}", other), + }, + }).collect(); + let mut child_out = Scope { + name: child.name.clone(), + imports: cloned_imports, + ..Scope::default() + }; + // Inside the child, its imports map to themselves (same order). + let ident: Vec = (0..child.imports.len()).map(Ref::Import).collect(); + let mut child_path = path.to_vec(); + child_path.push(i); + let child_visible = clone_rec(child, &mut child_out, &ident, &child_path); + + // Lift each subtree site at this scope's exit and export it: + // the lift folds this scope's iteration coordinate into the + // value (innermost coordinates were folded by deeper exits), + // and the export is the host-visible edge upward. 
+ let sub_idx = out.items.len(); + for (a, vref) in child_visible { + let lift_idx = child_out.items.len(); + child_out.items.push(Item::Op(Node::Linear { input: vref, ops: vec![LinearOp::LiftIter] })); + let export_idx = child_out.exports.len(); + child_out.exports.push(Export { + name: format!("$host:{}", export_idx), + value: Ref::Local(lift_idx), + }); + visible.push((a, Ref::ChildExport(sub_idx, export_idx))); + } + + out.items.push(Item::Sub(child_out)); + subs[i] = Some(sub_idx); + } + } + } + + // Loop closures, with this scope's variable indices offset. + for b in &orig.binds { + out.binds.push(Bind { + var: var_base + b.var, + value: map_ref(&b.value, &locals, &subs, import_map, var_base), + }); + } + + // Original exports, in order (ChildExport indices upward stay valid). + for e in &orig.exports { + out.exports.push(Export { + name: e.name.clone(), + value: map_ref(&e.value, &locals, &subs, import_map, var_base), + }); + } + + visible +} + +// ===== The explanation transform ===== +// +// `explain_tree(p)` produces a Program whose execution yields per-source +// demand-set explanations for queries against `p`'s first export. Output +// shape: root { sources, query input, witness clone } and an iterative +// `explain` scope { demand-set vars, forward clone on demanded rows, +// reverse-tracing ops, demand exports }. +// +// The reverse dataflow is *flat inside the explain scope* by design: demand +// rows carry the user-iteration chain folded into the value (the `folded` +// layout), so no nesting is needed. The per-op reverse rules port from the +// flat rewrite nearly unchanged; what the tree changes is the boundary +// bookkeeping. Flat `Leave` had a special backward rule injecting the inner +// user-chain coordinate; here a reference is *resolved* through explicit +// import/export edges to the value site it names, and the ordinary +// shape-preserving lookup against that site's host form injects or strips +// coordinates as the depths dictate. 
No op needs to know about boundaries. + +use std::collections::BTreeMap; +use crate::parse::{Condition, FieldExpr, Projection, Reducer}; +use crate::ir::{apply_ops_arity, proj_arity}; + +/// What a reference ultimately names: a value-producing site, or one of the +/// root's external sources. +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +enum Target { + Site(Addr), + /// Index into the original root's imports. + Source(usize), +} + +fn scope_at<'a>(root: &'a Scope, path: &[usize]) -> &'a Scope { + let mut s = root; + for &i in path { + let Item::Sub(c) = &s.items[i] else { panic!("scope_at: path step is not a Sub") }; + s = c; + } + s +} + +/// Resolve `r`, a reference within the scope at `path`, to its target, +/// chasing import edges upward and child-export edges downward. +fn resolve(root: &Scope, path: &[usize], r: &Ref) -> Target { + match r { + Ref::Local(i) => Target::Site(Addr { path: path.to_vec(), site: Site::Op(*i) }), + Ref::Var(v) => Target::Site(Addr { path: path.to_vec(), site: Site::Var(*v) }), + Ref::Import(k) => { + if path.is_empty() { + Target::Source(*k) + } else { + let parent_path = &path[..path.len() - 1]; + let scope = scope_at(root, path); + match &scope.imports[*k].from { + Source::Parent(pr) => resolve(root, parent_path, pr), + _ => unreachable!("non-root scope with external source"), + } + } + } + Ref::ChildExport(c, j) => { + let mut child_path = path.to_vec(); + child_path.push(*c); + let child = scope_at(root, &child_path); + resolve(root, &child_path, &child.exports[*j].value.clone()) + } + } +} + +/// `(k, v)` per site, forward-propagated from source arities to a fixed +/// point (feedback variables converge through their binds; a program with an +/// unconstrained var would simply leave it absent, and the rewrite panics on +/// lookup — the standalone shape pass is the place for a polite error). 
+fn site_shapes(
+    p: &Program,
+    source_shapes: &[(usize, usize)],
+) -> BTreeMap<Addr, (usize, usize)> {
+    let mut shapes: BTreeMap<Addr, (usize, usize)> = BTreeMap::new();
+    loop {
+        let before = shapes.len();
+        walk_shapes(&p.root, &p.root, &[], source_shapes, &mut shapes);
+        if shapes.len() == before { break; }
+    }
+    shapes
+}
+
+fn shape_of_ref(
+    root: &Scope,
+    path: &[usize],
+    r: &Ref,
+    source_shapes: &[(usize, usize)],
+    shapes: &BTreeMap<Addr, (usize, usize)>,
+) -> Option<(usize, usize)> {
+    match resolve(root, path, r) {
+        Target::Source(k) => Some(source_shapes[k]),
+        Target::Site(a) => shapes.get(&a).copied(),
+    }
+}
+
+fn walk_shapes(
+    root: &Scope,
+    s: &Scope,
+    path: &[usize],
+    source_shapes: &[(usize, usize)],
+    shapes: &mut BTreeMap<Addr, (usize, usize)>,
+) {
+    let addr = |site: Site| Addr { path: path.to_vec(), site };
+    // Var shapes from their binds.
+    for b in &s.binds {
+        if !shapes.contains_key(&addr(Site::Var(b.var))) {
+            if let Some(sh) = shape_of_ref(root, path, &b.value, source_shapes, shapes) {
+                shapes.insert(addr(Site::Var(b.var)), sh);
+            }
+        }
+    }
+    for (i, item) in s.items.iter().enumerate() {
+        match item {
+            Item::Op(node) => {
+                if shapes.contains_key(&addr(Site::Op(i))) { continue; }
+                let of = |r: &Ref| shape_of_ref(root, path, r, source_shapes, shapes);
+                let sh = match node {
+                    Node::Linear { input, ops } => of(input).map(|s| apply_ops_arity(s, ops)),
+                    Node::Concat(refs) => refs.iter().find_map(|r| of(r)),
+                    Node::Arrange(r) | Node::Inspect { input: r, .. } => of(r),
+                    Node::Join { left, right, projection } => match (of(left), of(right)) {
+                        (Some((kl, vl)), Some((_, vr))) => {
+                            let rows = [kl, vl, vr];
+                            Some((proj_arity(&projection.key, &rows), proj_arity(&projection.val, &rows)))
+                        }
+                        _ => None,
+                    },
+                    Node::Reduce { input, reducer } => of(input).map(|(k, v)| match reducer {
+                        Reducer::Distinct => (k, 0),
+                        Reducer::Min => (k, v),
+                        Reducer::Count => (k, 1),
+                    }),
+                };
+                if let Some(sh) = sh { shapes.insert(addr(Site::Op(i)), sh); }
+            }
+            Item::Sub(child) => {
+                let mut cp = path.to_vec();
+                cp.push(i);
+                walk_shapes(root, child, &cp, source_shapes, shapes);
+            }
+        }
+    }
+}
+
+/// Minimal builder over a `Scope` under construction: push an op, get a Ref.
+struct Sb {
+    s: Scope,
+}
+
+impl Sb {
+    fn new(name: &str) -> Self { Sb { s: Scope { name: name.into(), ..Scope::default() } } }
+    fn op(&mut self, n: Node) -> Ref {
+        let i = self.s.items.len();
+        self.s.items.push(Item::Op(n));
+        Ref::Local(i)
+    }
+    fn linear(&mut self, input: Ref, ops: Vec<LinearOp>) -> Ref { self.op(Node::Linear { input, ops }) }
+    fn project(&mut self, input: Ref, p: Projection) -> Ref { self.linear(input, vec![LinearOp::Project(p)]) }
+    fn filter(&mut self, input: Ref, c: Condition) -> Ref { self.linear(input, vec![LinearOp::Filter(c)]) }
+    fn concat(&mut self, refs: Vec<Ref>) -> Ref {
+        if refs.len() == 1 { return refs.into_iter().next().unwrap(); }
+        self.op(Node::Concat(refs))
+    }
+    fn join(&mut self, l: Ref, r: Ref, projection: Projection) -> Ref {
+        let la = self.op(Node::Arrange(l));
+        let ra = self.op(Node::Arrange(r));
+        self.op(Node::Join { left: la, right: ra, projection })
+    }
+    fn reduce(&mut self, input: Ref, reducer: Reducer) -> Ref {
+        let a = self.op(Node::Arrange(input));
+        self.op(Node::Reduce { input: a, reducer })
+    }
+    fn variable(&mut self, name: &str) -> Ref {
+        let v = self.s.vars.len();
+        self.s.vars.push(Var { name: name.into() });
+        Ref::Var(v)
+    }
+    fn bind(&mut self, var: Ref, value: Ref) {
+        let Ref::Var(v) = var else { panic!("bind: not a variable ref") };
+        self.s.binds.push(Bind { var: v, value });
+    }
+    fn import(&mut self, name: String, from: Source) -> Ref {
+        let k = self.s.imports.len();
+        self.s.imports.push(Import { name, from });
+        Ref::Import(k)
+    }
+    fn export(&mut self, name: String, value: Ref) -> usize {
+        let j = self.s.exports.len();
+        self.s.exports.push(Export { name, value });
+        j
+    }
+    fn debug_inspect(&mut self, input: Ref, label: String) {
+        if std::env::var("EXPLAIN_DEBUG_DEP").is_ok() {
+            self.op(Node::Inspect { input, label });
+        }
+    }
+    /// Semijoin `left (K; V)` with `right (K; V)` by `(K)`, keep left's rows.
+    fn semijoin_data(&mut self, left: Ref, right: Ref, k_arity: usize, v_arity: usize) -> Ref {
+        let key: Vec<FieldExpr> = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect();
+        let val: Vec<FieldExpr> = (0..v_arity).map(|i| FieldExpr::Index(1, i)).collect();
+        self.join(left, right, Projection { key, val })
+    }
+    /// Set-level distinct on `(K; V)` rows (pack-distinct-unpack).
+    fn distinct_full(&mut self, input: Ref, k_arity: usize, v_arity: usize) -> Ref {
+        let mut pack_key: Vec<FieldExpr> = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect();
+        for i in 0..v_arity { pack_key.push(FieldExpr::Index(1, i)); }
+        let packed = self.project(input, Projection { key: pack_key, val: vec![] });
+        let dist = self.reduce(packed, Reducer::Distinct);
+        let unpack_key: Vec<FieldExpr> = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect();
+        let unpack_val: Vec<FieldExpr> = (0..v_arity).map(|i| FieldExpr::Index(0, k_arity + i)).collect();
+        self.project(dist, Projection { key: unpack_key, val: unpack_val })
+    }
+    /// Soundness filter + strip, via the shared `folded` layout algebra.
+    fn filter_time_and_strip(&mut self, coll: Ref, k_out: usize, v_pre: usize, in_len: usize, out_len: usize, keep_in_len: usize) -> Ref {
+        let layout = crate::folded::Joined { v_pre, in_len, out_len };
+        let mut cur = coll;
+        if let Some(cond) = layout.time_le() { cur = self.filter(cur, cond); }
+        self.project(cur, layout.strip(k_out, keep_in_len))
+    }
+}
+
+/// One upstream edge into a backward rule: the target's host-side `(data,
+/// user)` collections from both clones, its shape, and its user-chain length.
+/// (The flat version carried two lengths that diverged at `Leave`; with
+/// per-site hosts there is one.)
+struct Side {
+    witness: Ref,
+    forward: Ref,
+    shape: (usize, usize),
+    user_len: usize,
+}
+
+/// Keep `(key[..k_arity]; val[..v_arity])`, dropping every trailing val column (the user chain and `q`, per the name).
+fn strip_user_and_q(k_arity: usize, v_arity: usize) -> Projection {
+    let key: Vec<FieldExpr> = (0..k_arity).map(|i| FieldExpr::Index(0, i)).collect();
+    let val: Vec<FieldExpr> = (0..v_arity).map(|i| FieldExpr::Index(1, i)).collect();
+    Projection { key, val }
+}
+
+impl Sb {
+    /// Shape-preserving lookup; also the universal depth adapter — when a
+    /// contribution's chain length differs from its target's, the join against
+    /// the target's host form injects (deeper target) or strips (shallower)
+    /// the difference, outer-aligned by `folded`.
+    fn emit_lookup_shape_preserving(&mut self, dep_y: Ref, side: &Side, output_depth: usize) -> Ref {
+        let (k, v) = side.shape;
+        let user_len = side.user_len;
+        let pack_kv = |k: usize, v: usize| -> Vec<FieldExpr> {
+            let mut out: Vec<FieldExpr> = Vec::with_capacity(k + v);
+            for i in 0..k { out.push(FieldExpr::Index(0, i)); }
+            for i in 0..v { out.push(FieldExpr::Index(1, i)); }
+            out
+        };
+        let pair = self.concat(vec![side.witness.clone(), side.forward.clone()]);
+        let pair_keyed = self.project(pair, Projection {
+            key: pack_kv(k, v),
+            val: (0..user_len).map(|i| FieldExpr::Index(1, v + i)).collect(),
+        });
+        let dep_keyed = self.project(dep_y, Projection {
+            key: pack_kv(k, v),
+            val: (0..output_depth + 1).map(|i| FieldExpr::Index(1, v + i)).collect(),
+        });
+        let key: Vec<FieldExpr> = (0..k).map(|i| FieldExpr::Index(0, i)).collect();
+        let mut val: Vec<FieldExpr> = Vec::new();
+        for i in 0..v { val.push(FieldExpr::Index(0, k + i)); }
+        for i in 0..user_len { val.push(FieldExpr::Index(2, i)); }
+        for i in 0..output_depth { val.push(FieldExpr::Index(1, i)); }
+        val.push(FieldExpr::Index(1, output_depth)); // q
+        let joined = self.join(dep_keyed, pair_keyed, Projection { key, val });
+        self.filter_time_and_strip(joined, k, v, user_len, output_depth, user_len)
+    }
+
+    /// Keyed lookup (Reduce-style), with Min's value-narrowing.
+    fn emit_lookup_keyed(&mut self, dep_y: Ref, side: &Side, output_shape: (usize, usize), out_user_len: usize, reducer: &Reducer) -> Ref {
+        let (k_in, v_in) = side.shape;
+        let in_user_len = side.user_len;
+        let (_, v_out) = output_shape;
+        let pair = self.concat(vec![side.witness.clone(), side.forward.clone()]);
+        let include_v_out = matches!(reducer, Reducer::Min) && v_in == v_out && v_in > 0;
+        let mut val: Vec<FieldExpr> = Vec::new();
+        for i in 0..v_in { val.push(FieldExpr::Index(2, i)); }
+        if include_v_out {
+            for i in 0..v_out { val.push(FieldExpr::Index(1, i)); }
+        }
+        for i in 0..in_user_len { val.push(FieldExpr::Index(2, v_in + i)); }
+        for i in 0..out_user_len { val.push(FieldExpr::Index(1, v_out + i)); }
+        val.push(FieldExpr::Index(1, v_out + out_user_len));
+        let proj = Projection { key: (0..k_in).map(|i| FieldExpr::Index(0, i)).collect(), val };
+        let joined = self.join(dep_y, pair, proj);
+        let after_min = if include_v_out {
+            let mut acc: Option<Condition> = None;
+            for i in 0..v_in {
+                let cond = Condition::Eq(FieldExpr::Index(1, i), FieldExpr::Index(1, v_in + i));
+                acc = Some(match acc { None => cond, Some(prev) => Condition::And(Box::new(prev), Box::new(cond)) });
+            }
+            let filtered = self.filter(joined, acc.unwrap());
+            let key: Vec<FieldExpr> = (0..k_in).map(|i| FieldExpr::Index(0, i)).collect();
+            let mut new_val: Vec<FieldExpr> = Vec::new();
+            for i in 0..v_in { new_val.push(FieldExpr::Index(1, i)); }
+            let after_vout = v_in + v_out;
+            for i in 0..in_user_len { new_val.push(FieldExpr::Index(1, after_vout + i)); }
+            for i in 0..out_user_len { new_val.push(FieldExpr::Index(1, after_vout + in_user_len + i)); }
+            new_val.push(FieldExpr::Index(1, after_vout + in_user_len + out_user_len));
+            self.project(filtered, Projection { key, val: new_val })
+        } else {
+            joined
+        };
+        self.filter_time_and_strip(after_min, k_in, v_in, in_user_len, out_user_len, in_user_len)
+    }
+
+    /// Lossy lookup (Linear[Project]): pure-map shortcut when invertible and
+    /// same-scope, pair-table fallback otherwise.
+    fn emit_lookup_lossy(&mut self, dep_y: Ref, side: &Side, output_shape: (usize, usize), out_user_len: usize, proj: &Projection) -> Ref {
+        let (k_in, v_in) = side.shape;
+        let in_user_len = side.user_len;
+        let (k_out, v_out) = output_shape;
+        let known = analyze_lossy_invertibility(proj, k_in, v_in);
+        let total = (0..k_in).all(|c| known.contains_key(&(0, c)))
+            && (0..v_in).all(|c| known.contains_key(&(1, c)));
+        if total && in_user_len == out_user_len {
+            let access = |p: usize| -> FieldExpr {
+                if p < k_out { FieldExpr::Index(0, p) } else { FieldExpr::Index(1, p - k_out) }
+            };
+            let key: Vec<FieldExpr> = (0..k_in).map(|c| access(known[&(0, c)])).collect();
+            let mut val: Vec<FieldExpr> = Vec::with_capacity(v_in + in_user_len + 1);
+            for c in 0..v_in { val.push(access(known[&(1, c)])); }
+            for i in 0..out_user_len { val.push(FieldExpr::Index(1, v_out + i)); }
+            val.push(FieldExpr::Index(1, v_out + out_user_len));
+            return self.project(dep_y, Projection { key, val });
+        }
+        let pair_src = self.concat(vec![side.witness.clone(), side.forward.clone()]);
+        let mut pair_val: Vec<FieldExpr> = Vec::with_capacity(k_in + v_in + in_user_len);
+        for i in 0..k_in { pair_val.push(FieldExpr::Index(0, i)); }
+        for i in 0..v_in + in_user_len { pair_val.push(FieldExpr::Index(1, i)); }
+        let pair = self.project(pair_src, Projection { key: proj.key.clone(), val: pair_val });
+        let key: Vec<FieldExpr> = (0..k_in).map(|i| FieldExpr::Index(2, i)).collect();
+        let mut val: Vec<FieldExpr> = Vec::new();
+        for i in 0..v_in { val.push(FieldExpr::Index(2, k_in + i)); }
+        for i in 0..in_user_len { val.push(FieldExpr::Index(2, k_in + v_in + i)); }
+        for i in 0..out_user_len { val.push(FieldExpr::Index(1, v_out + i)); }
+        val.push(FieldExpr::Index(1, v_out + out_user_len));
+        let joined = self.join(dep_y, pair, Projection { key, val });
+        self.filter_time_and_strip(joined, k_in, v_in, in_user_len, out_user_len, in_user_len)
+    }
+
+    /// Join's backward rule: two contribs (left, right).
+    fn emit_lookup_join(&mut self, dep_y: Ref, left: &Side, right: &Side, output_shape: (usize, usize), out_user_len: usize, projection: &Projection) -> (Ref, Ref) {
+        let (k_arity, v_l) = left.shape;
+        let (_, v_r) = right.shape;
+        let (_, v_out) = output_shape;
+        let left_user_len = left.user_len;
+        let right_user_len = right.user_len;
+        let left_pair_src = self.concat(vec![left.witness.clone(), left.forward.clone()]);
+        let right_pair_src = self.concat(vec![right.witness.clone(), right.forward.clone()]);
+        let mut pair_val: Vec<FieldExpr> = Vec::new();
+        for i in 0..k_arity { pair_val.push(FieldExpr::Index(0, i)); }
+        for i in 0..v_l { pair_val.push(FieldExpr::Index(1, i)); }
+        for i in 0..v_r { pair_val.push(FieldExpr::Index(2, i)); }
+        for i in 0..left_user_len { pair_val.push(FieldExpr::Index(1, v_l + i)); }
+        for i in 0..right_user_len { pair_val.push(FieldExpr::Index(2, v_r + i)); }
+        let pos_arities = [k_arity, v_l, v_r];
+        let key_expanded = expand_pos_bounded(&projection.key, &pos_arities);
+        let pair = self.join(left_pair_src, right_pair_src, Projection { key: key_expanded, val: pair_val });
+        let q_pair_pos = v_out + out_user_len;
+        let vl_pair_start = k_arity;
+        let vr_pair_start = vl_pair_start + v_l;
+        let ul_pair_start = vr_pair_start + v_r;
+        let ur_pair_start = ul_pair_start + left_user_len;
+        let key_left: Vec<FieldExpr> = (0..k_arity).map(|i| FieldExpr::Index(2, i)).collect();
+        let mut val_left: Vec<FieldExpr> = Vec::new();
+        for i in 0..v_l { val_left.push(FieldExpr::Index(2, vl_pair_start + i)); }
+        for i in 0..left_user_len { val_left.push(FieldExpr::Index(2, ul_pair_start + i)); }
+        for i in 0..out_user_len { val_left.push(FieldExpr::Index(1, v_out + i)); }
+        val_left.push(FieldExpr::Index(1, q_pair_pos));
+        let left_joined = self.join(dep_y.clone(), pair.clone(), Projection { key: key_left, val: val_left });
+        let left_contrib = self.filter_time_and_strip(left_joined, k_arity, v_l, left_user_len, out_user_len, left_user_len);
+        let key_right: Vec<FieldExpr> = (0..k_arity).map(|i| FieldExpr::Index(2, i)).collect();
+        let mut val_right: Vec<FieldExpr> = Vec::new();
+        for i in 0..v_r { val_right.push(FieldExpr::Index(2, vr_pair_start + i)); }
+        for i in 0..right_user_len { val_right.push(FieldExpr::Index(2, ur_pair_start + i)); }
+        for i in 0..out_user_len { val_right.push(FieldExpr::Index(1, v_out + i)); }
+        val_right.push(FieldExpr::Index(1, q_pair_pos));
+        let right_joined = self.join(dep_y, pair, Projection { key: key_right, val: val_right });
+        let right_contrib = self.filter_time_and_strip(right_joined, k_arity, v_r, right_user_len, out_user_len, right_user_len);
+        (left_contrib, right_contrib)
+    }
+}
+
+fn expand_pos_bounded(fields: &[FieldExpr], arities: &[usize]) -> Vec<FieldExpr> {
+    let mut out = Vec::with_capacity(fields.len());
+    for f in fields { expand_pos_one(f, arities, &mut out); }
+    out
+}
+
+fn expand_pos_one(f: &FieldExpr, arities: &[usize], out: &mut Vec<FieldExpr>) {
+    match f {
+        FieldExpr::Pos(i) => {
+            for c in 0..arities[*i] { out.push(FieldExpr::Index(*i, c)); }
+        }
+        FieldExpr::Index(_, _) | FieldExpr::Const(_) => out.push(f.clone()),
+        FieldExpr::Neg(inner) => {
+            let mut tmp = Vec::new();
+            expand_pos_one(inner, arities, &mut tmp);
+            for t in tmp { out.push(FieldExpr::Neg(Box::new(t))); }
+        }
+        FieldExpr::Sub(a, b) => {
+            let (mut ta, mut tb) = (Vec::new(), Vec::new());
+            expand_pos_one(a, arities, &mut ta);
+            expand_pos_one(b, arities, &mut tb);
+            for (x, y) in ta.into_iter().zip(tb) { out.push(FieldExpr::Sub(Box::new(x), Box::new(y))); }
+        }
+    }
+}
+
+fn analyze_lossy_invertibility(proj: &Projection, k_in: usize, v_in: usize) -> BTreeMap<(usize, usize), usize> {
+    let mut known: BTreeMap<(usize, usize), usize> = BTreeMap::new();
+    let mut p: usize = 0;
+    for fe in proj.key.iter().chain(proj.val.iter()) {
+        match fe {
+            FieldExpr::Index(r, c) => { known.entry((*r, *c)).or_insert(p); p += 1; }
+            FieldExpr::Pos(r) => {
+                let arity = if *r == 0 { k_in } else { v_in };
+                for c in 0..arity { known.entry((*r, c)).or_insert(p + c); }
+                p += arity;
+            }
+            FieldExpr::Const(_) | FieldExpr::Neg(_) | FieldExpr::Sub(_, _) => { p += 1; }
+        }
+    }
+    known
+}
+
+/// The `(k, v)` shape of the program's first export — what a query against it
+/// must match. A query row is `(key[k]; val[v] ++ [q])`; a shape-mismatched
+/// query addresses nothing and yields junk demand, so harnesses should check
+/// loudly at seeding time.
+pub fn export_shape(p: &Program, source_shapes: &[(usize, usize)]) -> (usize, usize) {
+    let shapes = site_shapes(p, source_shapes);
+    let first = p.root.exports.first().expect("export_shape: program has no export").value.clone();
+    match resolve(&p.root, &[], &first) {
+        Target::Site(a) => shapes[&a],
+        Target::Source(k) => source_shapes[k],
+    }
+}
+
+/// The transform. `source_shapes[k]` is the `(k, v)` of the original root's
+/// import `k` (positional inputs and named traces alike). The query arrives
+/// as one extra positional input appended after the original inputs.
+pub fn explain_tree(p: &Program, source_shapes: &[(usize, usize)]) -> Program {
+    let n_sources = p.root.imports.len();
+    assert_eq!(source_shapes.len(), n_sources, "one shape per root import");
+    let shapes = site_shapes(p, source_shapes);
+    // One past the largest positional-input index: the slot for the query input.
+    let max_input = p.root.imports.iter().filter_map(|imp| match &imp.from {
+        Source::Input(i) => Some(*i + 1),
+        _ => None,
+    }).max().unwrap_or(0);
+
+    // ---- output root: original sources + query input + witness clone ----
+    let mut root = Sb::new(&p.root.name);
+    root.s.imports = p.root.imports.clone();
+    let src_refs: Vec<Ref> = (0..n_sources).map(Ref::Import).collect();
+    let query_ref = root.import("query".into(), Source::Input(max_input));
+    let witness: BTreeMap<Addr, Ref> = clone_into(&p.root, &mut root.s, &src_refs).into_iter().collect();
+    // The witness clone re-exported the original program's exports; the
+    // explain output's exports are the demand sets only.
+    root.s.exports.clear();
+
+    // ---- explain scope ----
+    let mut ex = Sb::new("explain");
+    let ex_src: Vec<Ref> = (0..n_sources)
+        .map(|k| ex.import(format!("$src:{}", k), Source::Parent(src_refs[k].clone())))
+        .collect();
+    let ex_query = ex.import("$query".into(), Source::Parent(query_ref));
+    let wit: BTreeMap<Addr, Ref> = witness.iter()
+        .map(|(a, r)| (a.clone(), ex.import(format!("$wit:{:?}:{:?}", a.path, a.site), Source::Parent(r.clone()))))
+        .collect();
+
+    // Demand-set variables, one per source; forward inputs are the actual
+    // sources restricted to the demand-sets.
+    let dsets: Vec<Ref> = (0..n_sources).map(|k| ex.variable(&format!("demand_set_{}", k))).collect();
+    let fwd_inputs: Vec<Ref> = (0..n_sources).map(|k| {
+        let (ka, va) = source_shapes[k];
+        ex.semijoin_data(dsets[k].clone(), ex_src[k].clone(), ka, va)
+    }).collect();
+    let forward: BTreeMap<Addr, Ref> = clone_into(&p.root, &mut ex.s, &fwd_inputs).into_iter().collect();
+    ex.s.exports.clear();
+
+    // Pre-allocated demand variables for the original feedback vars (forward
+    // cycles induce backward cycles that need a Variable to close).
+    let mut var_addrs: Vec<Addr> = Vec::new();
+    collect_var_addrs(&p.root, &[], &mut var_addrs);
+    let mut demand: BTreeMap<Addr, Ref> = BTreeMap::new();
+    for a in &var_addrs {
+        let dv = ex.variable(&format!("demand_var_{:?}_{:?}", a.path, a.site));
+        demand.insert(a.clone(), dv);
+    }
+
+    // The reverse walk.
+    let mut rev = Reverse {
+        orig: &p.root,
+        shapes: &shapes,
+        source_shapes,
+        wit: &wit,
+        fwd: &forward,
+        ex_src: &ex_src,
+        fwd_inputs: &fwd_inputs,
+        demand,
+        contribs: BTreeMap::new(),
+    };
+    // Seed: the query rows are demand against the first export's target.
+    let first = p.root.exports.first().expect("explain_tree: program has no export").value.clone();
+    let target = resolve(&p.root, &[], &first);
+    let seeded = rev.route(&mut ex, ex_query, 0, &target);
+    rev.contribs.entry(target).or_default().push(seeded);
+    rev.walk(&mut ex, &p.root, &[]);
+
+    // Demand-set closure per source: strip q, restrict to actual rows,
+    // accumulate, and export.
+    let mut demand_exports: Vec<(String, usize)> = Vec::new();
+    for k in 0..n_sources {
+        let (ka, va) = source_shapes[k];
+        let cs = rev.contribs.remove(&Target::Source(k)).unwrap_or_default();
+        let combined = if cs.is_empty() {
+            dsets[k].clone() // no demand: the set stays empty (self-bind below)
+        } else {
+            let merged = ex.concat(cs);
+            let stripped = ex.project(merged, strip_user_and_q(ka, va));
+            let semi = ex.semijoin_data(stripped, ex_src[k].clone(), ka, va);
+            ex.concat(vec![dsets[k].clone(), semi])
+        };
+        let dist = ex.distinct_full(combined, ka, va);
+        ex.debug_inspect(dist.clone(), format!("demand_set:{}", k));
+        ex.bind(dsets[k].clone(), dist.clone());
+        let name = match &p.root.imports[k].from {
+            Source::Input(i) => format!("demand:input{}", i),
+            Source::Trace(nm) => format!("demand:{}", nm),
+            Source::Parent(_) => unreachable!(),
+        };
+        let j = ex.export(name.clone(), dsets[k].clone());
+        demand_exports.push((name, j));
+    }
+
+    // Close: the explain scope becomes a Sub of the root; its demand exports
+    // become the program's exports.
+    let ex_idx = root.s.items.len();
+    root.s.items.push(Item::Sub(ex.s));
+    for (name, j) in demand_exports {
+        root.export(name, Ref::ChildExport(ex_idx, j));
+    }
+    Program { root: root.s }
+}
+
+fn collect_var_addrs(s: &Scope, path: &[usize], out: &mut Vec<Addr>) {
+    for v in 0..s.vars.len() {
+        out.push(Addr { path: path.to_vec(), site: Site::Var(v) });
+    }
+    for (i, item) in s.items.iter().enumerate() {
+        if let Item::Sub(c) = item {
+            let mut cp = path.to_vec();
+            cp.push(i);
+            collect_var_addrs(c, &cp, out);
+        }
+    }
+}
+
+/// State for the reverse walk: demand refs per site, contributions per target.
+struct Reverse<'a> {
+    orig: &'a Scope,
+    shapes: &'a BTreeMap<Addr, (usize, usize)>,
+    source_shapes: &'a [(usize, usize)],
+    wit: &'a BTreeMap<Addr, Ref>,
+    fwd: &'a BTreeMap<Addr, Ref>,
+    ex_src: &'a [Ref],
+    fwd_inputs: &'a [Ref],
+    demand: BTreeMap<Addr, Ref>,
+    contribs: BTreeMap<Target, Vec<Ref>>,
+}
+
+impl<'a> Reverse<'a> {
+    fn side(&self, t: &Target) -> Side {
+        match t {
+            Target::Site(a) => Side {
+                witness: self.wit[a].clone(),
+                forward: self.fwd[a].clone(),
+                shape: self.shapes[a],
+                user_len: a.path.len(),
+            },
+            Target::Source(k) => Side {
+                witness: self.ex_src[*k].clone(),
+                forward: self.fwd_inputs[*k].clone(),
+                shape: self.source_shapes[*k],
+                user_len: 0,
+            },
+        }
+    }
+
+    /// Adapt `contrib` (chain length `from_len`) to `target`'s depth: equal
+    /// lengths push through; otherwise the shape-preserving lookup against the
+    /// target's host injects or strips the difference.
+    fn route(&mut self, ex: &mut Sb, contrib: Ref, from_len: usize, target: &Target) -> Ref {
+        let to_len = match target { Target::Site(a) => a.path.len(), Target::Source(_) => 0 };
+        if to_len == from_len {
+            contrib
+        } else {
+            let side = self.side(target);
+            ex.emit_lookup_shape_preserving(contrib, &side, from_len)
+        }
+    }
+
+    fn push(&mut self, ex: &mut Sb, path: &[usize], input: &Ref, contrib: Ref, from_len: usize) {
+        let target = resolve(self.orig, path, input);
+        let routed = self.route(ex, contrib, from_len, &target);
+        self.contribs.entry(target).or_default().push(routed);
+    }
+
+    fn walk(&mut self, ex: &mut Sb, s: &Scope, path: &[usize]) {
+        // Binds first: route each var's demand into its value's contribs,
+        // inverting the feedback's iter advance (user_chain[0] -= 1, dropping
+        // iter-0 demand, which has no body-side source).
+        for b in &s.binds {
+            let var_addr = Addr { path: path.to_vec(), site: Site::Var(b.var) };
+            let dv = self.demand[&var_addr].clone();
+            let (kx, vx) = self.shapes[&var_addr];
+            let var_user_len = path.len();
+            let chain_pos = vx;
+            let filtered = ex.filter(dv, Condition::Gt(FieldExpr::Index(1, chain_pos), FieldExpr::Const(0)));
+            let key: Vec<FieldExpr> = (0..kx).map(|i| FieldExpr::Index(0, i)).collect();
+            let mut val: Vec<FieldExpr> = Vec::new();
+            for i in 0..vx { val.push(FieldExpr::Index(1, i)); }
+            val.push(FieldExpr::Sub(Box::new(FieldExpr::Index(1, chain_pos)), Box::new(FieldExpr::Const(1))));
+            for i in 1..var_user_len { val.push(FieldExpr::Index(1, chain_pos + i)); }
+            val.push(FieldExpr::Index(1, chain_pos + var_user_len));
+            let contrib = ex.project(filtered, Projection { key, val });
+            self.push(ex, path, &b.value, contrib, var_user_len);
+        }
+        // Items in reverse: consumers have contributed by the time we arrive.
+        for (i, item) in s.items.iter().enumerate().rev() {
+            match item {
+                Item::Op(node) => self.site(ex, s, path, i, node),
+                Item::Sub(child) => {
+                    let mut cp = path.to_vec();
+                    cp.push(i);
+                    self.walk(ex, child, &cp);
+                }
+            }
+        }
+        // Close this scope's feedback variables: bind each demand variable to
+        // its accumulated (distinct) demand, or to itself if none arrived.
+        for v in 0..s.vars.len() {
+            let addr = Addr { path: path.to_vec(), site: Site::Var(v) };
+            let dv = self.demand[&addr].clone();
+            let cs = self.contribs.remove(&Target::Site(addr.clone())).unwrap_or_default();
+            if cs.is_empty() {
+                ex.bind(dv.clone(), dv);
+                continue;
+            }
+            let combined = ex.concat(cs);
+            let (k, vx) = self.shapes[&addr];
+            let dist = ex.distinct_full(combined, k, vx + path.len() + 1);
+            ex.debug_inspect(dist.clone(), format!("demand_{:?}:{:?}", path, Site::Var(v)));
+            ex.bind(dv, dist);
+        }
+    }
+
+    fn site(&mut self, ex: &mut Sb, _s: &Scope, path: &[usize], i: usize, node: &Node) {
+        let addr = Addr { path: path.to_vec(), site: Site::Op(i) };
+        let cs = self.contribs.remove(&Target::Site(addr.clone())).unwrap_or_default();
+        if cs.is_empty() { return; }
+        let combined = ex.concat(cs);
+        let (k, v) = self.shapes[&addr];
+        let out_user_len = path.len();
+        let dist = ex.distinct_full(combined, k, v + out_user_len + 1);
+        ex.debug_inspect(dist.clone(), format!("demand_{:?}:{:?}", path, Site::Op(i)));
+        self.demand.insert(addr.clone(), dist.clone());
+        let out_shape = (k, v);
+        let dep_this = dist;
+
+        match node {
+            Node::Linear { input, ops } => {
+                let op = match ops.as_slice() {
+                    [single] => single,
+                    _ => panic!("explain_tree: multi-op Linear (run before optimize)"),
+                };
+                match op {
+                    LinearOp::Project(proj) => {
+                        let target = resolve(self.orig, path, input);
+                        let side = self.side(&target);
+                        let contrib = ex.emit_lookup_lossy(dep_this, &side, out_shape, out_user_len, proj);
+                        self.contribs.entry(target).or_default().push(contrib);
+                    }
+                    LinearOp::Filter(cond) => {
+                        let contrib = ex.filter(dep_this, cond.clone());
+                        self.push(ex, path, input, contrib, out_user_len);
+                    }
+                    LinearOp::Negate | LinearOp::EnterAt(_) => {
+                        // Negate: pure pass-through. EnterAt: sound but
+                        // over-broad pass-through (see the flat rule's note);
+                        // the routing adapter handles any depth difference.
+                        self.push(ex, path, input, dep_this, out_user_len);
+                    }
+                    LinearOp::LiftIter => panic!("explain_tree: LiftIter in user program"),
+                }
+            }
+            Node::Concat(refs) => {
+                for r in refs {
+                    let target = resolve(self.orig, path, r);
+                    let side = self.side(&target);
+                    let contrib = ex.emit_lookup_shape_preserving(dep_this.clone(), &side, out_user_len);
+                    self.contribs.entry(target).or_default().push(contrib);
+                }
+            }
+            Node::Arrange(input) | Node::Inspect { input, .. } => {
+                self.push(ex, path, input, dep_this, out_user_len);
+            }
+            Node::Reduce { input, reducer } => {
+                let target = resolve(self.orig, path, input);
+                let side = self.side(&target);
+                let contrib = ex.emit_lookup_keyed(dep_this, &side, out_shape, out_user_len, reducer);
+                self.contribs.entry(target).or_default().push(contrib);
+            }
+            Node::Join { left, right, projection } => {
+                let lt = resolve(self.orig, path, left);
+                let rt = resolve(self.orig, path, right);
+                let ls = self.side(&lt);
+                let rs = self.side(&rt);
+                let (lc, rc) = ex.emit_lookup_join(dep_this, &ls, &rs, out_shape, out_user_len, projection);
+                self.contribs.entry(lt).or_default().push(lc);
+                self.contribs.entry(rt).or_default().push(rc);
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::lower::lower_tree;
+
+    fn parse(src: &str) -> Vec { crate::parse::pipe::parse(src) }
+
+    const SCC: &str = r#"
+        let edges = input 0 | key($0[0] ; $0[1]);
+        let trans = edges | key($1 ; $0);
+        outer: {
+            let scc = edges + trim;
+            fwd: {
+                let nodes = edges | key($1 ; $1) | enter_at($1[0]);
+                let labels = proposals + nodes | min;
+                var proposals = labels | join(scc, ($2 ; $1));
+            }
+            let trim_fwd = edges
+                | join(fwd::labels, ($1 ; $0, $2))
+                | join(fwd::labels, ($0 ; $1, $2))
+                | filter($1[1] == $1[2])
+                | key($0 ; $1[0]);
+            bwd: {
+                let nodes = trans | key($1 ; $1) | enter_at($1[0]);
+                let labels = proposals + nodes | min;
+                var proposals = labels | join(trim_fwd, ($2 ; $1));
+            }
+            let trim_bwd = trans
+                | join(bwd::labels, ($1 ; $0, $2))
+                | join(bwd::labels, ($0 ; $1, $2))
+                | filter($1[1] == $1[2])
+                | key($0 ; $1[0]);
+            var trim = trim_bwd - edges;
+        }
+        export "result" = outer::scc | map(;) | arrange | inspect(total);
+    "#;
+
+    fn vars_total(s: &Scope) -> usize {
+        s.vars.len() + s.items.iter().map(|i| match i { Item::Sub(c) => vars_total(c), _ => 0 }).sum::<usize>()
+    }
+
+    #[test]
+    fn every_site_is_host_visible() {
+        let p = lower_tree(parse(SCC));
+        let mut out = Scope { imports: p.root.imports.clone(), ..Scope::default() };
+        let import_map: Vec<Ref> = (0..out.imports.len()).map(Ref::Import).collect();
+        let visible = clone_into(&p.root, &mut out, &import_map);
+        let sites = p.op_count() + vars_total(&p.root);
+        assert_eq!(visible.len(), sites, "one host-visible ref per op and var");
+        // A depth-2 site (inside fwd) surfaces as a ChildExport at the root.
+        assert!(visible.iter().any(|(a, r)| a.path.len() == 2 && matches!(r, Ref::ChildExport(..))),
+            "depth-2 sites surface via child exports");
+    }
+
+    #[test]
+    fn nested_exports_carry_one_lift_per_level() {
+        let p = lower_tree(parse(SCC));
+        let mut out = Scope { imports: p.root.imports.clone(), ..Scope::default() };
+        let import_map: Vec<Ref> = (0..out.imports.len()).map(Ref::Import).collect();
+        clone_into(&p.root, &mut out, &import_map);
+        // outer's clone: every $host: export is a LiftIter Linear; and the
+        // ones re-exporting fwd/bwd internals chain TWO lifts (one per level):
+        // the value behind the lift is itself a ChildExport of a lift.
+        let Item::Sub(outer) = out.items.iter().find(|i| matches!(i, Item::Sub(_))).unwrap() else { unreachable!() };
+        let mut depth2_chains = 0;
+        for e in outer.exports.iter().filter(|e| e.name.starts_with("$host:")) {
+            let Ref::Local(li) = &e.value else { panic!("$host export should be a fresh lift") };
+            let Item::Op(Node::Linear { input, ops }) = &outer.items[*li] else { panic!("expected a lift") };
+            assert_eq!(ops.as_slice().len(), 1);
+            assert!(matches!(ops[0], LinearOp::LiftIter));
+            if matches!(input, Ref::ChildExport(..)) { depth2_chains += 1; }
+        }
+        assert!(depth2_chains > 0, "fwd/bwd internals re-lift at outer's exit");
+    }
+
+    #[test]
+    fn identity_clone_preserves_structure() {
+        let p = lower_tree(parse(SCC));
+        let c = clone_identity(&p);
+        assert_eq!(c.root.exports.len(), p.root.exports.len(), "root exports preserved (no $host at root)");
+        assert_eq!(c.root.exports[0].name, "result");
+        assert_eq!(vars_total(&c.root), vars_total(&p.root), "feedback variables preserved");
+        assert!(c.op_count() > p.op_count(), "clone adds the lift chains");
+    }
+}
diff --git a/interactive/src/ir.rs b/interactive/src/ir.rs index 88d3d1946..429085a28 100644 --- a/interactive/src/ir.rs +++ b/interactive/src/ir.rs @@ -1,11 +1,10 @@ -//! IR types for DD IR programs. -//! -//! The IR is a flat map of nodes addressed by index. -//! Nodes are symbolic — no closures, no generics. +//! Row and operator vocabulary shared by the IR and the renderers: +//! `LinearOp`, `RowLike`, field/condition evaluation, and the arity +//! transfer functions. +//! The program structure itself lives in `scope_ir`. -use std::collections::BTreeMap; -use crate::parse::{Projection, Condition, FieldExpr, Reducer}; +use crate::parse::{Projection, Condition, FieldExpr}; pub type Diff = i64; pub type Id = usize; @@ -52,298 +51,6 @@ pub enum LinearOp { /// `Expr::LiftIter` for the discipline restriction. LiftIter, } - -/// Symbolic IR node. 
-pub enum Node { - Input(usize), - /// A named external trace, resolved against a registry at install time; - /// shape is inferred from the registry, not the IR. - /// - /// STUB: only the server resolves this; the example renderers don't, and no - /// example program uses it yet. The intended end-state is a single - /// named-source substrate that also subsumes `Input(usize)` — there should - /// not be two ways to bring in a source. Until that cutover, `Input` is the - /// working input and `Import` is forward-looking. - Import { name: String }, - /// A chain of linear operations on a stream of (data, time, diff) triples. - Linear { input: Id, ops: Vec }, - Concat(Vec), - Arrange(Id), - Join { left: Id, right: Id, projection: Projection }, - Reduce { input: Id, reducer: Reducer }, - Variable, - Inspect { input: Id, label: String }, - Leave(Id, usize), - Scope, - EndScope, - Bind { variable: Id, value: Id }, -} - -pub struct Program { - pub nodes: BTreeMap, - /// Named outputs of the program. - pub export: Vec<(String, Id)>, -} - -impl Program { - /// Print a human-readable summary of the IR. - pub fn dump(&self) { - for (&id, node) in &self.nodes { - let desc = match node { - Node::Input(i) => format!("Input({})", i), - Node::Import { name } => format!("Import({:?})", name), - Node::Linear { input, ops } => { - let ops_str: Vec = ops.iter().map(|op| match op { - LinearOp::Project(_) => "Project".into(), - LinearOp::Filter(_) => "Filter".into(), - LinearOp::Negate => "Negate".into(), - LinearOp::EnterAt(_) => "EnterAt".into(), - LinearOp::LiftIter => "LiftIter".into(), - }).collect(); - format!("Linear({}, [{}])", input, ops_str.join(", ")) - }, - Node::Concat(ids) => format!("Concat({:?})", ids), - Node::Arrange(input) => format!("Arrange({})", input), - Node::Join { left, right, .. } => format!("Join({}, {})", left, right), - Node::Reduce { input, .. 
} => format!("Reduce({})", input), - Node::Variable => "Variable".into(), - Node::Inspect { input, label } => format!("Inspect({}, {:?})", input, label), - Node::Leave(id, lvl) => format!("Leave({}, {})", id, lvl), - Node::Scope => "Scope".into(), - Node::EndScope => "EndScope".into(), - Node::Bind { variable, value } => format!("Bind({} <- {})", variable, value), - }; - println!(" {:3}: {}", id, desc); - } - for (name, id) in &self.export { - println!(" export {:?} = {}", name, id); - } - } - - /// Per-node user-scope depth. Computed by walking `nodes` in id order - /// and tracking `Scope` / `EndScope` markers. A node sits at the depth - /// active at the moment it was lowered; `Scope` itself sits at its - /// outer depth (the increment applies to subsequent nodes), and - /// `EndScope` sits at its inner depth (the decrement applies after). - /// - /// Note: this is purely positional, so `enter_at` — a data→time lift that - /// semantically adds one scope coordinate (its output is one level deeper - /// than its input) — is counted depth-NEUTRAL here. The coordinate it - /// introduces is instead absorbed by a neighboring Project's depth jump and - /// stripped *unconstrained* in the reverse rewrite: sound but over-broad. - /// The fix is to let `enter_at` own its level (output = input depth + 1); - /// see the `LinearOp::EnterAt` arm in `explain.rs::emit_reverse`. - pub fn depths(&self) -> BTreeMap { - let mut out = BTreeMap::new(); - let mut depth = 0usize; - for (&id, node) in &self.nodes { - match node { - Node::Scope => { out.insert(id, depth); depth += 1; }, - Node::EndScope => { out.insert(id, depth); depth = depth.saturating_sub(1); }, - _ => { out.insert(id, depth); }, - } - } - out - } - - /// Reject programs where a `LinearOp::LiftIter` result is referenced - /// inside its own scope. See `Expr::LiftIter` for the rationale: in- - /// scope use would let loop bodies branch on iter, defeating the - /// time-invariant-body property fixpoints rely on. 
- pub fn validate_lift_iter(&self) -> Result<(), String> { - let depths = self.depths(); - // Build a map: producer id -> list of (user id, user node). - let mut users: BTreeMap> = BTreeMap::new(); - for (&user_id, node) in &self.nodes { - let inputs: Vec = match node { - Node::Linear { input, .. } | Node::Arrange(input) - | Node::Reduce { input, .. } | Node::Inspect { input, .. } => vec![*input], - Node::Join { left, right, .. } => vec![*left, *right], - Node::Concat(ids) => ids.clone(), - Node::Leave(id, _) => vec![*id], - Node::Bind { value, .. } => vec![*value], - Node::Input(_) | Node::Import { .. } | Node::Variable | Node::Scope | Node::EndScope => vec![], - }; - for input in inputs { - users.entry(input).or_default().push(user_id); - } - } - for (&id, node) in &self.nodes { - if let Node::Linear { ops, .. } = node { - if !ops.iter().any(|o| matches!(o, LinearOp::LiftIter)) { continue; } - let my_depth = depths[&id]; - if my_depth == 0 { - return Err(format!( - "lift_iter at node {} is at scope depth 0; lift_iter is only meaningful inside a user scope", - id - )); - } - if let Some(uses) = users.get(&id) { - for &user in uses { - let user_depth = depths[&user]; - if user_depth >= my_depth { - return Err(format!( - "lift_iter at node {} (depth {}) referenced by node {} (depth {}); lift_iter result must be referenced only from an enclosing scope", - id, my_depth, user, user_depth - )); - } - } - } - } - } - Ok(()) - } - - /// Redirect every reference to node `from` so it points at `to`, across all - /// nodes' inputs and the export list. Used by `optimize` when it collapses - /// or fuses one node into another and the old id must be retargeted. - fn rewrite(&mut self, from: Id, to: Id) { - for node in self.nodes.values_mut() { - match node { - Node::Linear { input, .. } | Node::Arrange(input) - | Node::Reduce { input, .. } | Node::Inspect { input, .. } => { - if *input == from { *input = to; } - }, - Node::Join { left, right, .. 
} => { - if *left == from { *left = to; } - if *right == from { *right = to; } - }, - Node::Concat(ids) => { - for id in ids.iter_mut() { if *id == from { *id = to; } } - }, - Node::Leave(id, _) => { - if *id == from { *id = to; } - }, - Node::Bind { variable, value } => { - if *variable == from { *variable = to; } - if *value == from { *value = to; } - }, - Node::Input(_) | Node::Import { .. } | Node::Variable | Node::Scope | Node::EndScope => {}, - } - } - for (_, id) in self.export.iter_mut() { - if *id == from { *id = to; } - } - } - - /// Optimize the IR in place, iterating to a fixed point. - pub fn optimize(&mut self) { - loop { - let before = self.nodes.len(); - - // Arrange(x) where x already produces an arrangement -> collapse. - let collapses: Vec<(Id, Id)> = self.nodes.iter() - .filter_map(|(&id, node)| { - if let Node::Arrange(input) = node { - if matches!(self.nodes.get(input), Some(Node::Arrange(_) | Node::Reduce { .. })) { - return Some((id, *input)); - } - } - None - }) - .collect(); - for (outer, inner) in collapses { - self.rewrite(outer, inner); - self.nodes.remove(&outer); - } - - // Fuse Linear chains: if a Linear's input is another Linear (with no other consumers), - // concatenate the ops into one node. - // First, count references to each node. - let mut ref_counts: std::collections::HashMap = std::collections::HashMap::new(); - for node in self.nodes.values() { - match node { - Node::Linear { input, .. } | Node::Arrange(input) - | Node::Reduce { input, .. } | Node::Inspect { input, .. } => { - *ref_counts.entry(*input).or_default() += 1; - }, - Node::Join { left, right, .. 
} => { - *ref_counts.entry(*left).or_default() += 1; - *ref_counts.entry(*right).or_default() += 1; - }, - Node::Concat(ids) => { - for id in ids { *ref_counts.entry(*id).or_default() += 1; } - }, - Node::Leave(id, _) => { *ref_counts.entry(*id).or_default() += 1; }, - Node::Bind { variable, value } => { - *ref_counts.entry(*variable).or_default() += 1; - *ref_counts.entry(*value).or_default() += 1; - }, - _ => {}, - } - } - for (_, id) in &self.export { *ref_counts.entry(*id).or_default() += 1; } - - let fusions: Vec<(Id, Id)> = self.nodes.iter() - .filter_map(|(&id, node)| { - if let Node::Linear { input, .. } = node { - if matches!(self.nodes.get(input), Some(Node::Linear { .. })) { - if ref_counts.get(input).copied().unwrap_or(0) == 1 { - return Some((id, *input)); - } - } - } - None - }) - .collect(); - for (outer_id, inner_id) in fusions { - // Take both nodes out, fuse, put back. - let outer = self.nodes.remove(&outer_id).unwrap(); - let inner = self.nodes.remove(&inner_id).unwrap(); - if let (Node::Linear { ops: mut outer_ops, .. }, Node::Linear { input: inner_input, ops: inner_ops }) = (outer, inner) { - let mut fused = inner_ops; - fused.append(&mut outer_ops); - self.nodes.insert(outer_id, Node::Linear { input: inner_input, ops: fused }); - } - } - - // Deduplicate structurally identical nodes. - use std::collections::HashMap; - fn structural_key(node: &Node) -> Option { - match node { - Node::Variable | Node::Scope | Node::EndScope | Node::Bind { .. 
} => None, - other => Some(format!("{:?}", DebugNode(other))), - } - } - struct DebugNode<'a>(&'a Node); - impl<'a> std::fmt::Debug for DebugNode<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self.0 { - Node::Input(i) => write!(f, "Input({})", i), - Node::Import { name } => write!(f, "Import({:?})", name), - Node::Linear { input, ops } => write!(f, "Linear({},{:?})", input, ops), - Node::Concat(ids) => write!(f, "Concat({:?})", ids), - Node::Arrange(input) => write!(f, "Arrange({})", input), - Node::Join { left, right, projection } => write!(f, "Join({},{},{:?})", left, right, projection), - Node::Reduce { input, reducer } => write!(f, "Reduce({},{:?})", input, reducer), - Node::Inspect { input, label } => write!(f, "Inspect({},{:?})", input, label), - Node::Leave(id, lvl) => write!(f, "Leave({},{})", id, lvl), - _ => write!(f, ""), - } - } - } - - let mut seen: HashMap = HashMap::new(); - let dupes: Vec<(Id, Id)> = self.nodes.iter() - .filter_map(|(&id, node)| { - let key = structural_key(node)?; - if let Some(&canonical) = seen.get(&key) { - Some((id, canonical)) - } else { - seen.insert(key, id); - None - } - }) - .collect(); - for (dupe, canonical) in dupes { - self.rewrite(dupe, canonical); - self.nodes.remove(&dupe); - } - if self.nodes.len() == before { break; } - } // loop - } -} - /// Evaluate fields into a row. pub fn eval_fields(fields: &[FieldExpr], inputs: &[&[i64]]) -> R { let mut r = R::new(); @@ -399,3 +106,42 @@ fn eval_field_raw(field: &FieldExpr, inputs: &[&[i64]], result: &mut Vec) { } } } + +// Arity transfer functions: how ops and projections change a row's (k, v) +// shape. A projection field's width is the sum of its parts — `Pos(r)` is a +// whole-row reference of width = row r's arity, not one column (the miscount +// that once made SCC's explanation unsound). 
+pub(crate) fn apply_ops_arity((mut k, mut v): (usize, usize), ops: &[LinearOp]) -> (usize, usize) { + for op in ops { + match op { + // Project's input rows are [key, val]; expand `Pos` refs to + // their row arities rather than counting field-exprs. + LinearOp::Project(p) => { + let rows = [k, v]; + k = proj_arity(&p.key, &rows); + v = proj_arity(&p.val, &rows); + } + LinearOp::Filter(_) | LinearOp::Negate | LinearOp::EnterAt(_) => {} + LinearOp::LiftIter => { v += 1; } + } + } + (k, v) +} + +/// Width (output columns) a single `FieldExpr` expands to, given the +/// arities of the input rows it may reference. `Pos(r)` is a whole-row +/// reference of width `rows[r]`; index/const are single columns. +fn field_width(f: &FieldExpr, rows: &[usize]) -> usize { + match f { + FieldExpr::Pos(r) => rows.get(*r).copied().unwrap_or(0), + FieldExpr::Index(_, _) | FieldExpr::Const(_) => 1, + FieldExpr::Neg(inner) => field_width(inner, rows), + FieldExpr::Sub(a, _) => field_width(a, rows), + } +} + +/// Total arity of one projection side (`key`/`val`): the sum of its +/// fields' widths. +pub(crate) fn proj_arity(fields: &[FieldExpr], rows: &[usize]) -> usize { + fields.iter().map(|f| field_width(f, rows)).sum() +} diff --git a/interactive/src/lib.rs b/interactive/src/lib.rs index e070cc3f4..a6d32fe2d 100644 --- a/interactive/src/lib.rs +++ b/interactive/src/lib.rs @@ -1,9 +1,9 @@ pub mod parse; pub mod ir; pub mod lower; -pub mod explain; pub mod folded; pub mod scope_ir; +pub mod explain_tree; use std::collections::BTreeSet; diff --git a/interactive/src/lower.rs b/interactive/src/lower.rs index fe3d42f28..32bbdc84b 100644 --- a/interactive/src/lower.rs +++ b/interactive/src/lower.rs @@ -1,220 +1,17 @@ -//! Lowering from AST to IR. +//! Lowering from AST to the scope-tree IR. //! -//! Statement order within a scope does not affect semantics. At each scope -//! level we: -//! -//! 1. Bucket statements (and error on duplicate names). -//! 2. 
Pre-push `Variable` placeholders for every `var` so that anything in -//! the scope can refer to them. -//! 3. Topologically lower `let` bindings and child scopes by dependency: -//! each item is lowered once all the names it transitively needs at this -//! level are bound. A cycle among `let`s is an error (use a `var` to -//! introduce recursion). -//! 4. Lower the `export` expressions (root scope only). -//! 5. Lower each `var`'s body and emit a `Bind` from the placeholder to the -//! resulting value. - -use std::collections::{BTreeMap, BTreeSet, HashMap}; - -use crate::parse::*; -use crate::ir::{Node, LinearOp, Id, Program}; - -struct Lowering { - nodes: BTreeMap, - next_id: Id, - /// Stack of value-name scopes; innermost last. - scopes: Vec>, - /// Inner environments of named scopes, keyed by scope name; the `usize` - /// records the scope's nesting depth (used by `Node::Leave`). - named_scopes: HashMap)>, - level: usize, -} - -impl Lowering { - fn new() -> Self { - Lowering { - nodes: BTreeMap::new(), - next_id: 0, - scopes: vec![HashMap::new()], - named_scopes: HashMap::new(), - level: 0, - } - } - - fn push(&mut self, node: Node) -> Id { let id = self.next_id; self.next_id += 1; self.nodes.insert(id, node); id } - fn bind_name(&mut self, name: String, id: Id) { self.scopes.last_mut().unwrap().insert(name, id); } - fn resolve_name(&self, name: &str) -> Id { - for scope in self.scopes.iter().rev() { - if let Some(&id) = scope.get(name) { return id; } - } - panic!("Unresolved name: {}", name) - } - - fn lower_program(mut self, stmts: Vec) -> Program { - let mut exports = Vec::new(); - self.lower_stmts(stmts, &mut exports); - if exports.is_empty() { - panic!("Program has no `export` statement"); - } - Program { nodes: self.nodes, export: exports } - } - - fn lower_stmts(&mut self, stmts: Vec, exports: &mut Vec<(String, Id)>) { - // ---- 1. Bucket statements; reject duplicate names. 
---- - // `order` records the original textual order so the topological pass - // is deterministic when several items are simultaneously ready. - let mut vars: Vec<(String, Expr)> = Vec::new(); - let mut lets: HashMap = HashMap::new(); - let mut scopes: HashMap> = HashMap::new(); - let mut order: Vec<(ItemKind, String)> = Vec::new(); - // Exports in declaration order (root scope only — rejected below if nested). - let mut local_exports: Vec<(String, Expr)> = Vec::new(); - let mut seen: BTreeSet = BTreeSet::new(); - for stmt in stmts { - match stmt { - Stmt::Let(name, expr) => { - if !seen.insert(name.clone()) { panic!("Duplicate name in scope: {}", name); } - order.push((ItemKind::Let, name.clone())); - lets.insert(name, expr); - }, - Stmt::Var(name, expr) => { - if !seen.insert(name.clone()) { panic!("Duplicate name in scope: {}", name); } - vars.push((name, expr)); - }, - Stmt::Scope(name, body) => { - if !seen.insert(name.clone()) { panic!("Duplicate name in scope: {}", name); } - order.push((ItemKind::Scope, name.clone())); - scopes.insert(name, body); - }, - Stmt::Export(name, expr) => { - // Exports are the program's output interface and only make - // sense at the root; reject nested ones rather than silently - // dropping them. - if self.level > 0 { - panic!("`export {:?}` is nested; exports are only allowed at the root scope", name); - } - local_exports.push((name, expr)); - }, - } - } - // Reject duplicate export names (root-only, so this is the whole - // program's output interface). - { - let mut names: BTreeSet<&str> = BTreeSet::new(); - for (n, _) in &local_exports { - if !names.insert(n) { - panic!("Duplicate export name: {:?}", n); - } - } - } +//! Statement order within a scope does not affect semantics: each scope +//! buckets its statements (rejecting duplicate names), pre-declares its +//! `var`s so anything in the scope can refer to them, topologically lowers +//! `let`s and child scopes by dependency (a cycle among `let`s is an error; +//! 
use a `var` for recursion), then lowers exports and the `var` bodies +//! (each emitting a `Bind`). - // ---- 2. Pre-bind `Variable` placeholders. ---- - for (name, _) in &vars { - let id = self.push(Node::Variable); - self.bind_name(name.clone(), id); - } - - // ---- 3. Topologically lower lets and child scopes. ---- - // Deps for a let: free names of its expression that are themselves - // defined as let/scope at this level (vars are already bound). - // Deps for a scope: free names that escape the scope body, restricted - // similarly. - let defined_topo: BTreeSet<&str> = lets.keys().chain(scopes.keys()).map(String::as_str).collect(); - let mut remaining_deps: HashMap> = HashMap::new(); - for (name, expr) in &lets { - remaining_deps.insert(name.clone(), expr_deps(expr, &defined_topo, name)); - } - for (name, body) in &scopes { - remaining_deps.insert(name.clone(), scope_body_deps(body, &defined_topo, name)); - } - drop(defined_topo); - - // Greedy topo: scan `order` for an item with no remaining deps; lower - // it and remove it from every other item's dep set. Repeat until done. - let mut pending: Vec<(ItemKind, String)> = order; - while !pending.is_empty() { - let pick = pending.iter().position(|(_, n)| remaining_deps[n].is_empty()); - let Some(idx) = pick else { - let stuck: Vec = pending.iter().map(|(_, n)| n.clone()).collect(); - panic!("Cyclic dependency among let/scope bindings: {:?}. Use `var` to introduce recursion.", stuck); - }; - let (kind, name) = pending.remove(idx); - remaining_deps.remove(&name); - for deps in remaining_deps.values_mut() { deps.remove(&name); } - - match kind { - ItemKind::Let => { - let expr = lets.remove(&name).unwrap(); - let id = self.lower_expr(expr); - self.bind_name(name, id); - }, - ItemKind::Scope => { - let body = scopes.remove(&name).unwrap(); - self.push(Node::Scope); - self.level += 1; - self.scopes.push(HashMap::new()); - // Exports are root-only (lower_stmts rejects nested ones), - // so this stays empty. 
- let mut inner_exports = Vec::new(); - self.lower_stmts(body, &mut inner_exports); - let inner_scope = self.scopes.pop().unwrap(); - let scope_level = self.level; - self.named_scopes.insert(name, (scope_level, inner_scope)); - self.level -= 1; - self.push(Node::EndScope); - }, - } - } - - // ---- 4. Lower export expressions (if any) and record them. ---- - for (name, expr) in local_exports { - let id = self.lower_expr(expr); - exports.push((name, id)); - } +use std::collections::{BTreeSet, HashMap}; - // ---- 5. Lower var bodies and emit Bind nodes. ---- - for (name, expr) in vars { - let var_id = self.resolve_name(&name); - let value_id = self.lower_expr(expr); - self.push(Node::Bind { variable: var_id, value: value_id }); - } - } +use crate::parse::*; +use crate::ir::LinearOp; - fn lower_expr(&mut self, expr: Expr) -> Id { - match expr { - Expr::Input(n) => self.push(Node::Input(n)), - Expr::Import(name) => self.push(Node::Import { name }), - Expr::Name(name) => self.resolve_name(&name), - Expr::Qualified(scope_name, name) => { - let (scope_level, inner_id) = { - let (lvl, scope) = self.named_scopes.get(&scope_name).unwrap_or_else(|| panic!("Unknown scope: {}", scope_name)); - (*lvl, *scope.get(&name).unwrap_or_else(|| panic!("Unknown name {}::{}", scope_name, name))) - }; - self.push(Node::Leave(inner_id, scope_level)) - }, - Expr::Map(input, proj) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::Project(proj)] }) }, - Expr::Join(left, right, proj) => { - let l = self.lower_expr(*left); - let l = self.push(Node::Arrange(l)); - let r = self.lower_expr(*right); - let r = self.push(Node::Arrange(r)); - self.push(Node::Join { left: l, right: r, projection: proj }) - }, - Expr::Reduce(input, reducer) => { - let id = self.lower_expr(*input); - let id = self.push(Node::Arrange(id)); - self.push(Node::Reduce { input: id, reducer }) - }, - Expr::Filter(input, cond) => { let id = self.lower_expr(*input); 
self.push(Node::Linear { input: id, ops: vec![LinearOp::Filter(cond)] }) }, - Expr::Negate(input) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::Negate] }) }, - Expr::EnterAt(input, fld) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::EnterAt(fld)] }) }, - Expr::LiftIter(input) => { let id = self.lower_expr(*input); self.push(Node::Linear { input: id, ops: vec![LinearOp::LiftIter] }) }, - Expr::Inspect(input, lab) => { let id = self.lower_expr(*input); self.push(Node::Inspect { input: id, label: lab }) }, - Expr::Concat(exprs) => { let ids: Vec = exprs.into_iter().map(|e| self.lower_expr(e)).collect(); self.push(Node::Concat(ids)) }, - Expr::Arrange(input) => { let id = self.lower_expr(*input); self.push(Node::Arrange(id)) }, - } - } -} #[derive(Clone, Copy)] enum ItemKind { Let, Scope } @@ -279,12 +76,6 @@ fn collect_body_free_names<'a>(body: &'a [Stmt], out: &mut BTreeSet<&'a str>) { for n in inner { if !local.contains(n) { out.insert(n); } } } -pub fn lower(stmts: Vec) -> Program { - let program = Lowering::new().lower_program(stmts); - program.validate_lift_iter().unwrap_or_else(|e| panic!("{}", e)); - program -} - // ===== Scope-tree lowering (AST -> scope_ir) ===== // // Produces the tree IR (see `scope_ir`): each `{ .. }` becomes an diff --git a/interactive/src/scope_ir.rs b/interactive/src/scope_ir.rs index 80a770ef9..18ba81c70 100644 --- a/interactive/src/scope_ir.rs +++ b/interactive/src/scope_ir.rs @@ -149,6 +149,13 @@ impl Program { self.root.optimize(); } + /// Print the tree as indented structural text (readability, not + /// parseability): imports and vars first, items in order (`Sub`s nest), + /// then binds and exports. + pub fn dump(&self) { + dump_scope(&self.root, 0); + } + /// Total operator (`Op`) count across all scopes. 
pub fn op_count(&self) -> usize { fn count(s: &Scope) -> usize { @@ -301,6 +308,76 @@ impl Scope { } } +fn fmt_ref(r: &Ref) -> String { + match r { + Ref::Local(i) => format!("n{}", i), + Ref::Import(k) => format!("in{}", k), + Ref::Var(v) => format!("v{}", v), + Ref::ChildExport(c, j) => format!("n{}::{}", c, j), + } +} + +fn dump_scope(s: &Scope, indent: usize) { + let pad = " ".repeat(indent); + println!("{}{}: {{", pad, if s.name.is_empty() { "scope" } else { &s.name }); + dump_scope_body(s, indent); + println!("{}}}", pad); +} + +fn dump_scope_inner(s: &Scope, indent: usize) { + // As `dump_scope`, but the caller began the opening line. + println!("{}: {{", if s.name.is_empty() { "scope" } else { &s.name }); + dump_scope_body(s, indent); + println!("{}}}", " ".repeat(indent)); +} + +fn dump_scope_body(s: &Scope, indent: usize) { + let pad2 = " ".repeat(indent + 1); + for (k, imp) in s.imports.iter().enumerate() { + let from = match &imp.from { + Source::Parent(r) => fmt_ref(r), + Source::Input(i) => format!("input {}", i), + Source::Trace(n) => format!("import {:?}", n), + }; + println!("{}in{} = {} ({:?});", pad2, k, from, imp.name); + } + for (v, var) in s.vars.iter().enumerate() { + println!("{}var v{} ({:?});", pad2, v, var.name); + } + for (i, item) in s.items.iter().enumerate() { + match item { + Item::Op(node) => { + let desc = match node { + Node::Linear { input, ops } => { + let ops: Vec<&str> = ops.iter().map(|op| match op { + LinearOp::Project(_) => "project", LinearOp::Filter(_) => "filter", + LinearOp::Negate => "negate", LinearOp::EnterAt(_) => "enter_at", + LinearOp::LiftIter => "lift_iter", + }).collect(); + format!("{} | {}", fmt_ref(input), ops.join(" | ")) + } + Node::Concat(refs) => refs.iter().map(fmt_ref).collect::>().join(" + "), + Node::Arrange(r) => format!("{} | arrange", fmt_ref(r)), + Node::Join { left, right, .. 
} => format!("join({}, {})", fmt_ref(left), fmt_ref(right)), + Node::Reduce { input, reducer } => format!("{} | {:?}", fmt_ref(input), reducer), + Node::Inspect { input, label } => format!("{} | inspect({})", fmt_ref(input), label), + }; + println!("{}n{} = {};", pad2, i, desc); + } + Item::Sub(child) => { + print!("{}n{} = ", pad2, i); + dump_scope_inner(child, indent + 1); + } + } + } + for b in &s.binds { + println!("{}bind v{} = {};", pad2, b.var, fmt_ref(&b.value)); + } + for e in &s.exports { + println!("{}export {:?} = {};", pad2, e.name, fmt_ref(&e.value)); + } +} + #[cfg(test)] mod tests { use super::*;