From d48a787447b626d47db784bbedeb1d59475f1ff3 Mon Sep 17 00:00:00 2001
From: Scott Richmond
Date: Tue, 10 Dec 2024 16:44:52 -0500
Subject: [PATCH] all the work trying to write a validator

---
 assets/test_prelude.ld |   5 +
 src/context.rs         |  15 +-
 src/main.rs            |  23 +--
 src/validator.rs       | 377 +++++++++++++++++++++++++++++++++++++++++
 src/value.rs           |   9 +-
 5 files changed, 393 insertions(+), 36 deletions(-)
 create mode 100644 assets/test_prelude.ld
 create mode 100644 src/validator.rs

diff --git a/assets/test_prelude.ld b/assets/test_prelude.ld
new file mode 100644
index 0000000..d0a15e7
--- /dev/null
+++ b/assets/test_prelude.ld
@@ -0,0 +1,5 @@
+base :print! ("Hello from Prelude")
+
+fn add (x, y) -> base :add (x, y)
+
+#{add}
diff --git a/src/context.rs b/src/context.rs
index 594185f..57abc0f 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -6,17 +6,6 @@ use imbl::Vector;
 use std::cell::RefCell;
 use std::rc::Rc;
 
-pub fn match_eq<T, U>(x: T, y: T, z: U) -> Option<U>
-where
-    T: PartialEq,
-{
-    if x == y {
-        Some(z)
-    } else {
-        None
-    }
-}
-
 pub struct LErr {
     pub msg: String,
     pub trace: Vec,
@@ -41,7 +30,7 @@ type LResult<'src> = Result, LErr>;
 pub struct Context<'src> {
     pub locals: Vec<(String, Value<'src>)>,
     pub prelude: Vec<(String, Value<'src>)>,
-    pub prelude_ast: &'src Ast,
+    // pub prelude_ast: &'src Ast,
     pub ast: &'src Ast,
 }
 
@@ -76,7 +65,7 @@ impl<'src> Context<'src> {
         T: PartialEq,
     {
         if x == y {
-            Some(&self)
+            Some(self)
         } else {
             None
         }
diff --git a/src/main.rs b/src/main.rs
index 4e51887..1fa69f2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -50,12 +50,12 @@ use crate::value::*;
 mod parser;
 use crate::parser::*;
 
-// mod vm;
-// use crate::vm::*;
-
 mod base;
 use crate::base::*;
 
+mod validator;
+use crate::validator::*;
+
 mod context;
 use crate::context::*;
 
@@ -64,7 +64,7 @@ use crate::context::*;
 struct Asset;
 
 pub fn prelude<'src>() -> Context<'src> {
-    let prelude = Asset::get("prelude.ld").unwrap().data.into_owned();
+    let prelude = Asset::get("test_prelude.ld").unwrap().data.into_owned();
     // we know for sure Prelude should live through the whole run of the program
     let leaked = Box::leak(Box::new(prelude));
     let prelude = std::str::from_utf8(leaked).unwrap();
@@ -94,7 +94,7 @@ pub fn prelude<'src>() -> Context<'src> {
         locals: vec![],
         ast: p_ast,
         prelude: base_pkg,
-        prelude_ast: &Ast::Nil,
+        // prelude_ast: &Ast::Nil,
     };
 
     let prelude = base_ctx.eval();
@@ -122,7 +122,7 @@ pub fn prelude<'src>() -> Context<'src> {
         locals: vec![],
         ast: &Ast::Nil,
         prelude: p_ctx,
-        prelude_ast: p_ast,
+        // prelude_ast: p_ast,
     }
 }
 
@@ -158,16 +158,7 @@ pub fn run(src: &'static str) {
 
 pub fn main() {
     let src = "
-fn fib {
-    (1) -> 1
-    (2) -> 1
-    (n) -> add (
-        fib (dec (n))
-        fib (sub (n, 2))
-    )
-}
-
-fib (25)
+add (1, 2)
     ";
     run(src);
     // struct_scalpel::print_dissection_info::()
diff --git a/src/validator.rs b/src/validator.rs
new file mode 100644
index 0000000..8edd8ac
--- /dev/null
+++ b/src/validator.rs
@@ -0,0 +1,377 @@
+use crate::parser::*;
+use crate::spans::Span;
+use std::collections::{HashMap, HashSet};
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct VErr {
+    msg: String,
+    span: Span,
+}
+
+impl VErr {
+    pub fn new(msg: String, span: Span) -> VErr {
+        VErr { msg, span }
+    }
+}
+
+#[derive(Clone, Debug, PartialEq)]
+struct VStatus {
+    tail_position: bool,
+    in_loop: bool,
+    last_term: bool,
+    has_placeholder: bool,
+    used_bindings: Vec<String>,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Arity {
+    Fixed(u8),
+    Splat(u8),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum FnInfo {
+    Declared,
+    Defined(HashSet<Arity>, HashSet<String>),
+    Unknown,
+}
+
+#[derive(Debug, PartialEq)]
+pub struct Validator<'a> {
+    // TODO: add another term here: FnStatus. See Issue #18.
+    pub locals: Vec<(String, Span, FnInfo)>,
+    pub prelude: &'a Vec<String>,
+    pub ast: &'a Ast,
+    pub span: Span,
+    pub errors: Vec<VErr>,
+    pub fn_info: HashMap<*const Ast, FnInfo>,
+    status: VStatus,
+}
+
+impl<'a> Validator<'a> {
+    fn new(ast: &'a mut Ast, span: Span, prelude: &'a Vec<String>) -> Validator<'a> {
+        Validator {
+            locals: vec![],
+            prelude,
+            ast,
+            span,
+            fn_info: std::collections::HashMap::new(),
+            errors: vec![],
+            status: VStatus {
+                tail_position: false,
+                in_loop: false,
+                last_term: false,
+                has_placeholder: false,
+                used_bindings: vec![],
+            },
+        }
+    }
+
+    fn bind(&mut self, name: String) {
+        self.locals.push((name, self.span, FnInfo::Unknown));
+    }
+
+    fn declare_fn(&mut self, name: String) {
+        self.locals.push((name, self.span, FnInfo::Declared));
+    }
+
+    fn define_fn(&mut self, name: String, info: FnInfo) {
+        self.locals.push((name, self.span, info));
+    }
+
+    fn resolved(&self, name: &str) -> bool {
+        self.locals.iter().any(|(bound, ..)| name == bound.as_str())
+            || self.prelude.iter().any(|bound| name == bound.as_str())
+    }
+
+    fn bound(&self, name: &str) -> Option<&(String, Span, FnInfo)> {
+        match self.locals.iter().rev().find(|(bound, ..)| name == bound) {
+            Some(binding) => Some(binding),
+            None => None,
+        }
+    }
+
+    fn err(&mut self, msg: String) {
+        self.errors.push(VErr::new(msg, self.span))
+    }
+
+    fn use_name(&mut self, name: String) {
+        self.status.used_bindings.push(name);
+    }
+
+    fn validate(&mut self) {
+        let root = self.ast;
+        match root {
+            Ast::Error => unreachable!(),
+            Ast::Word(name) | Ast::Splat(name) => {
+                if !self.resolved(name) {
+                    self.err(format!("unbound name {name}"))
+                }
+            }
+            Ast::Interpolated(parts) => {
+                for part in parts {
+                    if let (StringPart::Word(name), span) = part {
+                        self.span = *span;
+                        if !self.resolved(name.as_str()) {
+                            self.err(format!("unbound name {name}"))
+                        }
+                    }
+                }
+            }
+            // validate each line
+            // ensure it's not empty
+            // pass through tail position validation
+            // check if there are any declared but undefined functions
+            // pop all the bindings off the local stack
+            Ast::Block(block) => {
+                if block.is_empty() {
+                    self.err("blocks must have at least one expression".to_string());
+                    return;
+                }
+                let to = self.locals.len();
+                let tailpos = self.status.tail_position;
+                for (expr, span) in block.iter().take(block.len() - 1) {
+                    self.status.tail_position = false;
+                    self.ast = expr;
+                    self.span = *span;
+                    self.validate();
+                }
+
+                let (expr, span) = block.last().unwrap();
+                self.ast = expr;
+                self.span = *span;
+                self.status.tail_position = tailpos;
+                self.validate();
+
+                let block_bindings = self.locals.split_off(to);
+
+                for binding in block_bindings {
+                    let (name, _, fn_info) = binding;
+                    if matches!(fn_info, FnInfo::Declared) {
+                        self.err(format!("fn `{name}` is declared but not defined"))
+                    }
+                }
+            }
+            // if in tail position, pass through tail position validation
+            // no unbound names
+            Ast::If(cond, then, r#else) => {
+                let tailpos = self.status.tail_position;
+                self.status.tail_position = false;
+
+                let (expr, span) = cond.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+
+                // pass through tailpos only to then/else
+                self.status.tail_position = tailpos;
+                let (expr, span) = then.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+
+                let (expr, span) = r#else.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+            }
+            Ast::Tuple(members) => {
+                if members.is_empty() {
+                    return;
+                }
+                let tailpos = self.status.tail_position;
+                self.status.tail_position = false;
+                for (expr, span) in members {
+                    self.ast = expr;
+                    self.span = *span;
+                    self.validate();
+                }
+                self.status.tail_position = tailpos;
+            }
+            // no more than one placeholder
+            Ast::Arguments(args) => {
+                if args.is_empty() {
+                    return;
+                }
+                let tailpos = self.status.tail_position;
+                self.status.tail_position = false;
+                for (expr, span) in args {
+                    self.ast = expr;
+                    self.span = *span;
+                    self.validate();
+                }
+                self.status.has_placeholder = false;
+                self.status.tail_position = tailpos;
+            }
+            Ast::Placeholder => {
+                if self.status.has_placeholder {
+                    self.err(
+                        "you may only use one placeholder when partially applying functions"
+                            .to_string(),
+                    );
+                }
+                self.status.has_placeholder = true;
+            }
+            Ast::List(list) => {
+                if list.is_empty() {
+                    return;
+                }
+                let tailpos = self.status.tail_position;
+                self.status.tail_position = false;
+                for (expr, span) in list {
+                    self.ast = expr;
+                    self.span = *span;
+                    self.validate();
+                }
+
+                self.status.tail_position = tailpos;
+            }
+            Ast::Pair(_, value) => {
+                let (expr, span) = value.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+            }
+            Ast::Dict(dict) => {
+                if dict.is_empty() {
+                    return;
+                }
+                let tailpos = self.status.tail_position;
+                self.status.tail_position = false;
+                for (expr, span) in dict {
+                    self.ast = expr;
+                    self.span = *span;
+                    self.validate();
+                }
+                self.status.tail_position = tailpos;
+            }
+
+            // TODO!
+            // first check all nodes
+            // then...
+            // check arity is 1 if first term is keyword
+            // check arity against fn info if first term is word and second term is args
+            Ast::Synthetic(first, second, rest) => {
+                todo!()
+            }
+            Ast::When(clauses) => {
+                // let tailpos = self.status.tail_position;
+                // for (clause, _) in clauses {
+                //     self.status.tail_position = false;
+                //     let (expr, span) = clause.cond.clone();
+                //     self.ast = &expr;
+                //     self.span = span;
+                //     self.validate();
+
+                //     self.status.tail_position = tailpos;
+                //     let (expr, span) = clause.body;
+                //     self.ast = &expr;
+                //     self.span = span;
+                //     self.validate();
+                // }
+            }
+
+            // binding forms
+            // TODO: set up errors to include original binding
+            Ast::Box(name, boxed) => {
+                if self.bound(name).is_some() {
+                    self.err(format!("box name `{name}` is already bound"));
+                }
+                let (expr, span) = boxed.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+            }
+            Ast::Let(lhs, rhs) => todo!(),
+            Ast::Match(scrutinee, clauses) => todo!(),
+
+            Ast::FnDeclaration(name) => {
+                let tailpos = self.status.tail_position;
+                self.status.tail_position = false;
+                if self.bound(name).is_some() {
+                    self.err(format!("fn name `{name}` is already bound"));
+                    return;
+                }
+                self.declare_fn(name.to_string());
+                self.status.tail_position = tailpos;
+            }
+            Ast::Fn(name, clauses, ..) => {
+                match self.bound(name) {
+                    Some((_, _, FnInfo::Declared)) => (),
+                    None => (),
+                    _ => {
+                        self.err(format!("name `{name}` is already bound"));
+                    }
+                }
+
+                let from = self.status.used_bindings.len();
+                let arities = HashSet::new();
+
+                for clause in clauses {
+                    // TODO: validate all parts of clauses
+                    // add clause arity to arities
+                }
+
+                let mut closed_over = HashSet::new();
+
+                for binding in self.status.used_bindings.iter().skip(from) {
+                    closed_over.insert(binding.clone());
+                }
+
+                let info = FnInfo::Defined(arities, closed_over);
+
+                self.define_fn(name.to_string(), info)
+            }
+
+            Ast::Panic(msg) => {
+                let tailpos = self.status.tail_position;
+                self.status.tail_position = false;
+                let (expr, span) = msg.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+                self.status.tail_position = tailpos;
+            }
+            Ast::Do(terms) => {
+                for term in terms {
+                    let (expr, span) = term;
+                    self.ast = expr;
+                    self.span = *span;
+                    self.validate();
+                }
+            }
+            Ast::Repeat(times, body) => {
+                self.status.tail_position = false;
+                let (expr, span) = times.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+
+                let (expr, span) = body.as_ref();
+                self.ast = expr;
+                self.span = *span;
+                self.validate();
+            }
+            Ast::Loop(with, body) => {
+                let (expr, span) = with.as_ref();
+                self.span = *span;
+                self.ast = expr;
+                self.validate();
+
+                let in_loop = self.status.in_loop;
+                self.status.in_loop = true;
+
+                let (expr, span) = body; // NOTE(review): if `body` is boxed like `with`, this needs `.as_ref()` - confirm against the parser
+                self.span = *span;
+                self.ast = expr;
+                self.validate();
+
+                self.status.in_loop = in_loop;
+            }
+            Ast::Recur(args) => {}
+            // terminals can never be invalid
+            Ast::Nil | Ast::Boolean(_) | Ast::Number(_) | Ast::Keyword(_) | Ast::String(_) => (),
+        };
+        self.ast = root;
+    }
+}
diff --git a/src/value.rs b/src/value.rs
index 900b8c6..89a0d5f 100644
--- a/src/value.rs
+++ b/src/value.rs
@@ -20,17 +20,12 @@ pub enum Value<'src> {
     Boolean(bool),
     Number(f64),
     Keyword(&'static str),
-    // TODO: add a "runtime-generated" string type that wraps a Rust String
-    // this is necessary for nice documentation and string interpolation
     InternedString(&'static str),
     AllocatedString(Rc),
     // on the heap for now
     Tuple(Rc>),
     Args(Rc>),
-    // ref-counted, immutable, persistent
     List(Vector),
-    // ref-counted, immutable, persistent
-    // dicts may only use keywords as keys
     Dict(HashMap<&'static str, Self>),
     Box(&'static str, Rc>),
     Fn(Rc>),
@@ -62,7 +57,7 @@ impl<'src> Clone for Value<'src> {
             Value::Fn(f) => Value::Fn(f.clone()),
             Value::List(l) => Value::List(l.clone()),
             Value::Dict(d) => Value::Dict(d.clone()),
-            Value::Box(name, b) => Value::Box(*name, b.clone()),
+            Value::Box(name, b) => Value::Box(name, b.clone()),
             Value::Placeholder => Value::Placeholder,
             Value::Base(b) => Value::Base(b.clone()),
             Value::Recur(..) => unreachable!(),
@@ -177,7 +172,7 @@ impl Value<'_> {
                 .map(|(k, v)| format!(":{} {}", k, v.interpolate()))
                 .collect::>()
                 .join(", "),
-            Value::Fn(x) => format!("fn {}", x.name.to_string()),
+            Value::Fn(x) => format!("fn {}", x.name),
             Value::Placeholder => unreachable!(),
             Value::Args(_) => unreachable!(),
             Value::Recur(_) => unreachable!(),