all the work trying to write a validator

This commit is contained in:
Scott Richmond 2024-12-10 16:44:52 -05:00
parent 9a4f247a21
commit d48a787447
5 changed files with 393 additions and 36 deletions

5
assets/test_prelude.ld Normal file
View File

@ -0,0 +1,5 @@
base :print! ("Hello from Prelude")
fn add (x, y) -> base :add (x, y)
#{add}

View File

@ -6,17 +6,6 @@ use imbl::Vector;
use std::cell::RefCell; use std::cell::RefCell;
use std::rc::Rc; use std::rc::Rc;
/// Yields `Some(z)` when `x` and `y` compare equal, and `None` otherwise.
pub fn match_eq<T, U>(x: T, y: T, z: U) -> Option<U>
where
    T: PartialEq,
{
    match x == y {
        true => Some(z),
        false => None,
    }
}
pub struct LErr { pub struct LErr {
pub msg: String, pub msg: String,
pub trace: Vec<String>, pub trace: Vec<String>,
@ -41,7 +30,7 @@ type LResult<'src> = Result<Value<'src>, LErr>;
pub struct Context<'src> { pub struct Context<'src> {
pub locals: Vec<(String, Value<'src>)>, pub locals: Vec<(String, Value<'src>)>,
pub prelude: Vec<(String, Value<'src>)>, pub prelude: Vec<(String, Value<'src>)>,
pub prelude_ast: &'src Ast, // pub prelude_ast: &'src Ast,
pub ast: &'src Ast, pub ast: &'src Ast,
} }
@ -76,7 +65,7 @@ impl<'src> Context<'src> {
T: PartialEq, T: PartialEq,
{ {
if x == y { if x == y {
Some(&self) Some(self)
} else { } else {
None None
} }

View File

@ -50,12 +50,12 @@ use crate::value::*;
mod parser; mod parser;
use crate::parser::*; use crate::parser::*;
// mod vm;
// use crate::vm::*;
mod base; mod base;
use crate::base::*; use crate::base::*;
mod validator;
use crate::validator::*;
mod context; mod context;
use crate::context::*; use crate::context::*;
@ -64,7 +64,7 @@ use crate::context::*;
struct Asset; struct Asset;
pub fn prelude<'src>() -> Context<'src> { pub fn prelude<'src>() -> Context<'src> {
let prelude = Asset::get("prelude.ld").unwrap().data.into_owned(); let prelude = Asset::get("test_prelude.ld").unwrap().data.into_owned();
// we know for sure Prelude should live through the whole run of the program // we know for sure Prelude should live through the whole run of the program
let leaked = Box::leak(Box::new(prelude)); let leaked = Box::leak(Box::new(prelude));
let prelude = std::str::from_utf8(leaked).unwrap(); let prelude = std::str::from_utf8(leaked).unwrap();
@ -94,7 +94,7 @@ pub fn prelude<'src>() -> Context<'src> {
locals: vec![], locals: vec![],
ast: p_ast, ast: p_ast,
prelude: base_pkg, prelude: base_pkg,
prelude_ast: &Ast::Nil, // prelude_ast: &Ast::Nil,
}; };
let prelude = base_ctx.eval(); let prelude = base_ctx.eval();
@ -122,7 +122,7 @@ pub fn prelude<'src>() -> Context<'src> {
locals: vec![], locals: vec![],
ast: &Ast::Nil, ast: &Ast::Nil,
prelude: p_ctx, prelude: p_ctx,
prelude_ast: p_ast, // prelude_ast: p_ast,
} }
} }
@ -158,16 +158,7 @@ pub fn run(src: &'static str) {
pub fn main() { pub fn main() {
let src = " let src = "
fn fib { add (1, 2)
(1) -> 1
(2) -> 1
(n) -> add (
fib (dec (n))
fib (sub (n, 2))
)
}
fib (25)
"; ";
run(src); run(src);
// struct_scalpel::print_dissection_info::<value::Value>() // struct_scalpel::print_dissection_info::<value::Value>()

377
src/validator.rs Normal file
View File

@ -0,0 +1,377 @@
use crate::parser::*;
use crate::spans::Span;
use std::collections::{HashMap, HashSet};
/// A validation error: a human-readable message plus the source span it refers to.
///
/// The fields are public so that code outside this module (which already has
/// access to the public `Validator::errors` list) can actually report them.
#[derive(Clone, Debug, PartialEq)]
pub struct VErr {
    /// Description of what is wrong.
    pub msg: String,
    /// Span the error was raised at.
    pub span: Span,
}

impl VErr {
    /// Builds an error from a message and the span it should be reported at.
    pub fn new(msg: String, span: Span) -> Self {
        Self { msg, span }
    }
}
// Mutable bookkeeping that `Validator::validate` carries while it walks the tree.
#[derive(Clone, Debug, PartialEq)]
struct VStatus {
// Saved and restored around child nodes by `validate`; cleared while validating
// conditions, collection members and arguments.
tail_position: bool,
// Set to `true` by the `Loop` arm of `validate` while the loop body is validated.
in_loop: bool,
// NOTE(review): initialised to `false` in `new` and not read anywhere in this file yet.
last_term: bool,
// Set once a placeholder has been seen; seeing another while it is set is an error.
has_placeholder: bool,
// Names recorded through `use_name`; the `Fn` arm of `validate` collects the
// entries added since it started into the function's closed-over set.
used_bindings: Vec<String>,
}
// Arity of a function clause, stored in the arity set of `FnInfo::Defined`.
// NOTE(review): presumably `Fixed(n)` means exactly `n` arguments and `Splat(n)`
// means `n` positional arguments followed by a splat — confirm against the parser.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Arity {
Fixed(u8),
Splat(u8),
}
// What the validator knows about a local binding, as far as functions are concerned.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FnInfo {
// Pushed by `declare_fn` for a forward declaration; a block reports an error
// for bindings still in this state when it ends.
Declared,
// Pushed for a function definition: the set of clause arities and the set of
// names recorded as used while validating it (its closed-over names).
Defined(HashSet<Arity>, HashSet<String>),
// Pushed by `bind` for bindings with no function information attached.
Unknown,
}
// Walks an `Ast` and accumulates validation errors instead of stopping at the first one.
#[derive(Debug, PartialEq)]
pub struct Validator<'a> {
// TODO: add another term here: FnStatus. See Issue #18.
// Stack of local bindings: (name, span current when it was bound, function info).
pub locals: Vec<(String, Span, FnInfo)>,
// Names that `resolved` accepts in addition to the locals.
pub prelude: &'a Vec<String>,
// Node currently being validated; `validate` repoints this at child nodes and
// sets it back to the node it started from before returning.
pub ast: &'a Ast,
// Span attached to newly created errors and bindings.
pub span: Span,
// Every error reported so far through `err`.
pub errors: Vec<VErr>,
// NOTE(review): created empty in `new` and not written anywhere in this file yet;
// presumably meant to map function nodes to their `FnInfo` — confirm.
pub fn_info: HashMap<*const Ast, FnInfo>,
// Traversal flags, see `VStatus`.
status: VStatus,
}
impl<'a> Validator<'a> {
/// Creates a validator positioned at `ast`, with no locals, no errors, an empty
/// `fn_info` table and every status flag cleared.
///
/// `ast` is taken by shared reference because the validator only stores a
/// `&'a Ast`; callers that hold a `&mut Ast` can still pass it, since `&mut T`
/// coerces to `&T` at the call site.
fn new(ast: &'a Ast, span: Span, prelude: &'a Vec<String>) -> Validator<'a> {
    Validator {
        locals: vec![],
        prelude,
        ast,
        span,
        fn_info: HashMap::new(),
        errors: vec![],
        status: VStatus {
            tail_position: false,
            in_loop: false,
            last_term: false,
            has_placeholder: false,
            used_bindings: vec![],
        },
    }
}
/// Records `name` as a local at the current span, with no function information.
fn bind(&mut self, name: String) {
    self.define_fn(name, FnInfo::Unknown)
}
/// Records `name` as a forward-declared function at the current span.
fn declare_fn(&mut self, name: String) {
    self.define_fn(name, FnInfo::Declared)
}
/// Pushes a local binding for `name` at the current span carrying `info`.
fn define_fn(&mut self, name: String, info: FnInfo) {
    let entry = (name, self.span, info);
    self.locals.push(entry);
}
/// Reports whether `name` is known: locals are searched first, then the prelude names.
fn resolved(&self, name: &str) -> bool {
    self.locals
        .iter()
        .map(|(bound, ..)| bound)
        .chain(self.prelude.iter())
        .any(|bound| bound.as_str() == name)
}
/// Returns the most recently pushed local binding called `name`, if any.
///
/// Only `locals` is searched (newest first); prelude names are not considered.
fn bound(&self, name: &str) -> Option<&(String, Span, FnInfo)> {
    self.locals.iter().rev().find(|(bound, ..)| name == bound)
}
/// Appends an error carrying `msg`, located at the validator's current span.
fn err(&mut self, msg: String) {
    let report = VErr::new(msg, self.span);
    self.errors.push(report)
}
/// Appends `name` to the list of used bindings kept in the traversal status.
fn use_name(&mut self, name: String) {
    let used = &mut self.status.used_bindings;
    used.push(name);
}
fn validate(&mut self) {
let root = self.ast;
match root {
Ast::Error => unreachable!(),
Ast::Word(name) | Ast::Splat(name) => {
if !self.resolved(name) {
self.err(format!("unbound name {name}"))
}
}
Ast::Interpolated(parts) => {
for part in parts {
if let (StringPart::Word(name), span) = part {
self.span = *span;
if !self.resolved(name.as_str()) {
self.err(format!("unbound name {name}"))
}
}
}
}
// validate each line
// ensure it's not empty
// pass through tail position validation
// check if there are any declared but undefined functions
// pop all the bindings off the local stack
Ast::Block(block) => {
if block.is_empty() {
self.err("blocks must have at least one expression".to_string());
return;
}
let to = self.locals.len();
let tailpos = self.status.tail_position;
for (expr, span) in block.iter().take(block.len() - 1) {
self.status.tail_position = false;
self.ast = expr;
self.span = *span;
self.validate();
}
let (expr, span) = block.last().unwrap();
self.ast = expr;
self.span = *span;
self.status.tail_position = tailpos;
self.validate();
let block_bindings = self.locals.split_off(to);
for binding in block_bindings {
let (name, _, fn_info) = binding;
if matches!(fn_info, FnInfo::Declared) {
self.err(format!("fn `{name}` is declared but not defined"))
}
}
}
// if in tail position, pass through tail position validation
// no unbound names
Ast::If(cond, then, r#else) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
let (expr, span) = cond.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
// pass through tailpos only to then/else
self.status.tail_position = tailpos;
let (expr, span) = then.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = r#else.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
Ast::Tuple(members) => {
if members.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in members {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
// no more than one placeholder
Ast::Arguments(args) => {
if args.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in args {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.has_placeholder = false;
self.status.tail_position = tailpos;
}
Ast::Placeholder => {
if self.status.has_placeholder {
self.err(
"you may only use one placeholder when partially applying functions"
.to_string(),
);
}
self.status.has_placeholder = true;
}
Ast::List(list) => {
if list.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in list {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
Ast::Pair(_, value) => {
let (expr, span) = value.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
Ast::Dict(dict) => {
if dict.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in dict {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
// TODO!
// first check all nodes
// then...
// check arity is 1 if first term is keyword
// check arity against fn info if first term is word and second term is args
Ast::Synthetic(first, second, rest) => {
todo!()
}
Ast::When(clauses) => {
// let tailpos = self.status.tail_position;
// for (clause, _) in clauses {
// self.status.tail_position = false;
// let (expr, span) = clause.cond.clone();
// self.ast = &expr;
// self.span = span;
// self.validate();
// self.status.tail_position = tailpos;
// let (expr, span) = clause.body;
// self.ast = &expr;
// self.span = span;
// self.validate();
// }
}
// binding forms
// TODO: set up errors to include original binding
Ast::Box(name, boxed) => {
if self.bound(name).is_some() {
self.err(format!("box name `{name}` is already bound"));
}
let (expr, span) = boxed.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
Ast::Let(lhs, rhs) => todo!(),
Ast::Match(scrutinee, clauses) => todo!(),
Ast::FnDeclaration(name) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
if self.bound(name).is_some() {
self.err(format!("fn name `{name}` is already bound"));
return;
}
self.declare_fn(name.to_string());
self.status.tail_position = tailpos;
}
Ast::Fn(name, clauses, ..) => {
match self.bound(name) {
Some((_, _, FnInfo::Declared)) => (),
None => (),
_ => {
self.err(format!("name `{name}` is already bound"));
}
}
let from = self.status.used_bindings.len();
let arities = HashSet::new();
for clause in clauses {
// TODO: validate all parts of clauses
// add clause arity to arities
}
let mut closed_over = HashSet::new();
for binding in self.status.used_bindings.iter().skip(from) {
closed_over.insert(binding.clone());
}
let info = FnInfo::Defined(arities, closed_over);
self.define_fn(name.to_string(), info)
}
Ast::Panic(msg) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
let (expr, span) = msg.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
self.status.tail_position = tailpos;
}
Ast::Do(terms) => {
for term in terms {
let (expr, span) = term;
self.ast = expr;
self.span = *span;
self.validate();
}
}
Ast::Repeat(times, body) => {
self.status.tail_position = false;
let (expr, span) = times.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = body.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
Ast::Loop(with, body) => {
let (expr, span) = with.as_ref();
self.span = *span;
self.ast = expr;
self.validate();
let in_loop = self.status.in_loop;
self.status.in_loop = true;
let (expr, span) = body;
self.span = span;
self.expr = expr;
self.validate();
self.status.in_loop = in_loop;
}
Ast::Recur(args) => {}
// terminals can never be invalid
Ast::Nil | Ast::Boolean(_) | Ast::Number(_) | Ast::Keyword(_) | Ast::String(_) => (),
};
self.ast = root;
}
}

View File

@ -20,17 +20,12 @@ pub enum Value<'src> {
Boolean(bool), Boolean(bool),
Number(f64), Number(f64),
Keyword(&'static str), Keyword(&'static str),
// TODO: add a "runtime-generated" string type that wraps a Rust String
// this is necessary for nice documentation and string interpolation
InternedString(&'static str), InternedString(&'static str),
AllocatedString(Rc<String>), AllocatedString(Rc<String>),
// on the heap for now // on the heap for now
Tuple(Rc<Vec<Self>>), Tuple(Rc<Vec<Self>>),
Args(Rc<Vec<Self>>), Args(Rc<Vec<Self>>),
// ref-counted, immutable, persistent
List(Vector<Self>), List(Vector<Self>),
// ref-counted, immutable, persistent
// dicts may only use keywords as keys
Dict(HashMap<&'static str, Self>), Dict(HashMap<&'static str, Self>),
Box(&'static str, Rc<RefCell<Self>>), Box(&'static str, Rc<RefCell<Self>>),
Fn(Rc<Fn<'src>>), Fn(Rc<Fn<'src>>),
@ -62,7 +57,7 @@ impl<'src> Clone for Value<'src> {
Value::Fn(f) => Value::Fn(f.clone()), Value::Fn(f) => Value::Fn(f.clone()),
Value::List(l) => Value::List(l.clone()), Value::List(l) => Value::List(l.clone()),
Value::Dict(d) => Value::Dict(d.clone()), Value::Dict(d) => Value::Dict(d.clone()),
Value::Box(name, b) => Value::Box(*name, b.clone()), Value::Box(name, b) => Value::Box(name, b.clone()),
Value::Placeholder => Value::Placeholder, Value::Placeholder => Value::Placeholder,
Value::Base(b) => Value::Base(b.clone()), Value::Base(b) => Value::Base(b.clone()),
Value::Recur(..) => unreachable!(), Value::Recur(..) => unreachable!(),
@ -177,7 +172,7 @@ impl Value<'_> {
.map(|(k, v)| format!(":{} {}", k, v.interpolate())) .map(|(k, v)| format!(":{} {}", k, v.interpolate()))
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(", "), .join(", "),
Value::Fn(x) => format!("fn {}", x.name.to_string()), Value::Fn(x) => format!("fn {}", x.name),
Value::Placeholder => unreachable!(), Value::Placeholder => unreachable!(),
Value::Args(_) => unreachable!(), Value::Args(_) => unreachable!(),
Value::Recur(_) => unreachable!(), Value::Recur(_) => unreachable!(),