// rudus/src/validator.rs
// (648 lines, 23 KiB, Rust — page header left over from the web source viewer)

use crate::parser::*;
use crate::spans::Span;
use std::collections::{HashMap, HashSet};
/// A single validation error: a human-readable message together with the
/// source span that was current when the problem was detected.
#[derive(Debug, Clone, PartialEq)]
pub struct VErr {
    // Text describing what failed validation.
    msg: String,
    // Span the error is attributed to.
    span: Span,
}

impl VErr {
    /// Builds a validation error from its message and the span it refers to.
    pub fn new(msg: String, span: Span) -> VErr {
        Self { msg, span }
    }
}
/// Mutable bookkeeping the validator threads through its traversal.
#[derive(Clone, Debug, PartialEq)]
struct VStatus {
    // Whether the node being validated sits in tail position; `recur` is
    // rejected when this is false.
    tail_position: bool,
    // Whether the traversal is currently inside a `loop` form.
    in_loop: bool,
    // Number of inputs of the enclosing `loop`, checked against `recur` calls.
    loop_arity: u8,
    // Whether the pattern being validated is the final term of its
    // collection pattern; splats are only accepted when this is true.
    last_term: bool,
    // Whether a placeholder has already been seen in the current argument list.
    has_placeholder: bool,
    // Every name that resolved successfully, in the order it was used.
    used_bindings: Vec<String>,
}
/// The number of arguments a function or loop clause accepts.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Arity {
    /// The clause's pattern has exactly this many members and no trailing splat.
    Fixed(u8),
    /// The clause's pattern ends in a splat; the payload is the member count
    /// of the pattern, splat included.
    Splat(u8),
}
/// What the validator knows about a local binding, as far as functions go.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FnInfo {
    /// The name was forward-declared as a function but has no body yet.
    Declared,
    /// The function is defined: the set of arities its clauses accept and
    /// the set of already-bound names its body uses (its closed-over names).
    Defined(HashSet<Arity>, HashSet<String>),
    /// An ordinary binding about which no function information is recorded.
    Unknown,
}
// [blame timestamp] 2024-12-11 05:20:34 +00:00
/// Reports whether a call with `num_args` arguments is accepted by at least
/// one of `arities`.
///
/// A `Fixed(n)` arity accepts exactly `n` arguments; a `Splat(n)` arity
/// accepts `n` or more.
fn match_arities(arities: &HashSet<Arity>, num_args: u8) -> bool {
    for candidate in arities {
        let accepts = match candidate {
            Arity::Fixed(exact) => *exact == num_args,
            Arity::Splat(minimum) => *minimum <= num_args,
        };
        if accepts {
            return true;
        }
    }
    false
}
/// Walks an AST and collects validation errors (unbound names, rebinding,
/// arity mismatches, misplaced `recur`, and so on).
#[derive(Debug, PartialEq)]
pub struct Validator<'a> {
    // TODO: add another term here: FnStatus. See Issue #18.
    /// Stack of local bindings: name, the span current when it was bound,
    /// and whatever is known about it as a function.
    pub locals: Vec<(String, Span, FnInfo)>,
    /// Names that resolve without being bound locally.
    pub prelude: &'a Vec<String>,
    /// The node currently being validated.
    pub ast: &'a Ast,
    /// The span of the node currently being validated; attached to new errors.
    pub span: Span,
    /// Every error found so far.
    pub errors: Vec<VErr>,
    /// Function information keyed by the address of the `Fn` node it describes.
    pub fn_info: HashMap<*const Ast, FnInfo>,
    // Traversal state (tail position, loop context, pattern context, used names).
    status: VStatus,
}
impl<'a> Validator<'a> {
// [blame timestamp] 2024-12-10 22:14:26 +00:00
pub fn new(ast: &'a Ast, span: Span, prelude: &'a Vec<String>) -> Validator<'a> {
Validator {
locals: vec![],
prelude,
ast,
span,
fn_info: std::collections::HashMap::new(),
errors: vec![],
status: VStatus {
tail_position: false,
in_loop: false,
2024-12-10 23:07:31 +00:00
loop_arity: 0,
last_term: false,
has_placeholder: false,
used_bindings: vec![],
},
}
}
/// Pushes `name` onto the local stack as an ordinary binding (no function
/// information), tagged with the current span.
fn bind(&mut self, name: String) {
    let entry = (name, self.span, FnInfo::Unknown);
    self.locals.push(entry);
}
/// Pushes `name` onto the local stack as a declared-but-not-yet-defined
/// function, tagged with the current span.
fn declare_fn(&mut self, name: String) {
    let entry = (name, self.span, FnInfo::Declared);
    self.locals.push(entry);
}
/// Pushes `name` onto the local stack together with the supplied function
/// information, tagged with the current span.
fn define_fn(&mut self, name: String, info: FnInfo) {
    let entry = (name, self.span, info);
    self.locals.push(entry);
}
/// Reports whether `name` refers to anything: either a local binding or a
/// name in the prelude.
fn resolved(&self, name: &str) -> bool {
    let local_names = self.locals.iter().map(|(bound, ..)| bound);
    local_names
        .chain(self.prelude.iter())
        .any(|bound| bound.as_str() == name)
}
/// Looks up the most recent local binding called `name`.
///
/// The stack is searched from the top, so the newest binding wins. The
/// prelude is not consulted. Returns `None` when no local has that name.
fn bound(&self, name: &str) -> Option<&(String, Span, FnInfo)> {
    // `find` already yields the `Option` we want; no re-wrapping needed.
    self.locals.iter().rev().find(|(bound, ..)| name == bound)
}
/// Records a validation error with message `msg` at the current span.
fn err(&mut self, msg: String) {
    let report = VErr::new(msg, self.span);
    self.errors.push(report);
}
/// Notes that `name` was referenced, by appending it to the list of used
/// bindings.
fn use_name(&mut self, name: String) {
    let used = &mut self.status.used_bindings;
    used.push(name);
}
// [blame timestamp] 2024-12-11 04:42:05 +00:00
/// Computes the arity of the match clause currently held in `self.ast`.
///
/// The clause's pattern must be a tuple pattern. If its final member is a
/// splat the result is `Arity::Splat`, otherwise `Arity::Fixed`; in both
/// cases the payload is the number of members in the tuple pattern.
///
/// # Panics
/// Panics if `self.ast` is not a `MatchClause` or if the clause's pattern is
/// not a `TuplePattern`.
fn arity(&mut self) -> Arity {
    let Ast::MatchClause(pattern, ..) = self.ast else {
        unreachable!("internal Ludus error")
    };
    let (Ast::TuplePattern(members), _) = pattern.as_ref() else {
        unreachable!("internal Ludus error");
    };
    let count = members.len() as u8;
    // An empty pattern has no last member, so it falls through to `Fixed(0)`.
    let ends_in_splat = matches!(members.last(), Some((Ast::Splattern(..), _)));
    if ends_in_splat {
        Arity::Splat(count)
    } else {
        Arity::Fixed(count)
    }
}
// [blame timestamp] 2024-12-10 22:14:26 +00:00
pub fn validate(&mut self) {
2024-12-11 03:40:57 +00:00
use Ast::*;
let root = self.ast;
match root {
2024-12-11 03:40:57 +00:00
Error => unreachable!(),
Word(name) | Ast::Splat(name) => {
if !self.resolved(name) {
2024-12-10 22:14:26 +00:00
self.err(format!("unbound name `{name}`"))
} else {
self.use_name(name.to_string())
}
}
2024-12-11 03:40:57 +00:00
Interpolated(parts) => {
for part in parts {
if let (StringPart::Word(name), span) = part {
self.span = *span;
if !self.resolved(name.as_str()) {
2024-12-10 22:14:26 +00:00
self.err(format!("unbound name `{name}`"));
} else {
self.use_name(name.to_string());
}
}
}
}
// validate each line
// ensure it's not empty
// pass through tail position validation
// check if there are any declared but undefined functions
// pop all the bindings off the local stack
2024-12-11 03:40:57 +00:00
Block(block) => {
if block.is_empty() {
self.err("blocks must have at least one expression".to_string());
return;
}
let to = self.locals.len();
let tailpos = self.status.tail_position;
for (expr, span) in block.iter().take(block.len() - 1) {
self.status.tail_position = false;
self.ast = expr;
self.span = *span;
self.validate();
}
let (expr, span) = block.last().unwrap();
self.ast = expr;
self.span = *span;
self.status.tail_position = tailpos;
self.validate();
let block_bindings = self.locals.split_off(to);
for binding in block_bindings {
let (name, _, fn_info) = binding;
if matches!(fn_info, FnInfo::Declared) {
self.err(format!("fn `{name}` is declared but not defined"))
}
}
}
// if in tail position, pass through tail position validation
// no unbound names
2024-12-11 03:40:57 +00:00
If(cond, then, r#else) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
let (expr, span) = cond.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
// pass through tailpos only to then/else
self.status.tail_position = tailpos;
let (expr, span) = then.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = r#else.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Tuple(members) => {
if members.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in members {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
// no more than one placeholder
2024-12-11 03:40:57 +00:00
Arguments(args) => {
if args.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in args {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.has_placeholder = false;
self.status.tail_position = tailpos;
}
2024-12-11 03:40:57 +00:00
Placeholder => {
if self.status.has_placeholder {
self.err(
"you may only use one placeholder when partially applying functions"
.to_string(),
);
}
self.status.has_placeholder = true;
}
2024-12-11 03:40:57 +00:00
List(list) => {
if list.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in list {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
2024-12-11 03:40:57 +00:00
Pair(_, value) => {
let (expr, span) = value.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Dict(dict) => {
if dict.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in dict {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
// TODO!
// check arity against fn info if first term is word and second term is args
2024-12-11 03:40:57 +00:00
Synthetic(first, second, rest) => {
2024-12-10 22:23:15 +00:00
match (&first.0, &second.0) {
(Ast::Word(_), Ast::Keyword(_)) => {
let (expr, span) = first.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
(Ast::Keyword(_), Ast::Arguments(args)) => {
if args.len() != 1 {
self.err("called keywords may only take one argument".to_string())
}
let (expr, span) = second.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 05:20:34 +00:00
(Ast::Word(name), Ast::Arguments(args)) => {
2024-12-10 22:23:15 +00:00
let (expr, span) = first.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = second.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
2024-12-10 22:39:02 +00:00
2024-12-11 05:20:34 +00:00
//TODO: check arities of prelude fns, too
let fn_binding = self.bound(name);
if let Some((_, _, FnInfo::Defined(arities, _))) = fn_binding {
let num_args = args.len();
if !match_arities(arities, num_args as u8) {
self.err(format!("arity mismatch: no clause in function `{name}` with {num_args} argument(s)"))
}
}
2024-12-10 22:23:15 +00:00
}
_ => unreachable!(),
}
for term in rest {
let (expr, span) = term;
self.ast = expr;
self.span = *span;
self.validate();
}
}
2024-12-11 04:42:05 +00:00
WhenClause(cond, body) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
let (expr, span) = cond.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
self.status.tail_position = tailpos;
let (expr, span) = body.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
When(clauses) => {
2024-12-11 04:42:05 +00:00
for clause in clauses {
let (expr, span) = clause;
self.ast = expr;
self.span = *span;
self.validate();
}
}
// binding forms
// TODO: set up errors to include original binding
2024-12-11 03:40:57 +00:00
LBox(name, boxed) => {
if self.bound(name).is_some() {
self.err(format!("box name `{name}` is already bound"));
2024-12-10 22:14:26 +00:00
} else {
self.bind(name.to_string());
}
let (expr, span) = boxed.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Let(lhs, rhs) => {
2024-12-10 22:14:26 +00:00
let (expr, span) = rhs.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
2024-12-11 03:40:57 +00:00
let (expr, span) = lhs.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
2024-12-10 22:14:26 +00:00
}
2024-12-11 04:42:05 +00:00
MatchClause(pattern, guard, body) => {
let to = self.locals.len();
let (patt, span) = pattern.as_ref();
self.ast = patt;
self.span = *span;
self.validate();
if let Some((expr, span)) = guard.as_ref() {
self.ast = expr;
self.span = *span;
self.validate();
}
let (expr, span) = body.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
self.locals.truncate(to);
}
2024-12-11 03:40:57 +00:00
Match(scrutinee, clauses) => {
2024-12-10 22:14:26 +00:00
let (expr, span) = scrutinee.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
2024-12-11 04:42:05 +00:00
for clause in clauses {
let (expr, span) = clause;
self.ast = expr;
self.span = *span;
self.validate();
}
}
2024-12-11 03:40:57 +00:00
FnDeclaration(name) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
if self.bound(name).is_some() {
self.err(format!("fn name `{name}` is already bound"));
return;
}
self.declare_fn(name.to_string());
self.status.tail_position = tailpos;
}
2024-12-11 03:40:57 +00:00
Fn(name, clauses, ..) => {
match self.bound(name) {
Some((_, _, FnInfo::Declared)) => (),
None => (),
_ => {
self.err(format!("name `{name}` is already bound"));
}
}
2024-12-11 05:20:34 +00:00
// TODO: devise a placeholder binding for recursive functions
let from = self.status.used_bindings.len();
2024-12-11 04:42:05 +00:00
let mut arities = HashSet::new();
for clause in clauses {
// TODO: validate all parts of clauses
2024-12-11 04:42:05 +00:00
let (expr, span) = clause;
self.ast = expr;
self.span = *span;
// add clause arity to arities
2024-12-11 04:42:05 +00:00
arities.insert(self.arity());
self.validate();
}
2024-12-10 22:14:26 +00:00
// this should be right
// we can't bind anything that's already bound,
// even in arg names
// so anything that is already bound and used
// will, of necessity, be closed over
// we don't want to try to close over locals in functions
let mut closed_over = HashSet::new();
for binding in self.status.used_bindings.iter().skip(from) {
2024-12-10 22:14:26 +00:00
if self.bound(binding.as_str()).is_some() {
closed_over.insert(binding.clone());
}
}
let info = FnInfo::Defined(arities, closed_over);
2024-12-11 04:42:05 +00:00
let root_ptr: *const Ast = root;
self.fn_info.insert(root_ptr, info.clone());
self.define_fn(name.to_string(), info);
}
2024-12-11 03:40:57 +00:00
Panic(msg) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
let (expr, span) = msg.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
self.status.tail_position = tailpos;
}
2024-12-10 22:39:02 +00:00
// TODO: fix the tail call here?
2024-12-11 03:40:57 +00:00
Do(terms) => {
2024-12-10 22:39:02 +00:00
if terms.len() < 2 {
return self.err("do expressions must have at least two terms".to_string());
}
for term in terms.iter().take(terms.len() - 1) {
let (expr, span) = term;
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-10 22:39:02 +00:00
let (expr, span) = terms.last().unwrap();
self.ast = expr;
self.span = *span;
if matches!(expr, Ast::Recur(_)) {
self.err("`recur` may not be used in `do` forms".to_string());
}
self.validate();
}
2024-12-11 03:40:57 +00:00
Repeat(times, body) => {
self.status.tail_position = false;
let (expr, span) = times.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = body.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Loop(with, body) => {
let (expr, span) = with.as_ref();
self.span = *span;
self.ast = expr;
self.validate();
2024-12-10 23:07:31 +00:00
let Ast::Tuple(input) = expr else {
unreachable!()
};
let in_loop = self.status.in_loop;
2024-12-10 23:07:31 +00:00
let arity = self.status.loop_arity;
self.status.in_loop = true;
2024-12-10 23:07:31 +00:00
self.status.loop_arity = input.len() as u8;
2024-12-11 04:42:05 +00:00
for clause in body {
let (expr, span) = clause;
self.ast = expr;
self.span = *span;
match self.arity() {
Arity::Fixed(clause_arity) => {
if clause_arity != arity {
self.err(format!("mismatched arity: expected {arity} arguments in `loop` clause; got {clause_arity}"))
}
}
Arity::Splat(clause_arity) => {
if clause_arity > arity {
self.err(format!("mismathced arity: expected {arity} arguments in `loop` clause; this clause takes {clause_arity} or more"))
}
}
};
self.validate();
}
self.status.in_loop = in_loop;
2024-12-10 23:07:31 +00:00
self.status.loop_arity = arity;
}
2024-12-11 03:40:57 +00:00
Recur(args) => {
2024-12-10 22:14:26 +00:00
if !self.status.in_loop {
2024-12-10 22:39:02 +00:00
self.err("you may only use `recur` in a `loop` form".to_string());
return;
2024-12-10 22:14:26 +00:00
}
if !self.status.tail_position {
self.err("you may only use `recur` in tail position".to_string());
}
2024-12-10 23:07:31 +00:00
let num_args = args.len() as u8;
let loop_arity = self.status.loop_arity;
if num_args != loop_arity {
self.err(format!("loop arity mismatch: loop has arity of {loop_arity}; `recur` called with {num_args} arguments"))
}
2024-12-10 22:14:26 +00:00
self.status.tail_position = false;
for arg in args {
let (expr, span) = arg;
self.ast = expr;
self.span = *span;
self.validate();
}
}
2024-12-11 03:40:57 +00:00
WordPattern(name) => match self.bound(name) {
Some((name, _span, _)) => {
self.err(format!("name `{name}` is already bound"));
}
None => {
self.bind(name.to_string());
}
},
2024-12-11 04:42:05 +00:00
InterpolatedPattern(parts, _) => {
for (part, span) in parts {
if let StringPart::Word(name) = part {
self.span = *span;
match self.bound(name) {
Some(_) => self.err(format!("name `{name}` is already bound")),
None => self.bind(name.to_string()),
}
}
}
}
AsPattern(name, r#type) => {
match self.bound(name) {
Some((name, _span, _)) => {
self.err(format!("name `{name}` is already bound"));
}
None => {
self.bind(name.to_string());
}
}
let as_type = *r#type;
match as_type {
"nil" | "bool" | "number" | "keyword" | "string" | "tuple" | "dict"
| "list" | "fn" | "box" => (),
_ => self.err(format!("unknown type `:{as_type}`")),
}
}
Splattern(splatted) => {
if !self.status.last_term {
self.err("splats in patterns must come last".to_string());
}
match splatted.as_ref() {
2024-12-11 05:20:34 +00:00
(PlaceholderPattern, _) => (),
(Word(name), span) => match self.bound(name) {
2024-12-11 04:42:05 +00:00
Some(_) => {
self.span = *span;
self.err(format!("name `{name}` is already bound"))
}
None => self.bind(name.to_string()),
},
_ => unreachable!(),
}
}
TuplePattern(terms) | ListPattern(terms) | DictPattern(terms) => {
if terms.is_empty() {
return;
}
for term in terms.iter().take(terms.len() - 1) {
let (patt, span) = term;
self.ast = patt;
self.span = *span;
self.validate();
}
self.status.last_term = true;
let (patt, span) = terms.last().unwrap();
self.ast = patt;
self.span = *span;
self.validate();
self.status.last_term = false;
}
PairPattern(_, patt) => {
let (patt, span) = patt.as_ref();
self.ast = patt;
self.span = *span;
self.validate();
}
// terminals can never be invalid
2024-12-11 03:40:57 +00:00
Nil | Boolean(_) | Number(_) | Keyword(_) | String(_) => (),
// terminal patterns can never be invalid
NilPattern | BooleanPattern(..) | NumberPattern(..) | StringPattern(..)
| KeywordPattern(..) | PlaceholderPattern => (),
};
self.ast = root;
}
}