// rudus/src/validator.rs

use crate::parser::*;
use crate::spans::Span;
use std::collections::{HashMap, HashSet};
/// A single validation error: a human-readable message plus the source span
/// that was current when the error was recorded.
#[derive(Clone, Debug, PartialEq)]
pub struct VErr {
// Text of the diagnostic, e.g. "unbound name `foo`".
msg: String,
// Span stored alongside the message when the error was created.
span: Span,
}
impl VErr {
pub fn new(msg: String, span: Span) -> VErr {
VErr { msg, span }
}
}
/// Mutable traversal state carried by the validator while it walks the tree.
#[derive(Clone, Debug, PartialEq)]
struct VStatus {
    /// Whether the node currently being validated sits in tail position;
    /// `recur` is rejected when this is `false`.
    tail_position: bool,
    /// Whether validation is currently inside a `loop` form; `recur` is
    /// rejected when this is `false`.
    in_loop: bool,
    /// Number of inputs of the enclosing `loop`; compared against the number
    /// of arguments passed to `recur`.
    loop_arity: u8,
    /// Initialised to `false`; not read by the validation code in this file.
    last_term: bool,
    /// Set once a placeholder has been seen, so that a second placeholder can
    /// be reported.
    has_placeholder: bool,
    /// Names that resolved successfully, in the order they were used.
    used_bindings: Vec<String>,
}
/// The arity of one function clause.
///
/// NOTE(review): the variant meanings are not exercised in this file;
/// presumably `Fixed(n)` is exactly `n` parameters and `Splat(n)` is `n`
/// parameters followed by a splat — confirm against the code that builds them.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum Arity {
    /// An arity described by a single count.
    Fixed(u8),
    /// A splatted arity described by a single count.
    Splat(u8),
}
/// What the validator has recorded about a bound name, stored as the third
/// element of each entry in `Validator::locals`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FnInfo {
// The name was introduced by a fn declaration and has no definition yet.
Declared,
// The name was introduced by a fn definition: the set of clause arities and
// the set of names the fn closes over.
Defined(HashSet<Arity>, HashSet<String>),
// The name was bound by something other than a fn declaration/definition.
Unknown,
}
/// Walks an `Ast` and collects validation errors (unbound names, rebinding,
/// misuse of `recur`, and so on) without stopping at the first one.
#[derive(Debug, PartialEq)]
pub struct Validator<'a> {
// TODO: add another term here: FnStatus. See Issue #18.
// Stack of bindings currently in scope: name, the span that was current when
// the binding was recorded, and what is known about it as a function.
pub locals: Vec<(String, Span, FnInfo)>,
// Names that count as resolved without appearing in `locals`.
pub prelude: &'a Vec<String>,
// The node currently being validated; `validate` dispatches on it.
pub ast: &'a Ast,
// The span attached to any error or binding recorded right now.
pub span: Span,
// Every error recorded so far.
pub errors: Vec<VErr>,
// Function info keyed by AST node address.
// NOTE(review): created empty and not written by the code in this file;
// presumably filled in by later work — confirm.
pub fn_info: HashMap<*const Ast, FnInfo>,
// Traversal flags (tail position, loop context, placeholder use, used names).
status: VStatus,
}
impl<'a> Validator<'a> {
2024-12-10 22:14:26 +00:00
pub fn new(ast: &'a Ast, span: Span, prelude: &'a Vec<String>) -> Validator<'a> {
Validator {
locals: vec![],
prelude,
ast,
span,
fn_info: std::collections::HashMap::new(),
errors: vec![],
status: VStatus {
tail_position: false,
in_loop: false,
2024-12-10 23:07:31 +00:00
loop_arity: 0,
last_term: false,
has_placeholder: false,
used_bindings: vec![],
},
}
}
fn bind(&mut self, name: String) {
self.locals.push((name, self.span, FnInfo::Unknown));
}
fn declare_fn(&mut self, name: String) {
self.locals.push((name, self.span, FnInfo::Declared));
}
fn define_fn(&mut self, name: String, info: FnInfo) {
self.locals.push((name, self.span, info));
}
fn resolved(&self, name: &str) -> bool {
self.locals.iter().any(|(bound, ..)| name == bound.as_str())
|| self.prelude.iter().any(|bound| name == bound.as_str())
}
fn bound(&self, name: &str) -> Option<&(String, Span, FnInfo)> {
match self.locals.iter().rev().find(|(bound, ..)| name == bound) {
Some(binding) => Some(binding),
None => None,
}
}
fn err(&mut self, msg: String) {
self.errors.push(VErr::new(msg, self.span))
}
fn use_name(&mut self, name: String) {
self.status.used_bindings.push(name);
}
2024-12-10 22:14:26 +00:00
pub fn validate(&mut self) {
2024-12-11 03:40:57 +00:00
use Ast::*;
let root = self.ast;
match root {
2024-12-11 03:40:57 +00:00
Error => unreachable!(),
Word(name) | Ast::Splat(name) => {
if !self.resolved(name) {
2024-12-10 22:14:26 +00:00
self.err(format!("unbound name `{name}`"))
} else {
self.use_name(name.to_string())
}
}
2024-12-11 03:40:57 +00:00
Interpolated(parts) => {
for part in parts {
if let (StringPart::Word(name), span) = part {
self.span = *span;
if !self.resolved(name.as_str()) {
2024-12-10 22:14:26 +00:00
self.err(format!("unbound name `{name}`"));
} else {
self.use_name(name.to_string());
}
}
}
}
// validate each line
// ensure it's not empty
// pass through tail position validation
// check if there are any declared but undefined functions
// pop all the bindings off the local stack
2024-12-11 03:40:57 +00:00
Block(block) => {
if block.is_empty() {
self.err("blocks must have at least one expression".to_string());
return;
}
let to = self.locals.len();
let tailpos = self.status.tail_position;
for (expr, span) in block.iter().take(block.len() - 1) {
self.status.tail_position = false;
self.ast = expr;
self.span = *span;
self.validate();
}
let (expr, span) = block.last().unwrap();
self.ast = expr;
self.span = *span;
self.status.tail_position = tailpos;
self.validate();
let block_bindings = self.locals.split_off(to);
for binding in block_bindings {
let (name, _, fn_info) = binding;
if matches!(fn_info, FnInfo::Declared) {
self.err(format!("fn `{name}` is declared but not defined"))
}
}
}
// if in tail position, pass through tail position validation
// no unbound names
2024-12-11 03:40:57 +00:00
If(cond, then, r#else) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
let (expr, span) = cond.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
// pass through tailpos only to then/else
self.status.tail_position = tailpos;
let (expr, span) = then.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = r#else.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Tuple(members) => {
if members.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in members {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
// no more than one placeholder
2024-12-11 03:40:57 +00:00
Arguments(args) => {
if args.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in args {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.has_placeholder = false;
self.status.tail_position = tailpos;
}
2024-12-11 03:40:57 +00:00
Placeholder => {
if self.status.has_placeholder {
self.err(
"you may only use one placeholder when partially applying functions"
.to_string(),
);
}
self.status.has_placeholder = true;
}
2024-12-11 03:40:57 +00:00
List(list) => {
if list.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in list {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
2024-12-11 03:40:57 +00:00
Pair(_, value) => {
let (expr, span) = value.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Dict(dict) => {
if dict.is_empty() {
return;
}
let tailpos = self.status.tail_position;
self.status.tail_position = false;
for (expr, span) in dict {
self.ast = expr;
self.span = *span;
self.validate();
}
self.status.tail_position = tailpos;
}
// TODO!
// first check all nodes
// then...
// check arity is 1 if first term is keyword
// check arity against fn info if first term is word and second term is args
2024-12-11 03:40:57 +00:00
Synthetic(first, second, rest) => {
2024-12-10 22:23:15 +00:00
match (&first.0, &second.0) {
(Ast::Word(_), Ast::Keyword(_)) => {
let (expr, span) = first.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
(Ast::Keyword(_), Ast::Arguments(args)) => {
if args.len() != 1 {
self.err("called keywords may only take one argument".to_string())
}
let (expr, span) = second.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
(Ast::Word(_), Ast::Arguments(_)) => {
let (expr, span) = first.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = second.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
2024-12-10 22:39:02 +00:00
//TODO: check arity of call
2024-12-10 22:23:15 +00:00
}
_ => unreachable!(),
}
for term in rest {
let (expr, span) = term;
self.ast = expr;
self.span = *span;
self.validate();
}
}
2024-12-11 03:40:57 +00:00
WhenClause(cond, body) => todo!(),
When(clauses) => {
// let tailpos = self.status.tail_position;
// for (clause, _) in clauses {
// self.status.tail_position = false;
// let (expr, span) = clause.cond.clone();
// self.ast = &expr;
// self.span = span;
// self.validate();
// self.status.tail_position = tailpos;
// let (expr, span) = clause.body;
// self.ast = &expr;
// self.span = span;
// self.validate();
// }
}
// binding forms
// TODO: set up errors to include original binding
2024-12-11 03:40:57 +00:00
LBox(name, boxed) => {
if self.bound(name).is_some() {
self.err(format!("box name `{name}` is already bound"));
2024-12-10 22:14:26 +00:00
} else {
self.bind(name.to_string());
}
let (expr, span) = boxed.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Let(lhs, rhs) => {
2024-12-10 22:14:26 +00:00
let (expr, span) = rhs.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
2024-12-11 03:40:57 +00:00
let (expr, span) = lhs.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
2024-12-10 22:14:26 +00:00
}
2024-12-11 03:40:57 +00:00
MatchClause(pattern, guard, body) => todo!(),
Match(scrutinee, clauses) => {
2024-12-10 22:14:26 +00:00
let (expr, span) = scrutinee.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
FnDeclaration(name) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
if self.bound(name).is_some() {
self.err(format!("fn name `{name}` is already bound"));
return;
}
self.declare_fn(name.to_string());
self.status.tail_position = tailpos;
}
2024-12-11 03:40:57 +00:00
Fn(name, clauses, ..) => {
match self.bound(name) {
Some((_, _, FnInfo::Declared)) => (),
None => (),
_ => {
self.err(format!("name `{name}` is already bound"));
}
}
let from = self.status.used_bindings.len();
let arities = HashSet::new();
for clause in clauses {
// TODO: validate all parts of clauses
// add clause arity to arities
}
2024-12-10 22:14:26 +00:00
// this should be right
// we can't bind anything that's already bound,
// even in arg names
// so anything that is already bound and used
// will, of necessity, be closed over
// we don't want to try to close over locals in functions
let mut closed_over = HashSet::new();
for binding in self.status.used_bindings.iter().skip(from) {
2024-12-10 22:14:26 +00:00
if self.bound(binding.as_str()).is_some() {
closed_over.insert(binding.clone());
}
}
let info = FnInfo::Defined(arities, closed_over);
self.define_fn(name.to_string(), info)
}
2024-12-11 03:40:57 +00:00
Panic(msg) => {
let tailpos = self.status.tail_position;
self.status.tail_position = false;
let (expr, span) = msg.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
self.status.tail_position = tailpos;
}
2024-12-10 22:39:02 +00:00
// TODO: fix the tail call here?
2024-12-11 03:40:57 +00:00
Do(terms) => {
2024-12-10 22:39:02 +00:00
if terms.len() < 2 {
return self.err("do expressions must have at least two terms".to_string());
}
for term in terms.iter().take(terms.len() - 1) {
let (expr, span) = term;
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-10 22:39:02 +00:00
let (expr, span) = terms.last().unwrap();
self.ast = expr;
self.span = *span;
if matches!(expr, Ast::Recur(_)) {
self.err("`recur` may not be used in `do` forms".to_string());
}
self.validate();
}
2024-12-11 03:40:57 +00:00
Repeat(times, body) => {
self.status.tail_position = false;
let (expr, span) = times.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
let (expr, span) = body.as_ref();
self.ast = expr;
self.span = *span;
self.validate();
}
2024-12-11 03:40:57 +00:00
Loop(with, body) => {
let (expr, span) = with.as_ref();
self.span = *span;
self.ast = expr;
self.validate();
2024-12-10 23:07:31 +00:00
let Ast::Tuple(input) = expr else {
unreachable!()
};
let in_loop = self.status.in_loop;
2024-12-10 23:07:31 +00:00
let arity = self.status.loop_arity;
self.status.in_loop = true;
2024-12-10 23:07:31 +00:00
self.status.loop_arity = input.len() as u8;
2024-12-10 23:07:31 +00:00
// for clause in body {}
self.status.in_loop = in_loop;
2024-12-10 23:07:31 +00:00
self.status.loop_arity = arity;
}
2024-12-11 03:40:57 +00:00
Recur(args) => {
2024-12-10 22:14:26 +00:00
if !self.status.in_loop {
2024-12-10 22:39:02 +00:00
self.err("you may only use `recur` in a `loop` form".to_string());
return;
2024-12-10 22:14:26 +00:00
}
if !self.status.tail_position {
self.err("you may only use `recur` in tail position".to_string());
}
2024-12-10 23:07:31 +00:00
let num_args = args.len() as u8;
let loop_arity = self.status.loop_arity;
if num_args != loop_arity {
self.err(format!("loop arity mismatch: loop has arity of {loop_arity}; `recur` called with {num_args} arguments"))
}
2024-12-10 22:14:26 +00:00
self.status.tail_position = false;
for arg in args {
let (expr, span) = arg;
self.ast = expr;
self.span = *span;
self.validate();
}
}
2024-12-11 03:40:57 +00:00
WordPattern(name) => match self.bound(name) {
Some((name, _span, _)) => {
self.err(format!("name `{name}` is already bound"));
}
None => {
self.bind(name.to_string());
}
},
InterpolatedPattern(parts, _) => todo!(),
AsPattern(name, r#type) => todo!(),
Splattern(splatted) => todo!(),
TuplePattern(tuple) => todo!(),
ListPattern(list) => todo!(),
PairPattern(key, patt) => todo!(),
DictPattern(dict) => todo!(),
// terminals can never be invalid
2024-12-11 03:40:57 +00:00
Nil | Boolean(_) | Number(_) | Keyword(_) | String(_) => (),
// terminal patterns can never be invalid
NilPattern | BooleanPattern(..) | NumberPattern(..) | StringPattern(..)
| KeywordPattern(..) | PlaceholderPattern => (),
};
self.ast = root;
}
}