use crate::spans::*;
use chumsky::prelude::*;
use std::fmt;

#[derive(Clone, PartialEq, Debug)]
pub enum Token {
    Nil,
    Number(f64),
    Word(&'static str),
    Boolean(bool),
    Keyword(&'static str),
    String(&'static str),
    // todo: hard code these types
    Reserved(&'static str),
    Punctuation(&'static str),
    Method(&'static str),
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Token::Number(n) => write!(f, "[Number {}]", n),
            Token::Word(w) => write!(f, "[Word {}]", w),
            Token::Boolean(b) => write!(f, "[Boolean {}]", b),
            Token::Keyword(k) => write!(f, "[Keyword :{}]", k),
            Token::String(s) => write!(f, "[String {}]", s),
            Token::Reserved(r) => write!(f, "[Reserved {}]", r),
            Token::Nil => write!(f, "[nil]"),
            Token::Punctuation(p) => write!(f, "[Punctuation {}]", p),
            Token::Method(m) => write!(f, "[Method {m}]"),
        }
    }
}

impl Token {
    /// Renders a token the way it appears in source (e.g. for error
    /// messages): keywords get their leading `:`, strings their quotes,
    /// and the newline token shows as the word "newline".
    pub fn show(&self) -> String {
        match self {
            Token::Number(n) => format!("{n}"),
            Token::Boolean(b) => format!("{b}"),
            Token::Keyword(k) => format!(":{k}"),
            Token::Method(m) => format!("::{m}"),
            Token::Nil => "nil".to_string(),
            Token::String(s) => format!("\"{s}\""),
            Token::Reserved(s) | Token::Word(s) => s.to_string(),
            Token::Punctuation(s) => {
                let out = if *s == "\n" { "newline" } else { s };
                out.to_string()
            }
        }
    }
}

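/// Builds the Ludus lexer: a chumsky parser from source text to a
/// vector of spanned tokens. Spaces, tabs, and `&`-to-end-of-line
/// comments are skipped; newlines are significant and are emitted as
/// punctuation tokens. (Summary inferred from the combinators below,
/// not from an external spec.)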
pub fn lexer(
) -> impl Parser<'static, &'static str, Vec<(Token, Span)>, extra::Err<Rich<'static, char, Span>>> {
    // numbers: an optional minus sign, an integer part, and an optional
    // fractional part, captured as one slice and converted to f64
    let number = just('-')
        .or_not()
        .then(text::int(10).then(just('.').then(text::digits(10)).or_not()))
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Number);

    // words: a lowercase ASCII letter, then any mix of ASCII
    // alphanumerics and */?!_
    let word = any()
        .filter(char::is_ascii_lowercase)
        .then(
            any()
                .filter(char::is_ascii_alphanumeric)
                .or(one_of("*/?!_"))
                .repeated(),
        )
        .to_slice();

    // words that collide with literals or reserved words get their own
    // token variants
    let reserved_or_word = word.map(|word: &str| match word {
        "true" => Token::Boolean(true),
        "false" => Token::Boolean(false),
        "nil" => Token::Nil,
        // todo: hard code these as type constructors
        "as" | "box" | "do" | "else" | "fn" | "if" | "let" | "loop" | "match" | "panic!"
        | "recur" | "repeat" | "then" | "when" | "with" | "or" | "and" | "receive" | "spawn!" => {
            Token::Reserved(word)
        }
        _ => Token::Word(word),
    });

let method = just("::").ignore_then(word).map(Token::Method);
|
|
|
|
let keyword = just(':').ignore_then(word).map(Token::Keyword);
|
|
|
|
    // escape sequences: a backslash followed by a recognized escape
    // character; the .to(...) values are discarded by .ignored(), so
    // escapes are validated here but not substituted -- the token keeps
    // the raw source slice, backslash included
    let escape = just('\\')
        .then(choice((
            just('\\').to('\\'),
            just('n').to('\n'),
            just('t').to('\t'),
            just('r').to('\r'),
            just('"').to('"'),
        )))
        .ignored();

    // strings: escape sequences or any plain character other than a
    // backslash or double quote, between double quotes; escape must be
    // tried first and the backslash kept out of the plain-character
    // set, otherwise none_of eats the backslash of \" and the escaped
    // quote terminates the string early
    let string = escape
        .or(none_of("\\\"").ignored())
        .repeated()
        .to_slice()
        .map(Token::String)
        .delimited_by(just('"'), just('"'));

    // todo: hard code these as type constructors
    let punctuation = one_of(",=[]{}()>;\n_")
        .to_slice()
        .or(just("->"))
        .or(just("..."))
        .or(just("#{"))
        .or(just("${"))
        .map(Token::Punctuation);

    let token = number
        .or(reserved_or_word)
        .or(keyword)
        .or(method)
        .or(string)
        .or(punctuation);

    // comments run from & to the end of the line (the newline itself is
    // left to be lexed as punctuation)
    let comment = just('&')
        .ignore_then(any().and_is(just('\n').not()).repeated())
        .repeated();

    // whitespace is spaces and tabs only: newlines are significant
    let ludus_ws = just(' ').or(just('\t')).repeated();

    token
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(ludus_ws)
        .padded_by(comment)
        // on an unrecognized character, skip it and try again
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_lexes_nil() {
        let spanned_toks = lexer().parse("nil").into_output_errors().0.unwrap();
        let (token, _) = spanned_toks[0].clone();
        assert_eq!(token, Token::Nil);
    }

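    // The tests below follow the pattern of it_lexes_nil. Their
    // expectations are inferred from the parser definitions above, not
    // from an external spec.
    #[test]
    fn it_lexes_numbers() {
        let spanned_toks = lexer().parse("-1.5").into_output_errors().0.unwrap();
        let (token, _) = spanned_toks[0].clone();
        assert_eq!(token, Token::Number(-1.5));
    }
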
    #[test]
    fn it_lexes_strings() {
        let spanned_toks = lexer()
            .parse("\"foo bar baz\"")
            .into_output_errors()
            .0
            .unwrap();
        let (token, _) = spanned_toks[0].clone();
        assert_eq!(token, Token::String("foo bar baz"));
    }

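    // Reserved words and plain words share one parser; `let` should map
    // to Reserved and an arbitrary lowercase name to Word.
    #[test]
    fn it_lexes_words_and_reserveds() {
        let spanned_toks = lexer().parse("let foo").into_output_errors().0.unwrap();
        let (reserved, _) = spanned_toks[0].clone();
        let (word, _) = spanned_toks[1].clone();
        assert_eq!(reserved, Token::Reserved("let"));
        assert_eq!(word, Token::Word("foo"));
    }
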
    #[test]
    fn it_lexes_strings_w_escaped_quotes() {
        let spanned_toks = lexer()
            .parse("\"foo \\\"bar baz\"")
            .into_output_errors()
            .0
            .unwrap();
        let (token, _) = spanned_toks[0].clone();
        // the token borrows the raw source slice, so the backslash of
        // the escaped quote is still present
        assert_eq!(token, Token::String("foo \\\"bar baz"));
    }
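
    // Comment skipping and punctuation, again with expectations read
    // off the combinators: `&` comments run to end of line, and single
    // delimiters come through as Punctuation tokens.
    #[test]
    fn it_skips_comments() {
        let spanned_toks = lexer()
            .parse("true & the rest of the line is ignored")
            .into_output_errors()
            .0
            .unwrap();
        assert_eq!(spanned_toks.len(), 1);
        let (token, _) = spanned_toks[0].clone();
        assert_eq!(token, Token::Boolean(true));
    }

    #[test]
    fn it_lexes_punctuation() {
        let spanned_toks = lexer().parse("()").into_output_errors().0.unwrap();
        let (open, _) = spanned_toks[0].clone();
        let (close, _) = spanned_toks[1].clone();
        assert_eq!(open, Token::Punctuation("("));
        assert_eq!(close, Token::Punctuation(")"));
    }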
}