rudus/src/lexer.rs

99 lines
2.9 KiB
Rust

use crate::spans::*;
use chumsky::prelude::*;
use std::fmt;
/// A single lexical unit produced by [`lexer`].
///
/// Every textual payload is a `&'static str`; the lexer fills these in from
/// slices of its `&'static str` input or from string literals in its own code.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// The literal `nil`.
    Nil,
    /// A numeric literal, parsed into an `f64`.
    Number(f64),
    /// An identifier that is not one of the reserved words.
    Word(&'static str),
    /// The literals `true` and `false`.
    Boolean(bool),
    /// A keyword; the payload is the word after the leading `:` (colon excluded).
    Keyword(&'static str),
    /// A string literal; the payload is the text between the double quotes.
    String(&'static str),
    // todo: hard code these types
    /// A reserved word such as `fn`, `let` or `match`.
    Reserved(&'static str),
    /// A punctuation mark such as `,`, `->` or a newline.
    Punctuation(&'static str),
}
/// Renders a token as a bracketed tag, e.g. `[Number 42]` or `[Keyword :foo]`.
impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // `nil` carries no payload, so the text is constant.
            Token::Nil => f.write_str("[nil]"),
            Token::Number(value) => write!(f, "[Number {value}]"),
            Token::Word(name) => write!(f, "[Word {name}]"),
            Token::Boolean(flag) => write!(f, "[Boolean {flag}]"),
            // The stored keyword text has no colon; put it back for display.
            Token::Keyword(name) => write!(f, "[Keyword :{name}]"),
            Token::String(contents) => write!(f, "[String {contents}]"),
            Token::Reserved(name) => write!(f, "[Reserved {name}]"),
            Token::Punctuation(mark) => write!(f, "[Punctuation {mark}]"),
        }
    }
}
/// Builds the lexer: a parser from `&'static str` source text to a list of
/// `(Token, Span)` pairs.
///
/// Each token is tried in a fixed order: number, reserved word / boolean /
/// `nil` / plain word, keyword, string, punctuation. Around every token,
/// spaces and tabs are skipped first and then `&` comments. Newlines are not
/// skipped; they are emitted as punctuation tokens.
///
/// When a token fails to parse, recovery uses chumsky's
/// `skip_then_retry_until` strategy with `any().ignored()` as the skip parser
/// and `end()` as the stop condition.
///
/// # Panics
///
/// The numeric slice is converted with `from_str().unwrapped()`, which panics
/// if the conversion to `f64` fails. The slice matched here is presumably
/// always valid `f64` syntax, so this is not expected to happen.
pub fn lexer(
) -> impl Parser<'static, &'static str, Vec<(Token, Span)>, extra::Err<Rich<'static, char, Span>>> {
    // Only spaces and tabs count as skippable whitespace; '\n' is a token.
    let inline_space = just(' ').or(just('\t')).repeated();

    // Zero or more comments, each an `&` followed by everything up to (but
    // not including) the next newline.
    let comments = just('&')
        .ignore_then(any().and_is(just('\n').not()).repeated())
        .repeated();

    // Optional `-`, an integer part, and an optional `.digits` fraction.
    let fraction = just('.').then(text::digits(10)).or_not();
    let number = just('-')
        .or_not()
        .then(text::int(10).then(fraction))
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Number);

    // A word starts with an ASCII lowercase letter and continues with ASCII
    // alphanumerics or any of `*/?!_`.
    let word_tail = any()
        .filter(char::is_ascii_alphanumeric)
        .or(one_of("*/?!_"))
        .repeated();
    let word = any()
        .filter(char::is_ascii_lowercase)
        .then(word_tail)
        .to_slice();

    // Literals and reserved words share the word grammar, so classify the
    // matched text after the fact.
    let reserved_or_word = word.map(|lexeme: &str| match lexeme {
        "nil" => Token::Nil,
        "true" => Token::Boolean(true),
        "false" => Token::Boolean(false),
        // todo: hard code these as type constructors
        "as" | "box" | "do" | "else" | "fn" | "if" | "let" | "loop" | "match" | "panic!"
        | "recur" | "repeat" | "then" | "when" | "with" => Token::Reserved(lexeme),
        _ => Token::Word(lexeme),
    });

    // `:` followed by a word; the colon itself is dropped from the payload.
    let keyword = just(':').ignore_then(word).map(Token::Keyword);

    // Everything between two double quotes; there is no escape handling.
    let string_body = none_of("\"").repeated().to_slice();
    let string = just('"')
        .ignore_then(string_body)
        .then_ignore(just('"'))
        .map(Token::String);

    // todo: hard code these as type constructors
    // Single-character marks first, then the multi-character ones.
    let punctuation = one_of(",=[]{}()>;\n_")
        .to_slice()
        .or(just("->"))
        .or(just("..."))
        .or(just("#{"))
        .or(just("${"))
        .map(Token::Punctuation);

    // Order matters: earlier alternatives win.
    let token = number
        .or(reserved_or_word)
        .or(keyword)
        .or(string)
        .or(punctuation);

    // Attach the source span to each token before padding is stripped.
    let spanned = token.map_with(|tok, extra| (tok, extra.span()));

    spanned
        .padded_by(inline_space)
        .padded_by(comments)
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}