rudus/src/lib.rs.old

199 lines
5.4 KiB
Rust
Raw Normal View History

2024-10-27 21:45:17 +00:00
// use ariadne::{sources, Color, Label, Report, ReportKind};
use chumsky::prelude::*;
use std::fmt;
/// Span type (`usize` offsets into the source text) attached to each lexed token.
pub type Span = SimpleSpan<usize>;
/// All lexical tokens produced by the Ludus lexer.
///
/// Payload-carrying variants borrow their text from the source string
/// (lifetime `'src`) rather than allocating.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    // atomic types
    /// `true` / `false` literal.
    Boolean(bool),
    /// Numeric literal, stored as an `f64`.
    Number(f64),
    /// String literal contents (the surrounding quotes are stripped by the lexer).
    String(&'src str),
    /// A plain identifier.
    Word(&'src str),
    /// `:keyword` token; the leading colon is stripped and re-added by `Display`.
    Keyword(&'src str),
    /// Package keyword; displayed identically to `Keyword` (`:{name}`).
    Pkgkeyword(&'src str),
    /// Ignored binding; displayed with a leading underscore (`_{name}`).
    Ignored(&'src str),
    // reserved words
    As,
    Box,
    Do,
    Else,
    Fn,
    If,
    Import,
    Let,
    Loop,
    Match,
    Nil,
    Ns,
    Panic,
    Pkg,
    Recur,
    Repeat,
    Test,
    Then,
    Use,
    When,
    With,
    // punctuation (source spelling shown; see the `Display` impl)
    Arrow,       // ->
    Comma,       // ,
    Equals,      // =
    Lbrace,      // {
    Lbracket,    // [
    Lparen,      // (
    Newline,     // displayed as a literal backslash-n
    Pipeline,    // >
    Placeholder, // _
    Rbrace,      // }
    Rbracket,    // ]
    Rparen,      // )
    Semi,        // ;
    Splat,       // ...
    Startdict,   // #{
    Startset,    // ${
}
impl<'src> fmt::Display for Token<'src> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Token::Boolean(b) => write!(f, "{}", b),
Token::Number(n) => write!(f, "{}", n),
Token::String(s) => write!(f, "{}", s),
Token::Word(w) => write!(f, "{}", w),
Token::Keyword(k) => write!(f, ":{}", k),
Token::Ignored(i) => write!(f, "_{}", i),
Token::Pkgkeyword(k) => write!(f, ":{}", k),
Token::As => write!(f, "as"),
Token::Box => write!(f, "box"),
Token::Do => write!(f, "do"),
Token::Else => write!(f, "else"),
Token::Fn => write!(f, "fn"),
Token::If => write!(f, "if"),
Token::Import => write!(f, "import"),
Token::Let => write!(f, "let"),
Token::Loop => write!(f, "loop"),
Token::Match => write!(f, "match"),
Token::Nil => write!(f, "nil"),
Token::Ns => write!(f, "ns"),
Token::Panic => write!(f, "panic!"),
Token::Pkg => write!(f, "pkg"),
Token::Recur => write!(f, "recur"),
Token::Repeat => write!(f, "repeat"),
Token::Test => write!(f, "test"),
Token::Then => write!(f, "then"),
Token::Use => write!(f, "use"),
Token::When => write!(f, "when"),
Token::With => write!(f, "with"),
Token::Arrow => write!(f, "->"),
Token::Comma => write!(f, ","),
Token::Equals => write!(f, "="),
Token::Lbrace => write!(f, "{{"),
Token::Lbracket => write!(f, "["),
Token::Lparen => write!(f, "("),
Token::Newline => write!(f, "\\n"),
Token::Pipeline => write!(f, ">"),
Token::Placeholder => write!(f, "_"),
Token::Rbrace => write!(f, "}}"),
Token::Rbracket => write!(f, "]"),
Token::Rparen => write!(f, ")"),
Token::Semi => write!(f, ";"),
Token::Splat => write!(f, "..."),
Token::Startdict => write!(f, "#{{"),
Token::Startset => write!(f, "${{"),
}
}
}
/// Builds the Ludus lexer: a chumsky parser that turns source text into a
/// vector of `(Token, Span)` pairs.
///
/// `&`-comments (running to end of line) and whitespace are skipped between
/// tokens. Errors are reported via `Rich` diagnostics.
pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
    // String literal: everything between a pair of double quotes.
    // NOTE(review): no escape handling yet, so embedded `"` cannot appear.
    let string = just('"')
        .ignore_then(none_of('"').repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::String);
    // A word: one lowercase ASCII letter, then any run of ASCII alphanumerics
    // or the symbol characters `*_/!?`.
    //
    // BUGFIX: `.repeated()` must apply to the *continuation* characters only.
    // The previous version repeated the whole (initial, continuation) pair,
    // which truncated odd-length words (`let` lexed as `le`) and permitted an
    // empty match — which would loop forever under the top-level `repeated()`.
    let word = any()
        .filter(char::is_ascii_lowercase)
        .then(
            any()
                .filter(char::is_ascii_alphanumeric)
                .or(one_of("*_/!?"))
                .repeated(),
        )
        .to_slice();
    // `:word` keywords; the colon is stripped here and re-added by `Display`.
    let keyword = just(':').ignore_then(word.clone()).map(Token::Keyword);
    // Number: optional leading minus, integer part, optional fractional part.
    // Parsed to f64 via FromStr; `unwrapped` is safe because the slice is
    // guaranteed by construction to be a valid float literal.
    let number = just('-')
        .or_not()
        .then(text::int(10).then(just('.').then(text::digits(10)).or_not()))
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Number);
    // Words that collide with reserved words become their dedicated token;
    // `true`/`false` become Boolean literals.
    let reserved_or_word = word.map(|word: &str| match word {
        "as" => Token::As,
        "box" => Token::Box,
        "do" => Token::Do,
        "else" => Token::Else,
        "false" => Token::Boolean(false),
        "fn" => Token::Fn,
        "if" => Token::If,
        "import" => Token::Import,
        "let" => Token::Let,
        "loop" => Token::Loop,
        "match" => Token::Match,
        "nil" => Token::Nil,
        "ns" => Token::Ns,
        "panic!" => Token::Panic, // `!` is in the word symbol set, so this arm is reachable
        "pkg" => Token::Pkg,
        "recur" => Token::Recur,
        "repeat" => Token::Repeat,
        "test" => Token::Test,
        "then" => Token::Then,
        "true" => Token::Boolean(true),
        "use" => Token::Use,
        "when" => Token::When,
        "with" => Token::With,
        _ => Token::Word(word),
    });
    // Punctuation handled so far. NOTE(review): braces/brackets/parens and
    // the remaining punctuation variants are not yet lexed here.
    let arrow = just("->").to(Token::Arrow);
    let comma = just(',').to(Token::Comma);
    let semicolon = just(';').to(Token::Semi);
    let placeholder = just('_').to(Token::Placeholder);
    let control = arrow.or(comma).or(semicolon).or(placeholder);
    // `&` comments run to end of line; padded so surrounding whitespace is eaten.
    let comment = just('&')
        .then(any().and_is(just('\n').not()).repeated())
        .padded();
    // Order matters: try numbers before words so `-1` isn't split, and
    // keywords before bare words so the `:` prefix wins.
    let atom = number.or(string).or(keyword).or(reserved_or_word);
    // BUGFIX: collect the whole input into a token stream, as the declared
    // return type `Vec<(Token, Span)>` promises. Previously the chain
    // produced a single `(Token, Span)` and did not match the signature.
    atom.or(control)
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(comment.repeated())
        .padded()
        .repeated()
        .collect()
}
#[cfg(test)]
mod tests {
    use crate::lexer;
    use crate::Token;
    use chumsky::{container::Seq, prelude::*};

    /// Lexing a lone integer literal yields a `Number` token first.
    #[test]
    fn it_works() {
        let tokens = lexer().parse("42").unwrap();
        let (token, _span) = tokens[0].clone();
        assert_eq!(token, Token::Number(42.0));
    }
}