lexing?
This commit is contained in:
commit
7829f739fc
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
|
@ -0,0 +1,11 @@
|
|||
[package]
name = "rudus"
version = "0.0.1"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
# NOTE(review): ariadne and chumsky track the default branch of their git
# repos with no `rev`/`tag`/`branch` pin, so builds are not reproducible and
# may break on upstream changes — consider pinning a commit.
ariadne = { git = "https://github.com/zesterer/ariadne" }
chumsky = { git = "https://github.com/zesterer/chumsky" }
imbl = "3.0.0"
|
192
src/lib.rs
Normal file
192
src/lib.rs
Normal file
|
@ -0,0 +1,192 @@
|
|||
use chumsky::prelude::*;
|
||||
use std::fmt;
|
||||
|
||||
/// A byte-offset span into the source string, as produced by chumsky.
pub type Span = SimpleSpan<usize>;
|
||||
|
||||
/// A single lexical token, borrowing its text from the source string.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    /// Numeric literal; all numbers are lexed as `f64`.
    Number(f64),
    /// Identifier: ASCII lowercase start, then alphanumerics or `*/?!_`.
    Word(&'src str),
    /// `true` or `false`.
    Boolean(bool),
    /// `:name` keyword; stored without the leading colon.
    Keyword(&'src str),
    /// String literal; stored without the surrounding double quotes.
    String(&'src str),
    /// Reserved word such as `if`, `let`, `fn` (see the lexer's match list).
    Reserved(&'src str),
    /// The literal `nil`.
    Nil,
    /// Single- or multi-character punctuation, e.g. `(`, `,`, `->`, `#{`.
    /// Newlines are significant and lexed as `Punctuation("\n")`.
    Punctuation(&'src str),
}
|
||||
|
||||
impl<'src> fmt::Display for Token<'src> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Token::Number(n) => write!(f, "{}", n),
|
||||
Token::Word(w) => write!(f, "{}", w),
|
||||
Token::Boolean(b) => write!(f, "{}", b),
|
||||
Token::Keyword(k) => write!(f, ":{}", k),
|
||||
Token::String(s) => write!(f, "{}", s),
|
||||
Token::Reserved(r) => write!(f, "{}", r),
|
||||
Token::Nil => write!(f, "nil"),
|
||||
Token::Punctuation(p) => write!(f, "{}", p),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the Ludus lexer: parses a source string into a vector of
/// `(Token, Span)` pairs, reporting errors as `Rich` diagnostics.
pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
    // Numbers: optional leading `-`, an integer part, and an optional
    // `.fraction`. The matched slice is converted to f64 via FromStr;
    // `unwrapped` is safe because the grammar only matches valid f64 text.
    let number = just('-')
        .or_not()
        .then(text::int(10).then(just('.').then(text::digits(10)).or_not()))
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Number);

    // Words: an ASCII lowercase letter, then zero or more alphanumerics
    // or any of `*/?!_` (so e.g. `panic!` and `empty?` are single words).
    let word = any()
        .filter(char::is_ascii_lowercase)
        .then(
            any()
                .filter(char::is_ascii_alphanumeric)
                .or(one_of("*/?!_"))
                .repeated(),
        )
        .to_slice();

    // Classify a lexed word: booleans and nil get dedicated variants,
    // reserved words become `Reserved`, everything else is a plain `Word`.
    let reserved_or_word = word.map(|word: &str| match word {
        "true" => Token::Boolean(true),
        "false" => Token::Boolean(false),
        "nil" => Token::Nil,
        "as" | "box" | "do" | "else" | "fn" | "if" | "import" | "let" | "loop" | "match" | "ns"
        | "panic!" | "pkg" | "recur" | "repeat" | "test" | "then" | "use" | "when" | "with" => {
            Token::Reserved(word)
        }
        _ => Token::Word(word),
    });

    // Keywords: `:` followed by a word; the colon is not stored.
    let keyword = just(':').ignore_then(word.clone()).map(Token::Keyword);

    // Strings: double-quoted, no escape handling (a `"` always terminates);
    // the quotes are not stored.
    let string = just('"')
        .ignore_then(none_of("\"").repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::String);

    // Punctuation: one of the single characters (note `\n` is a token), or
    // one of the multi-character marks `->`, `...`, `#{`, `${`. Ordering is
    // safe because the multi-char marks' first characters (`-`, `.`, `#`,
    // `$`) are disjoint from the single-char set.
    let punctuation = one_of(",=[]{}()>;\n")
        .to_slice()
        .or(just("->"))
        .or(just("..."))
        .or(just("#{"))
        .or(just("${"))
        .map(Token::Punctuation);

    let token = number
        .or(reserved_or_word)
        .or(keyword)
        .or(string)
        .or(punctuation);

    // Comments: `&` to end of line. The terminating newline is NOT consumed
    // here, so it still surfaces as a `Punctuation("\n")` token.
    let comment = just('&')
        .ignore_then(any().and_is(just('\n').not()).repeated())
        .repeated();

    // Inline whitespace only — newlines are significant and must survive.
    let ludus_ws = just(' ').or(just('\t')).repeated();

    token
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(ludus_ws)
        .padded_by(comment)
        // On error, skip one input item and retry so a single bad character
        // doesn't abort the whole lex.
        .recover_with(skip_then_retry_until(any().ignored(), end()))
        .repeated()
        .collect()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::lexer;
    use crate::Token;
    use chumsky::prelude::*;

    /// Lexes `src` and returns the first token, dropping its span.
    fn first_token(src: &str) -> Token {
        lexer().parse(src).unwrap()[0].clone().0
    }

    #[test]
    fn it_lexes_positive_ints() {
        assert_eq!(first_token("42"), Token::Number(42.0))
    }

    #[test]
    fn it_lexes_negative_ints() {
        assert_eq!(first_token("-42"), Token::Number(-42.0))
    }

    #[test]
    fn it_lexes_positive_floats() {
        assert_eq!(first_token("42.032"), Token::Number(42.032))
    }

    #[test]
    fn it_lexes_positive_decimals() {
        assert_eq!(first_token("0.123"), Token::Number(0.123))
    }

    #[test]
    fn it_lexes_negative_floats() {
        assert_eq!(first_token("-42.123"), Token::Number(-42.123))
    }

    #[test]
    fn it_lexes_negative_decimals() {
        assert_eq!(first_token("-0.123"), Token::Number(-0.123))
    }

    #[test]
    fn it_lexes_bools() {
        assert_eq!(first_token("true"), Token::Boolean(true));
        assert_eq!(first_token("false"), Token::Boolean(false))
    }

    #[test]
    fn it_lexes_words() {
        assert_eq!(first_token("foo"), Token::Word("foo"))
    }

    #[test]
    fn it_lexes_keywords() {
        assert_eq!(first_token(":foo"), Token::Keyword("foo"))
    }

    #[test]
    fn it_lexes_strings() {
        assert_eq!(first_token("\"foo bar baz\""), Token::String("foo bar baz"))
    }

    // The comment is dropped, but the newline that ends it survives as a
    // token — hence `baz` sits at index 2, not 1.
    #[test]
    fn it_ignores_comments() {
        let toks = lexer().parse("foo &bar\nbaz").unwrap();
        assert_eq!(toks[0].0, Token::Word("foo"));
        assert_eq!(toks[2].0, Token::Word("baz"))
    }

    // `;` and `\n` are punctuation tokens, so the words land at even indices.
    #[test]
    fn it_lexes_multiple_tokens() {
        let toks = lexer().parse("foo;bar\nbaz").unwrap();
        assert_eq!(toks[0].0, Token::Word("foo"));
        assert_eq!(toks[2].0, Token::Word("bar"));
        assert_eq!(toks[4].0, Token::Word("baz"))
    }

    #[test]
    fn it_lexes_collections() {
        let toks = lexer().parse("(1, 2)").unwrap();
        assert_eq!(toks[0].0, Token::Punctuation("("));
        assert_eq!(toks[1].0, Token::Number(1.0));
        assert_eq!(toks[2].0, Token::Punctuation(","));
        assert_eq!(toks[3].0, Token::Number(2.0));
        assert_eq!(toks[4].0, Token::Punctuation(")"))
    }
}
|
198
src/lib.rs.old
Normal file
198
src/lib.rs.old
Normal file
|
@ -0,0 +1,198 @@
|
|||
// use ariadne::{sources, Color, Label, Report, ReportKind};
|
||||
use chumsky::prelude::*;
|
||||
use std::fmt;
|
||||
|
||||
/// A byte-offset span into the source string, as produced by chumsky.
pub type Span = SimpleSpan<usize>;
|
||||
|
||||
/// NOTE(review): this file is the superseded lexer kept as `src/lib.rs.old`;
/// the current implementation lives in `src/lib.rs`. Preserved verbatim.
///
/// Token type of the earlier design: every reserved word and punctuation
/// mark is its own unit variant, rather than the rewrite's catch-all
/// `Reserved(&str)` / `Punctuation(&str)`.
#[derive(Clone, Debug, PartialEq)]
pub enum Token<'src> {
    // atomic types
    Boolean(bool),
    Number(f64),
    String(&'src str),
    Word(&'src str),
    Keyword(&'src str),
    // `:word` keyword used in pkg position (not present in the rewrite)
    Pkgkeyword(&'src str),
    // `_name` ignored binding (not present in the rewrite)
    Ignored(&'src str),

    // reserved words
    As,
    Box,
    Do,
    Else,
    Fn,
    If,
    Import,
    Let,
    Loop,
    Match,
    Nil,
    Ns,
    Panic,
    Pkg,
    Recur,
    Repeat,
    Test,
    Then,
    Use,
    When,
    With,

    // punctuation
    Arrow,
    Comma,
    Equals,
    Lbrace,
    Lbracket,
    Lparen,
    Newline,
    Pipeline,
    Placeholder,
    Rbrace,
    Rbracket,
    Rparen,
    Semi,
    Splat,
    Startdict,
    Startset,
}
|
||||
|
||||
impl<'src> fmt::Display for Token<'src> {
    /// Renders the token as source text: keywords and pkg-keywords regain
    /// their leading `:`, ignored bindings their leading `_`; unit variants
    /// print their fixed spelling.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Token::Boolean(b) => write!(f, "{}", b),
            Token::Number(n) => write!(f, "{}", n),
            Token::String(s) => write!(f, "{}", s),
            Token::Word(w) => write!(f, "{}", w),
            Token::Keyword(k) => write!(f, ":{}", k),
            Token::Ignored(i) => write!(f, "_{}", i),
            Token::Pkgkeyword(k) => write!(f, ":{}", k),

            Token::As => write!(f, "as"),
            Token::Box => write!(f, "box"),
            Token::Do => write!(f, "do"),
            Token::Else => write!(f, "else"),
            Token::Fn => write!(f, "fn"),
            Token::If => write!(f, "if"),
            Token::Import => write!(f, "import"),
            Token::Let => write!(f, "let"),
            Token::Loop => write!(f, "loop"),
            Token::Match => write!(f, "match"),
            Token::Nil => write!(f, "nil"),
            Token::Ns => write!(f, "ns"),
            Token::Panic => write!(f, "panic!"),
            Token::Pkg => write!(f, "pkg"),
            Token::Recur => write!(f, "recur"),
            Token::Repeat => write!(f, "repeat"),
            Token::Test => write!(f, "test"),
            Token::Then => write!(f, "then"),
            Token::Use => write!(f, "use"),
            Token::When => write!(f, "when"),
            Token::With => write!(f, "with"),

            Token::Arrow => write!(f, "->"),
            Token::Comma => write!(f, ","),
            Token::Equals => write!(f, "="),
            Token::Lbrace => write!(f, "{{"),
            Token::Lbracket => write!(f, "["),
            Token::Lparen => write!(f, "("),
            // Prints an escaped `\n`, not an actual newline.
            Token::Newline => write!(f, "\\n"),
            Token::Pipeline => write!(f, ">"),
            Token::Placeholder => write!(f, "_"),
            Token::Rbrace => write!(f, "}}"),
            Token::Rbracket => write!(f, "]"),
            Token::Rparen => write!(f, ")"),
            Token::Semi => write!(f, ";"),
            Token::Splat => write!(f, "..."),
            Token::Startdict => write!(f, "#{{"),
            Token::Startset => write!(f, "${{"),
        }
    }
}
|
||||
|
||||
/// The superseded lexer. Kept verbatim for reference; the rewrite in
/// `src/lib.rs` replaces it.
///
/// NOTE(review): known problems with this version, visible in this file:
/// - The signature promises `Vec<(Token, Span)>` but the final expression
///   produces a single `(Token, Span)` (there is no `.repeated().collect()`),
///   so this appears not to type-check — TODO confirm against chumsky.
/// - The `word` parser repeats the *pair* (lowercase head, follower char),
///   so it matches characters two at a time and also matches empty input.
/// - `.padded()` consumes newlines, yet `Token::Newline` exists — newlines
///   could never be tokenized.
pub fn lexer<'src>(
) -> impl Parser<'src, &'src str, Vec<(Token<'src>, Span)>, extra::Err<Rich<'src, char, Span>>> {
    // Strings: double-quoted, no escapes; quotes are not stored.
    let string = just('"')
        .ignore_then(none_of('"').repeated().to_slice())
        .then_ignore(just('"'))
        .map(Token::String);

    // NOTE(review): `.repeated()` wraps the whole `then` pair — see the
    // function-level note. The rewrite moves `.repeated()` inside `then`.
    let word = any()
        .filter(char::is_ascii_lowercase)
        .then(
            any()
                .filter(char::is_ascii_alphanumeric)
                .or(one_of("*_/!?")),
        )
        .repeated()
        .to_slice();

    // Keywords: `:` followed by a word; colon not stored.
    let keyword = just(':').ignore_then(word.clone()).map(Token::Keyword);

    // Numbers: optional `-`, integer part, optional `.fraction`; slice
    // parsed to f64 via FromStr.
    let number = just('-')
        .or_not()
        .then(text::int(10).then(just('.').then(text::digits(10)).or_not()))
        .to_slice()
        .from_str()
        .unwrapped()
        .map(Token::Number);

    // Map each reserved spelling to its unit variant; anything else is a Word.
    let reserved_or_word = word.map(|word: &str| match word {
        "as" => Token::As,
        "box" => Token::Box,
        "do" => Token::Do,
        "else" => Token::Else,
        "false" => Token::Boolean(false),
        "fn" => Token::Fn,
        "if" => Token::If,
        "import" => Token::Import,
        "let" => Token::Let,
        "loop" => Token::Loop,
        "match" => Token::Match,
        "nil" => Token::Nil,
        "ns" => Token::Ns,
        "panic!" => Token::Panic, // won't match until C-style ident -> Ludus word
        "pkg" => Token::Pkg,
        "recur" => Token::Recur,
        "repeat" => Token::Repeat,
        "test" => Token::Test,
        "then" => Token::Then,
        "true" => Token::Boolean(true),
        "use" => Token::Use,
        "when" => Token::When,
        "with" => Token::With,
        _ => Token::Word(word),
    });

    // Punctuation handled so far; the remaining marks were never wired in.
    let arrow = just("->").to(Token::Arrow);
    let comma = just(',').to(Token::Comma);
    let semicolon = just(';').to(Token::Semi);
    let placeholder = just('_').to(Token::Placeholder);

    let control = arrow.or(comma).or(semicolon).or(placeholder);

    // Comments: `&` to end of line; `.padded()` also eats surrounding
    // whitespace, including newlines.
    let comment = just('&')
        .then(any().and_is(just('\n').not()).repeated())
        .padded();

    let atom = number.or(string).or(keyword).or(reserved_or_word);

    atom.or(control)
        .map_with(|tok, e| (tok, e.span()))
        .padded_by(comment.repeated())
        .padded()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::lexer;
    use crate::Token;
    // Fix: `container::Seq` was imported but never used anywhere in this
    // module, triggering an unused-import warning; import only the prelude.
    use chumsky::prelude::*;

    /// Smoke test: a bare integer lexes to a float-valued `Number` token.
    #[test]
    fn it_works() {
        let toks = lexer().parse("42").unwrap();
        let (tok, _) = toks[0].clone();
        assert_eq!(tok, Token::Number(42.0));
    }
}
|
Loading…
Reference in New Issue
Block a user