ludus/janet/recursive.janet

525 lines
14 KiB
Plaintext
Raw Normal View History

### A recursive descent parser for Ludus
### First, some mutual recursion helpers
(defn unreachable
"A function that errors out if called."
2024-04-29 22:38:08 +00:00
[&] (error "reached the unreachable"))
(defmacro declare
"Forward-declares a function name, so that it can be called in a mutually recursive manner."
[& names]
(def bindings @[])
(loop [name :in names]
(def binding ~(var ,name unreachable))
(array/push bindings binding))
~(upscope ,;bindings))
(defmacro defrec
"Defines a function depended on by another function, that has been forward `declare`d."
[name & forms]
(if-not (dyn name) (error "recursive functions must be declared before they are defined"))
~(set ,name (defn ,name ,;forms)))
### Next: a data structure for a parser
2024-05-08 19:29:18 +00:00
(defn- new-parser
"Creates a new parser data structure to pass around"
[tokens]
@{
:tokens (tokens :tokens)
:ast @[]
:current 0
:errors @[]
})
### and some helper functions for interfacing with that data structure
2024-05-08 19:29:18 +00:00
(defn- current
"Returns the current token of a parser. If the parser is at the last token, keeps returning the last token."
[parser]
(def tokens (parser :tokens))
(get tokens (parser :current) (last tokens)))
(defn- peek
"Returns the next token of the parser. If the parser is at the last token, keeps returning the last token."
[parser]
(def tokens (parser :tokens))
(get tokens (inc (parser :current)) (last tokens)))
(defn- advance
"Advances the parser by a token"
[parser]
(update parser :current inc))
(defn- type
"Returns the type of a token"
[token]
(get token :type))
(defn- check
"Returns true if the parser's current token is one of the passed types"
[parser type & types]
(def accepts [type ;types])
(def current-type (-> parser current (get :type)))
2024-05-08 19:29:18 +00:00
(has-value? accepts current-type))
### Parsing functions
2024-05-08 17:50:26 +00:00
# forward declarations
(declare simple nonbinding expr toplevel synthetic)
# errors
2024-05-08 19:29:18 +00:00
# terminators are what terminate expressions
(def terminators [:break :newline :semicolon :eof])
2024-05-08 19:29:18 +00:00
(defn- terminates?
"Returns true if the current token in the parser is a terminator"
[parser]
(def curr (current parser))
(def ttype (type curr))
(has-value? terminators ttype))
2024-05-08 19:29:18 +00:00
# breakers are what terminate panics
2024-04-29 22:38:08 +00:00
(def breaking [:break :newline :semicolon :comma :eof :then :else])
2024-05-08 19:29:18 +00:00
(defn- breaks?
"Returns true if the current token in the parser should break a panic"
[parser]
2024-04-29 22:38:08 +00:00
(def curr (current parser))
(def ttype (type curr))
(has-value? breaking ttype))
2024-05-08 19:29:18 +00:00
(defn- panic
"Panics the parser: starts skipping tokens until a breaking token is encountered. Adds the error to the parser's errors array, and also errors out."
[parser message]
2024-05-08 17:50:26 +00:00
(print "Panic in the parser: " message)
(def origin (current parser))
(advance parser)
(def skipped @[origin])
2024-04-29 22:38:08 +00:00
(while (not (breaks? parser))
(array/push skipped (current parser))
(advance parser))
(array/push skipped (current parser))
2024-04-29 22:38:08 +00:00
(def err {:type :error :data skipped :token origin :msg message})
(update parser :errors array/push err)
(error err))
2024-05-08 19:29:18 +00:00
(defn- expected
"Panics the parser with a message: expected {type} got ..."
[parser ttype & ttypes]
(def expected (map string [ttype ;ttypes]))
(def type-msg (string/join expected " | "))
(panic parser (string "expected {" type-msg "}, got " (-> parser current type))))
(defn- expect
"Panics if the parser's current token is not of type; otherwise does nothing & returns nil"
[parser type & types]
(if-not (check parser type ;types) (expected parser type ;types)))
(defn- expect-ret
"Same as expect, but captures the error, returning it as a value"
[parser type]
2024-05-08 17:22:49 +00:00
(try (expect parser type) ([e] e)))
2024-05-08 19:29:18 +00:00
(defn- capture
"Applies the parse function to the parser, returning the parsed AST. If there is a panic, captures the panic and returns it as a value."
[parse-fn parser]
2024-05-08 17:50:26 +00:00
(try (parse-fn parser) ([e] e)))
2024-05-08 19:29:18 +00:00
(defn- accept-one
"Accepts a single token of passed type, advancing the parser if a match, doing nothing if not."
[parser type]
(if (check parser type) (advance parser)))
(defn- accept-many
"Accepts any number of tokens of a passed type, advancing the parser on match until there are no more matches. Does nothing on no match."
[parser type]
(while (check parser type) (advance parser)))
# atoms
(defn- bool [parser]
(expect parser :bool)
(def curr (-> parser current))
(def ttype (type curr))
(def value (if (= ttype :true) true false))
(advance parser)
{:type :bool :data value :token curr}
)
(defn- num [parser]
(expect parser :number)
(def curr (-> parser current))
(advance parser)
{:type :number :data (curr :literal) :token curr}
)
(defn- kw [parser]
(expect parser :keyword)
2024-04-29 22:38:08 +00:00
(if (= :lparen (-> parser peek type)) (break (synthetic parser)))
(def curr (-> parser current))
(advance parser)
{:type :keyword :data (curr :literal) :token curr}
)
(defn- nill [parser]
(expect parser :nil)
(advance parser)
{:type :nil :token (current parser)}
)
(defn- str [parser]
(expect parser :string)
(def curr (-> parser current))
(advance parser)
{:type :string :data (curr :literal) :token curr}
)
2024-04-29 22:38:08 +00:00
# words & synthetic expressions
(def separates [:break :newline :comma])
(defn- separates? [parser]
(def curr (current parser))
(def ttype (type curr))
(has-value? separates ttype))
(defn- separators [parser]
(if-not (separates? parser)
(panic parser (string "expected separator, got " (-> parser current type))))
(while (separates? parser) (advance parser)))
2024-04-29 22:38:08 +00:00
(def sequels [:lparen :keyword])
(defn- word [parser]
(expect parser :word)
(if (has-value? sequels (-> parser peek type)) (break (synthetic parser)))
(def curr (-> parser current))
(advance parser)
{:type :word :data (curr :lexeme) :token curr}
)
(defn- args [parser]
(def origin (current parser))
(advance parser) # consume the :lparen
(def ast @{:type :args :data @[] :token origin :partial false})
(while (separates? parser) (advance parser)) # consume any separators
(while (not (check parser :rparen))
(def origin (current parser))
(def term (if (check parser :placeholder)
(if (ast :partial)
(do
(def err {:type :error :data [] :token origin :msg "partially applied functions may only use one placeholder"})
(advance parser)
(update parser :errors array/push err)
err)
(do
(set (ast :partial) true)
(advance parser)
{:type :placeholder :token origin}))
(try (nonbinding parser) ([e] e))))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
(advance parser)
ast)
(defrec synthetic [parser]
(def origin (current parser))
(def ast {:type :synthetic :data @[origin] :token origin})
(advance parser)
(while (has-value? sequels (-> parser current type))
(def term
(case (-> parser current type)
:lparen (args parser)
:keyword (kw parser)
))
(array/push (ast :data) term)
)
ast
)
2024-04-29 22:38:08 +00:00
# collections
(defn- tup [parser]
(def origin (current parser))
(advance parser) # consume the :lparen
(def ast {:type :tuple :data @[] :token origin})
(while (separates? parser) (advance parser)) # consume any separators
(while (not (check parser :rparen))
(def term (try (nonbinding parser) ([e] e)))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
(advance parser)
ast)
2024-04-29 22:38:08 +00:00
(defn- list [parser]
(def origin (current parser))
(advance parser)
(def ast {:type :list :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbracket))
(def origin (current parser))
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (try (word parser) ([e] e)))
{:type :splat :data splatted :token origin}
)
(try (nonbinding parser) ([e] e))))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
(advance parser)
ast)
2024-05-08 17:50:26 +00:00
(defn- sett [parser]
2024-04-29 22:38:08 +00:00
(def origin (current parser))
(advance parser)
(def ast {:type :set :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbrace))
(def origin (current parser))
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (try (word parser) ([e] e)))
{:type :splat :data splatted :token origin}
)
(try (nonbinding parser) ([e] e))))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
(advance parser)
ast)
(defn- dict [parser]
(def origin (current parser))
(advance parser)
(def ast {:type :dict :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbrace))
(def origin (current parser))
(def term (case (type origin)
:splat {:type :splat :data (try (word (advance parser)) ([e] e)) :token origin}
:word (try (word parser) ([e] e))
:keyword (do
(def key (try (kw parser) ([e] e)))
(def value (try (nonbinding parser) ([e] e)))
{:type :pair :data [key value] :token origin})
(try (panic parser (string expect "expected dict term, got " (type origin))) ([e] e))
))
(array/push (ast :data) term)
(try (separators parser) ([e] (array/push (ast :data) e))))
(advance parser)
ast)
2024-05-08 17:22:49 +00:00
### conditional forms
2024-05-08 19:29:18 +00:00
# if {simple} then {nonbinding} else {nonbinding}
2024-05-08 17:22:49 +00:00
(defn- iff [parser]
(def ast {:type :if :data @[] :token (current parser)})
(advance parser) #consume the if
2024-05-08 17:50:26 +00:00
(array/push (ast :data) (capture simple parser))
2024-05-08 19:29:18 +00:00
(accept-many parser :newline)
(if-let [err (expect-ret parser :then)]
(array/push (ast :data) err)
(advance parser))
2024-05-08 17:50:26 +00:00
(array/push (ast :data) (capture nonbinding parser))
2024-05-08 19:29:18 +00:00
(accept-many parser :newline)
(if-let [err (expect-ret parser :else)]
(array/push (ast :data) err)
(advance parser))
2024-05-08 17:50:26 +00:00
(array/push (ast :data) (capture nonbinding parser))
2024-05-08 17:22:49 +00:00
ast)
2024-05-08 19:29:18 +00:00
(defn- terminator [parser]
(if-not (terminates? parser) (panic parser "expected terminator"))
(advance parser)
(while (terminates? parser) (advance parser)))
# {simple} -> {nonbinding} {terminator}
(defn- when-clause [parser]
(def clause @[])
(print "parsing lhs: " (-> parser current type))
(array/push clause (capture simple parser))
(print "parsing arrow")
(if-let [err (expect-ret parser :arrow)]
(array/push clause err)
(advance parser))
(print "accepting newlines")
(accept-many parser :newline)
(print "parsing rhs")
(array/push clause (nonbinding parser))
(print "parsing terminator")
(try (terminator parser) ([e] (array/push clause e)))
clause)
(defn- whenn [parser]
(def ast {:type :when :data @[] :origin (current parser)})
(advance parser) # consume cond
(if-let [err (expect-ret parser :lbrace)]
(do
(array/push (ast :data) err)
(break ast))
(advance parser))
(accept-many parser :newline)
(while (not (check parser :rbrace :eof))
(array/push (ast :data) (capture when-clause parser)))
(advance parser)
ast)
2024-05-08 17:22:49 +00:00
(defn- match [parser])
### function forms
(defn- fnn [parser])
(defn- lambda [parser])
### compoound forms
(defn- block [parser])
(defn- doo [parser])
2024-04-29 22:38:08 +00:00
### expressions
# four levels of expression complexity:
# simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks)
# nonbinding (excludes let, ref, named fn: what is allowed inside collections)
# plain old exprs (anything but toplevel)
# toplevel (exprs + ns, pkg, test, import, use)
# simple expressions: what can go anywhere you expect an expression
(defrec simple [parser]
(def curr (current parser))
(case (type curr)
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:number (num parser)
:keyword (kw parser)
:string (str parser)
:lparen (tup parser)
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
:word (word parser)
(panic parser (string expect "expected simple expression, got " (type curr)))
)
)
# non-binding expressions
# the rhs of lets, clauses, inside conditional forms, etc.
(defrec nonbinding [parser]
(def curr (current parser))
(case (type curr)
# atoms
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:number (num parser)
:keyword (kw parser)
# strings
:string (str parser)
### TODO: interpolated strings
2024-04-29 22:38:08 +00:00
:interpolated (unreachable)
2024-04-29 22:38:08 +00:00
# collection literals
:lparen (tup parser)
2024-04-29 22:38:08 +00:00
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
# synthetic
:word (word parser)
# conditional forms
2024-05-08 17:22:49 +00:00
:if (iff parser)
2024-05-08 19:29:18 +00:00
:when (whenn parser)
2024-04-29 22:38:08 +00:00
:match (unreachable)
:with (unreachable)
# do
:do (unreachable)
# fn: but only lambda
:fn (unreachable)
# blocks
:lbrace (unreachable)
(panic parser (string "expected nonbinding expression, got " (type curr)))
)
)
2024-04-29 22:38:08 +00:00
(defrec expr [parser]
(def curr (current parser))
(case (type curr)
2024-05-08 19:29:18 +00:00
:let (unreachable)
:fn (unreachable)
:ref (unreachable)
2024-04-29 22:38:08 +00:00
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:number (num parser)
:keyword (kw parser)
:string (str parser)
:lparen (tup parser)
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
:word (word parser)
2024-05-08 17:50:26 +00:00
:if (iff parser)
2024-05-08 19:29:18 +00:00
:when (whenn parser)
2024-04-29 22:38:08 +00:00
:match (unreachable)
:with (unreachable)
:do (unreachable)
:lbrace (unreachable)
(panic parser (string "expected expression, got " (type curr)))
)
)
(defrec toplevel [parser]
2024-05-08 19:29:18 +00:00
(def when (current parser))
2024-04-29 22:38:08 +00:00
(case (type curr)
2024-05-08 19:29:18 +00:00
:pkg (unreachable)
:ns (unreachable)
:test (unreachable)
:import (unreachable)
:use (unreachable)
:let (unreachable)
:fn (unreachable)
:ref (unreachable)
2024-04-29 22:38:08 +00:00
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:number (num parser)
:keyword (kw parser)
:string (str parser)
:lparen (tup parser)
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
:word (word parser)
2024-05-08 17:50:26 +00:00
:if (iff parser)
2024-05-08 19:29:18 +00:00
:when (whenn parser)
2024-04-29 22:38:08 +00:00
:match (unreachable)
:with (unreachable)
:do (unreachable)
:lbrace (unreachable)
(panic parser (string "expected expression, got " (type curr)))
)
)
2024-05-08 19:29:18 +00:00
(os/cd "janet") # when repl to do relative imports
(import ./scanner :as s)
(do
#(comment
2024-05-08 19:29:18 +00:00
(def source "when {
foo -> bar
}
")
2024-05-08 17:50:26 +00:00
(def scanned (s/scan source))
(def a-parser (new-parser scanned))
2024-05-08 19:29:18 +00:00
(def parsed (whenn a-parser))
2024-04-29 22:38:08 +00:00
(-> parsed)
)