ludus/janet/parser.janet
2024-06-04 12:13:40 -04:00

1160 lines
33 KiB
Plaintext

### A recursive descent parser for Ludus
### We still need to scan some things
(try (os/cd "janet") ([_] nil)) # when in repl to do relative imports
(import ./scanner :as s)
(defmacro declare
"Forward-declares a function name, so that it can be called in a mutually recursive manner."
[& names]
(def bindings @[])
(loop [name :in names]
(def binding ~(var ,name nil))
(array/push bindings binding))
~(upscope ,;bindings))
(defmacro defrec
"Defines a function depended on by another function, that has been forward `declare`d."
[name & forms]
(if-not (dyn name) (error "recursive functions must be declared before they are defined"))
~(set ,name (defn- ,name ,;forms)))
### Next: a data structure for a parser
(defn- new-parser
"Creates a new parser data structure to pass around"
[tokens]
@{
:tokens (tokens :tokens)
:ast @[]
:current 0
:errors @[]
})
### and some helper functions for interfacing with that data structure
(defn- current
"Returns the current token of a parser. If the parser is at the last token, keeps returning the last token."
[parser]
(def tokens (parser :tokens))
(get tokens (parser :current) (last tokens)))
(defn- peek
"Returns the next token of the parser. If the parser is at the last token, keeps returning the last token."
[parser]
(def tokens (parser :tokens))
(get tokens (inc (parser :current)) (last tokens)))
(defn- advance
"Advances the parser by a token"
[parser]
(update parser :current inc))
(defn- type
"Returns the type of a token"
[token]
(get token :type))
(defn- check
"Returns true if the parser's current token is one of the passed types"
[parser type & types]
(def accepts [type ;types])
(def current-type (-> parser current (get :type)))
(has-value? accepts current-type))
### Parsing functions
# forward declarations
(declare simple nonbinding expr toplevel synthetic)
# errors
# terminators are what terminate expressions
(def terminators [:break :newline :semicolon])
(defn- terminates?
"Returns true if the current token in the parser is a terminator"
[parser]
(def curr (current parser))
(def ttype (type curr))
(has-value? terminators ttype))
# breakers are what terminate panics
(def breaking [:break :newline :semicolon :comma :eof :then :else])
(defn- breaks?
"Returns true if the current token in the parser should break a panic"
[parser]
(def curr (current parser))
(def ttype (type curr))
(has-value? breaking ttype))
(defn- panic
"Panics the parser: starts skipping tokens until a breaking token is encountered. Adds the error to the parser's errors array, and also errors out."
[parser message]
(print "Panic in the parser: " message)
(def origin (current parser))
(advance parser)
(def skipped @[origin])
(while (not (breaks? parser))
(array/push skipped (current parser))
(advance parser))
(array/push skipped (current parser))
(def err {:type :error :data skipped :token origin :msg message})
(update parser :errors array/push err)
(error err))
(defn- expected
"Panics the parser with a message: expected {type} got ..."
[parser ttype & ttypes]
(def expected (map string [ttype ;ttypes]))
(def type-msg (string/join expected " | "))
(panic parser (string "expected {" type-msg "}, got " (-> parser current type))))
(defn- expect
"Panics if the parser's current token is not of type; otherwise does nothing & returns nil"
[parser type & types]
(if-not (check parser type ;types) (expected parser type ;types)))
(defn- expect-ret
"Same as expect, but captures the error, returning it as a value"
[parser type & types]
(try (expect parser type ;types) ([e] e)))
(defn- capture
"Applies the parse function to the parser, returning the parsed AST. If there is a panic, captures the panic and returns it as a value."
[parse-fn parser]
(try (parse-fn parser) ([e] e)))
(defn- accept-one
"Accepts a single token of passed type, advancing the parser if a match, doing nothing if not."
[parser type & types]
(if (check parser type ;types) (advance parser)))
(defn- accept-many
"Accepts any number of tokens of a passed type, advancing the parser on match until there are no more matches. Does nothing on no match."
[parser type & types]
(while (check parser type ;types) (advance parser)))
# atoms
(defn- bool [parser]
(expect parser :true :false)
(def curr (-> parser current))
(def ttype (type curr))
(def value (if (= ttype :true) true false))
(advance parser)
{:type :bool :data value :token curr}
)
(defn- num [parser]
(expect parser :number)
(def curr (-> parser current))
(advance parser)
{:type :number :data (curr :literal) :token curr}
)
(defn- kw [parser]
(expect parser :keyword)
(if (= :lparen (-> parser peek type)) (break (synthetic parser)))
(def curr (-> parser current))
(advance parser)
{:type :keyword :data (curr :literal) :token curr}
)
(defn- kw-only [parser]
(expect parser :keyword)
(def curr (-> parser current))
(advance parser)
{:type :keyword :data (curr :literal) :token curr})
(defn- pkg-kw [parser]
(expect parser :pkg-kw)
(def curr (-> parser current))
(advance parser)
{:type :pkg-kw :data (curr :literal) :token curr})
(defn- nill [parser]
(expect parser :nil)
(def curr (current parser))
(advance parser)
{:type :nil :token curr})
(defn- str [parser]
(expect parser :string)
(def curr (-> parser current))
(advance parser)
{:type :string :data (curr :literal) :token curr})
# interpolated strings, which are a whole other scene
(defn- scan-interpolations [data]
(print "scanning interpolation: " data)
(when (buffer? data) (break data))
(pp data)
(def to-scan (data :to-scan))
(def {:tokens tokens :errors errors} (s/scan to-scan))
(pp tokens)
(print "there are " (length tokens) " tokens")
(def first-token (first tokens))
(cond
(first errors) (first errors)
(empty? tokens)
{:type :error :msg "string interpolations/patterns must be single words"}
(< 3 (length tokens))
{:type :error :msg "string interpolations/patterns must be single words"}
(= :word (first-token :type))
{:type :word :data (first-token :lexeme) :token first-token}
:else {:type :error :msg "string interpolations/patterns must be single words"}))
(defn- is-error? [data]
(cond
(buffer? data) false
(= :error (data :type)) true
false))
(defn- interpolated [parser]
(expect parser :interpolated)
(def origin (current parser))
(def source (origin :literal))
(def data @[])
(var curr @"")
(var interp? false)
(var escape? false)
(each code source
(def char (string/from-bytes code))
(cond
(= char "\\") (set escape? true)
escape? (if (= char "{")
(do
(buffer/push curr "{")
(set escape? false))
(do
(buffer/push curr "\\")
(buffer/push curr char)
(set escape? false)))
(= char "{") (do
(set interp? true)
(array/push data curr)
(set curr @""))
(= char "}") (if interp? (do
(set interp? false)
(array/push data {:to-scan curr})
(set curr @""))
(buffer/push curr char))
:else (buffer/push curr char)))
(array/push data curr)
(def interpolated (map scan-interpolations data))
(advance parser)
(def ast @{:type :interpolated :data interpolated :token origin})
(if (some is-error? interpolated)
(do
(def err {:type :error :msg "bad interpolated string" :data ast :token origin})
(array/push (parser :errors) err)
err)
ast))
# words & synthetic expressions
(def separates [:break :newline :comma])
(defn- separates? [parser]
(def curr (current parser))
(def ttype (type curr))
(has-value? separates ttype))
(defn- separators [parser]
(if-not (separates? parser)
(panic parser (string "expected separator, got " (-> parser current type))))
(while (separates? parser) (advance parser)))
(def sequels [:lparen :keyword :pkg-kw])
(defn- word-expr [parser]
(expect parser :word)
(if (has-value? sequels (-> parser peek type)) (break (synthetic parser)))
(def curr (-> parser current))
(advance parser)
{:type :word :data (curr :lexeme) :token curr})
(defn- word-only [parser]
(expect parser :word)
(def curr (current parser))
(advance parser)
{:type :word :data (curr :lexeme) :token curr})
(defn- args [parser]
(def origin (current parser))
(advance parser) # consume the :lparen
(def ast @{:type :args :data @[] :token origin :partial false})
(while (separates? parser) (advance parser)) # consume any separators
(while (not (check parser :rparen))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed paren"})
(array/push (parser :errors) err)
(error err))
(def origin (current parser))
(def term (if (check parser :placeholder)
(if (ast :partial)
(do
(def err {:type :error :data [] :token origin :msg "partially applied functions may only use one placeholder"})
(advance parser)
(update parser :errors array/push err)
err)
(do
(set (ast :partial) true)
(advance parser)
{:type :placeholder :token origin}))
(capture nonbinding parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
(advance parser)
ast)
(defn- synth-root [parser]
(print "parsing synth root")
(def origin (current parser))
(advance parser)
(case (type origin)
:word {:type :word :data (origin :lexeme) :token origin}
:keyword {:type :keyword :data (origin :literal) :token origin}
:pkg-name {:type :pkg-name :data (origin :lexeme) :token origin}
(panic parser "expected word, keyword, or package")
)
)
(defrec synthetic [parser]
(print "parsing synthetic")
(def origin (current parser))
# (def ast {:type :synthetic :data @[(synth-root parser)] :token origin})
(def terms @[(synth-root parser)])
(while (has-value? sequels (-> parser current type))
(def term
(case (-> parser current type)
:lparen (args parser)
:keyword (kw-only parser)
:pkg-kw (pkg-kw parser)
))
(array/push terms term)
)
{:type :synthetic :data [;terms] :token origin})
# collections
(defn- tup [parser]
(def origin (current parser))
(advance parser) # consume the :lparen
(def ast {:type :tuple :data @[] :token origin})
(while (separates? parser) (advance parser)) # consume any separators
(while (not (check parser :rparen))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed paren"})
(array/push (parser :errors) err)
(error err))
(def term (capture nonbinding parser))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
(advance parser)
ast)
(defn- list [parser]
(def origin (current parser))
(advance parser)
(def ast {:type :list :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbracket))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed bracket"})
(array/push (parser :errors) err)
(error err))
(def origin (current parser))
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (capture word-only parser))
{:type :splat :data splatted :token origin}
)
(capture nonbinding parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
(advance parser)
ast)
(defn- sett [parser]
(def origin (current parser))
(advance parser)
(def ast {:type :set :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbrace))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed brace"})
(array/push (parser :errors) err)
(error err))
(def origin (current parser))
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (capture word-only parser))
{:type :splat :data splatted :token origin}
)
(capture nonbinding parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
(advance parser)
ast)
(defn- dict [parser]
(def origin (current parser))
(advance parser)
(def ast {:type :dict :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbrace))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed brace"})
(array/push (parser :errors) err)
(error err))
(def origin (current parser))
(def term (case (type origin)
:splat {:type :splat :data (capture word-only (advance parser)) :token origin}
:word (do
(def value (capture word-only parser))
(def key {:type :keyword :data (keyword (value :data))
:token origin})
{:type :pair :data [key value] :token origin})
:keyword (do
(def key (capture kw-only parser))
(def value (capture nonbinding parser))
{:type :pair :data [key value] :token origin})
(try (panic parser (string "expected dict term, got " (type origin))) ([e] e))
))
(array/push (ast :data) term)
(try (separators parser) ([e] (array/push (ast :data) e))))
(advance parser)
ast)
### patterns
(declare pattern)
(defn- placeholder [parser]
(expect parser :placeholder :ignored)
(def origin (current parser))
(advance parser)
{:type :placeholder :token origin})
(defn- word-pattern [parser]
(expect parser :word)
(def origin (current parser))
(advance parser)
(def the-word {:type :word :data (origin :lexeme) :token origin})
(if (check parser :as)
(do
(advance parser)
(def type (kw parser))
{:type :typed :data [type the-word] :token origin})
the-word))
(defn- tup-pattern [parser]
(def origin (current parser))
(advance parser) # consume the :lparen
(def ast {:type :tuple :data @[] :token origin})
(while (separates? parser) (advance parser)) # consume any separators
(while (not (check parser :rparen))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed paren"})
(array/push (parser :errors) err)
(error err))
(def origin (current parser))
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (when (check parser :word) (word-only parser)))
{:type :splat :data splatted :token origin})
(capture pattern parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (pp e) (array/push (ast :data) e))))
(advance parser)
ast)
(defn- list-pattern [parser]
(def origin (current parser))
(advance parser)
(def ast {:type :list :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbracket))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed bracket"})
(array/push (parser :errors) err)
(error err))
(def origin (current parser))
(def term (if (check parser :splat)
(do
(advance parser)
(def splatted (when (check parser :word) (word-only parser)))
{:type :splat :data splatted :token origin})
(capture pattern parser)))
(array/push (ast :data) term)
(try (separators parser)
([e] (array/push (ast :data) e))))
(advance parser)
ast)
(defn- dict-pattern [parser]
(def origin (current parser))
(advance parser)
(def ast {:type :dict :data @[] :token origin})
(while (separates? parser) (advance parser))
(while (not (check parser :rbrace))
(when (check parser :eof)
(def err {:type :error :token origin :msg "unclosed brace"})
(array/push (parser :errors) err)
(error err))
(def origin (current parser))
(def term (case (type origin)
:splat {:type :splat :data (when (check (advance parser) :word) (word-only parser)) :token origin}
:word (do
(def word (capture word-pattern parser))
(def name (word :data))
(def key {:type :keyword :data (keyword name) :token origin})
{:type :pair :data [key word] :token origin})
:keyword (do
(def key (capture kw-only parser))
(def value (capture pattern parser))
{:type :pair :data [key value] :token origin})
(try (panic parser (string "expected dict term, got " (type origin))) ([e] e))
))
(array/push (ast :data) term)
(try (separators parser) ([e] (array/push (ast :data) e))))
(advance parser)
ast)
### TODO: add as patterns
(defrec pattern [parser]
(case (-> parser current type)
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:keyword (kw parser)
:number (num parser)
:string (str parser)
:word (word-pattern parser)
:placeholder (placeholder parser)
:ignored (placeholder parser)
:lparen (tup-pattern parser)
:lbracket (list-pattern parser)
:startdict (dict-pattern parser)
:interpolated (interpolated parser)
(panic parser "expected pattern")
))
### let
# let {pattern} = {nonbinding}
(defn- lett [parser]
(def ast {:type :let :data @[] :token (current parser)})
(advance parser) # consume the let
(array/push (ast :data) (capture pattern parser))
(if-let [err (expect-ret parser :equals)]
(do (array/push (ast :data) err) (break ast))
(advance parser))
(accept-many parser :newline)
(array/push (ast :data) (capture nonbinding parser))
ast)
### conditional forms
# if {simple} then {nonbinding} else {nonbinding}
(defn- iff [parser]
(def ast {:type :if :data @[] :token (current parser)})
(advance parser) #consume the if
(array/push (ast :data) (capture simple parser))
(accept-many parser :newline)
(if-let [err (expect-ret parser :then)]
(array/push (ast :data) err)
(advance parser))
(array/push (ast :data) (capture nonbinding parser))
(accept-many parser :newline)
(if-let [err (expect-ret parser :else)]
(array/push (ast :data) err)
(advance parser))
(array/push (ast :data) (capture nonbinding parser))
ast)
(defn- terminator [parser]
(if-not (terminates? parser)
# this line panics, captures the panic, advances the parser, and re-throws the error; solves an off-by-one error
(panic parser "expected terminator"))
(advance parser)
(while (terminates? parser) (advance parser)))
# {simple} -> {nonbinding} {terminator}
### TODO: add placeholder as valid lhs
(defn- when-clause [parser]
(try
(do
(def lhs (simple parser))
(expect parser :arrow)
(advance parser)
(accept-many parser :newline)
(def rhs (nonbinding parser))
(terminator parser)
[lhs rhs])
([err]
(advance parser) # consume the breaking token
(accept-many parser :newline :semicolon :break) # ...and any additional ones
err)))
# when { {when-clause}+ }
(defn- whenn [parser]
(def origin (current parser))
(def ast {:type :when :data @[] :token origin})
(advance parser) # consume when
(if-let [err (expect-ret parser :lbrace)]
(do
(array/push (ast :data) err)
(break ast)) # early return; just bail if we don't have {
(advance parser))
(accept-many parser :newline)
(while (not (check parser :rbrace )) # make sure we don't roll past eof
(when (check parser :eof) (error {:type :error :token origin :data ast :msg "unclosed brace"}))
(array/push (ast :data) (capture when-clause parser)))
(advance parser)
ast)
### TODO: add guards to patterns
(defn- match-clause [parser]
(try
(do
(def ast {:type :clause :data @[] :origin (current parser)})
(def lhs (pattern parser))
(def guard (when (check parser :if)
(advance parser)
(simple parser)))
(expect parser :arrow)
(advance parser)
(accept-many parser :newline)
(def rhs (nonbinding parser))
(terminator parser)
[lhs guard rhs])
([err]
(accept-many parser ;terminators)
err)))
(defn- matchh [parser]
(def origin (current parser))
(def ast {:type :match :data @[] :token origin})
(var to-match nil)
(def clauses @[])
(expect parser :match)
(advance parser)
(try
(do
(set to-match (simple parser))
(expect parser :with) (advance parser)
(def open-brace (current parser))
(expect parser :lbrace) (advance parser)
(accept-many parser :newline)
(while (not (check parser :rbrace))
(when (check parser :eof)
(error {:type :error :token open-brace :msg "unclosed brace"}))
(array/push clauses (match-clause parser)))
(advance parser)
@{:type :match :data [to-match clauses] :token origin})
([err] err)))
# {pattern} = {nonbinding} {terminators}
(defn- with-clause [parser]
(try
(do
(def lhs (pattern parser))
(def guard (when (check parser :if)
(advance parser)
(simple parser)))
(expect parser :equals) (advance parser)
(def rhs (nonbinding parser))
(terminator parser)
[lhs guard rhs]
)
([err]
(accept-many parser ;terminators)
err)
)
)
# with { {clauses}+ } {terminators}? then {nonbinding} {terminators}? else {nonbinding}
(defn- withh [parser]
(def origin (current parser))
(expect parser :with) (advance parser)
(try
(do
(expect parser :lbrace) (var lbrace (current parser)) (advance parser)
(accept-many parser ;terminators)
(def clauses @[])
(array/push clauses (with-clause parser))
(accept-many parser ;terminators)
(while (not (check parser :rbrace))
(if (check parser :eof)
(error {:type :error :data [clauses] :token lbrace :msg "unclosed brace"}))
(array/push clauses (with-clause parser))
(accept-many parser ;terminators))
(advance parser) # consume closing brace
(accept-many parser :newline)
(expect parser :then) (advance parser)
(def then (nonbinding parser))
(accept-many parser :newline)
(expect parser :else) (advance parser)
(expect parser :lbrace) (set lbrace (current parser)) (advance parser)
(accept-many parser ;terminators)
(def else @[])
(while (not (check parser :rbrace))
(when (check parser :eof) (error {:type :error :token lbrace :data [else] :msg "unclosed brace"}))
(array/push else (match-clause parser)))
(advance parser)
{:type :with :data [clauses then else] :token origin})
([err] err)
)
)
### function forms
(defn- fn-simple [parser]
(print "parsing simple function body")
(try
(do
(def lhs (tup-pattern parser))
(print "parsed lhs")
(def guard (when (check parser :if)
(advance parser)
(simple parser)))
(print "parsed guard")
(expect parser :arrow) (advance parser)
(print "parsed arrow")
(accept-many parser :newline)
(def rhs (nonbinding parser))
(print "parsed rhs")
{:clauses [[lhs guard rhs]]}
)
([err] err)
)
)
(defn- fn-clause [parser]
(def origin (current parser))
(try
(do
(def lhs (tup-pattern parser))
(def guard (when (check parser :if)
(advance parser)
(simple parser)))
(expect parser :arrow) (advance parser)
(accept-many parser :newline)
(def rhs (nonbinding parser))
(terminator parser)
[lhs guard rhs])
([err]
(advance parser)
(accept-many parser ;terminators)
err
)
)
)
(defn- fn-clauses [parser]
(print "parsing fn clauses")
(def origin (current parser))
(expect parser :lbrace) (advance parser)
(accept-many parser ;terminators)
(def doc (when (= :string ((current parser) :type))
(def docstring ((current parser) :literal))
(advance parser)
(accept-many parser ;terminators)
docstring))
(def data @[])
(while (not (check parser :rbrace))
(if (check parser :eof)
(error {:type :error :token origin :data data :msg "unclosed brace"}))
(array/push data (capture fn-clause parser)))
(advance parser)
{:clauses data :doc doc})
(defn- lambda [parser]
(def origin (current parser))
(expect parser :fn) (advance parser)
@{:type :fn :data (fn-simple parser) :token origin})
(defn- fnn [parser]
(if (= :lparen (-> parser peek type)) (break (lambda parser)))
(try
(do
(print "parsing named function")
(def origin (current parser))
(expect parser :fn) (advance parser)
(print "consumed `fn`")
(print "next token: ")
(pp (current parser))
(def name (-> parser word-only (get :data)))
(print "function name: ")
(pp name)
(def {:clauses data :doc doc} (case (-> parser current type)
:lbrace (fn-clauses parser)
:lparen (fn-simple parser)
{:clauses :nothing}))
@{:type :fn :name name :data data :token origin :doc doc})
([err] err)))
### compoound forms
(defn- block [parser]
(def origin (current parser))
(expect parser :lbrace) (advance parser)
(accept-many parser ;terminators)
(def data @[])
(while (not (check parser :rbrace))
(if (check parser :eof)
(error {:type :error :token origin :data data :msg "unclosed brace"}))
(array/push data (capture expr parser))
(terminator parser))
(advance parser)
{:type :block :data data :token origin})
### TODO: decide whether this design works
# newlines are allowed AFTER pipelines, but not before
# eg. `do foo > \n bar > \n baz`
# but not `do foo \n > bar \n > baz`
# Otherwise, this isn't LR
(defn- doo [parser]
(def origin (current parser))
(expect parser :do) (advance parser)
(def data @[])
(array/push data (capture simple parser))
(print "added first expression. current token:")
(pp (current parser))
(while (check parser :pipeline)
(advance parser)
(accept-many parser :newline)
(array/push data (capture simple parser)))
{:type :do :data data :token origin})
### refs, pkgs, nses, etc.
(defn- ref [parser]
(def origin (current parser))
(expect parser :ref) (advance parser)
(try
(do
(def name (-> parser word-only (get :data)))
(expect parser :equals) (advance parser)
(def value (nonbinding parser))
{:type :ref :data value :name name :token origin})
([err] err)))
(defn- pkg-name [parser]
(expect parser :pkg-name)
(def origin (current parser))
(def next-type (-> parser peek type))
(when (or (= :keyword next-type) (= :pkg-kw next-type))
(break (synthetic parser)))
(advance parser)
{:type :pkg-name :data (origin :lexeme) :token origin})
(defn- pkg-name-only [parser]
(expect parser :pkg-name)
(def origin (current parser))
(advance parser)
{:type :pkg-name :data (origin :lexeme) :token origin})
(defn- usee [parser]
(def origin (current parser))
(expect parser :use) (advance parser)
(try
(do
{:type :use :data (pkg-name parser) :token origin})
([err] err)))
(defn- pkg [parser]
(try
(do
(def origin (current parser))
(expect parser :pkg) (advance parser)
(def name (-> parser pkg-name (get :data)))
(expect parser :lbrace) (advance parser)
(while (separates? parser) (advance parser))
(def data @[])
(while (not (check parser :rbrace))
(when (check parser :eof)
(def err {:type :error :token origin :data data :msg "unclosed brace"})
(array/push (parser :errors) err)
(error err))
(case (-> parser current type)
:pkg-name (do
(def origin (current parser))
(def value (pkg-name-only parser))
(def key (keyword (value :data)))
(def pkg-kw-ast {:type :pkg-kw :data key :token origin})
(array/push data {:type :pkg-pair :data [pkg-kw-ast value] :token origin}))
:keyword (do
(def origin (current parser))
(def key (capture kw parser))
(def value (capture simple parser))
(array/push data {:type :pair :data [key value] :token origin}))
:word (do
(def origin (current parser))
(def value (word-only parser))
(def key (keyword (value :data)))
(def kw-ast {:type :keyword :data key :token origin})
(array/push data {:type :pair :data [kw-ast value] :token origin}))
(panic parser "expected pkg term"))
(separators parser))
(advance parser)
@{:type :pkg :data data :token origin :name name})
([err] err)))
(defn- ns [parser]
(try
(do
(def origin (current parser))
(expect parser :ns) (advance parser)
(def name (-> parser pkg-name (get :data)))
(def body (block parser))
@{:type :ns :data body :name name :token origin})
([err] err)))
(defn- importt [parser]
(def origin (current parser))
(expect parser :import) (advance parser)
(def path (str parser))
(expect parser :as) (advance parser)
(def name-parser (if (check parser :pkg-name) pkg-name word-only))
(def name
(-> parser name-parser (get :data)))
{:type :import :data path :name name :token origin})
### tests
(defn- testt [parser]
(def origin (current parser))
(expect parser :test) (advance parser)
(def desc (str parser))
(def body (nonbinding parser))
{:type :test :data [desc body] :token origin})
### loops and repeates
(defn- loopp [parser]
(def origin (current parser))
(expect parser :loop) (advance parser)
(def args (tup parser))
(expect parser :with) (advance parser)
(def clauses (case (-> parser current type)
:lparen (fn-simple parser)
:lbrace (fn-clauses parser)
))
@{:type :loop :data [args clauses] :token origin})
(defn- recur [parser]
(def origin (current parser))
(expect parser :recur) (advance parser)
(def args (tup parser))
{:type :recur :data args :token origin})
(defn- repeatt [parser]
(def origin (current parser))
(advance parser)
(def times (case (-> parser current type)
:number (num parser)
:word (word-only parser)
(panic parser "expected number or word")
))
(def body (block parser))
{:type :repeat :data [times body] :token origin})
### panics
(defn- panicc [parser]
(def origin (current parser))
(expect parser :panic) (advance parser)
{:type :panic :data (nonbinding parser) :token origin})
### expressions
# four levels of expression complexity:
# simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks)
# nonbinding (excludes let, ref, named fn: what is allowed inside collections)
# plain old exprs (anything but toplevel)
# toplevel (exprs + ns, pkg, test, import, use)
# simple expressions: what can go anywhere you expect an expression
(defrec simple [parser]
(def curr (current parser))
(case (type curr)
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:number (num parser)
:keyword (kw parser)
:string (str parser)
:interpolated (interpolated parser)
:lparen (tup parser)
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
:word (word-expr parser)
:pkg-name (pkg-name parser)
:recur (recur parser)
:panic (panicc parser)
(panic parser (string "expected simple expression, got " (type curr)))
)
)
# non-binding expressions
# the rhs of lets, clauses, inside conditional forms, etc.
# any form that does not bind a name
(defrec nonbinding [parser]
(def curr (current parser))
(case (type curr)
# atoms
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:number (num parser)
:keyword (kw parser)
# strings
:string (str parser)
### TODO: interpolated strings
:interpolated (interpolated parser)
# collection literals
:lparen (tup parser)
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
# synthetic
:word (word-expr parser)
:pkg-name (pkg-name parser)
:recur (recur parser)
# conditional forms
:if (iff parser)
:when (whenn parser)
:match (matchh parser)
:with (withh parser)
# do
:do (doo parser)
# fn: but only lambda
:fn (lambda parser)
# blocks
:lbrace (block parser)
# looping forms
:loop (loopp parser)
:repeat (repeatt parser)
# panic!
:panic (panicc parser)
(panic parser (string "expected nonbinding expression, got " (type curr)))
)
)
(defrec expr [parser]
(def curr (current parser))
(case (type curr)
# binding forms
:let (lett parser)
:fn (fnn parser)
:ref (ref parser)
# nonbinding forms
:nil (nill parser)
:true (bool parser)
:false (bool parser)
:number (num parser)
:keyword (kw parser)
:string (str parser)
:interpolated (interpolated parser)
:lparen (tup parser)
:lbracket (list parser)
:startdict (dict parser)
:startset (sett parser)
:word (word-expr parser)
:pkg-name (pkg-name parser)
:recur (recur parser)
:if (iff parser)
:when (whenn parser)
:match (matchh parser)
:with (withh parser)
:do (doo parser)
:lbrace (block parser)
:loop (loopp parser)
:repeat (repeatt parser)
:panic (panicc parser)
(panic parser (string "expected expression, got " (type curr)))
)
)
(defrec toplevel [parser]
(def curr (current parser))
(case (type curr)
# toplevel-only
:pkg (pkg parser)
:ns (ns parser)
:test (testt parser)
:import (importt parser)
:use (usee parser)
# all the other expressions
(expr parser)
)
)
(defn- script [parser]
(def origin (current parser))
(def lines @[])
(while (not (check parser :eof))
(array/push lines (capture toplevel parser))
(capture terminator parser))
{:type :script :data lines :token origin})
(defn parse [scanned]
(def parser (new-parser scanned))
(def ast (script parser))
(set (parser :ast) ast)
parser)
(defn- indent-by [n]
(def indentation @"")
(repeat n (buffer/push indentation ".."))
indentation)
(defn- pp-ast [ast &opt indent]
(default indent 0)
(def {:type t :name n :data d :msg m} ast)
(string (indent-by indent) t ": " n m
(if (indexed? d)
(string "\n" (string/join (map (fn [a] (pp-ast a (inc indent))) d)))
d
)
"\n"
)
)
(do
# (comment
(def source `
fn foo {
() -> :clause
}
`)
(def scanned (s/scan source))
(print "\n***NEW PARSE***\n")
(def a-parser (new-parser scanned))
(def parsed (script a-parser))
# (print (pp-ast parsed))
# (pp scanned)
(pp parsed)
# (def cleaned (get-in parsed [:data 2]))
# (pp cleaned)
)
# FIXME:
# TODO:
# DECIDE:
# - when to use a flat try/catch format, and when to use capture/expect-ret to get values instead of errors