### A recursive descent parser for Ludus ### First, some mutual recursion helpers (defn unreachable "A function that errors out if called." [&] (error "reached the unreachable")) (defmacro declare "Forward-declares a function name, so that it can be called in a mutually recursive manner." [& names] (def bindings @[]) (loop [name :in names] (def binding ~(var ,name unreachable)) (array/push bindings binding)) ~(upscope ,;bindings)) (defmacro defrec "Defines a function depended on by another function, that has been forward `declare`d." [name & forms] (if-not (dyn name) (error "recursive functions must be declared before they are defined")) ~(set ,name (defn ,name ,;forms))) ### Next: a data structure for a parser (defn- new-parser "Creates a new parser data structure to pass around" [tokens] @{ :tokens (tokens :tokens) :ast @[] :current 0 :errors @[] }) ### and some helper functions for interfacing with that data structure (defn- current "Returns the current token of a parser. If the parser is at the last token, keeps returning the last token." [parser] (def tokens (parser :tokens)) (get tokens (parser :current) (last tokens))) (defn- peek "Returns the next token of the parser. If the parser is at the last token, keeps returning the last token." [parser] (def tokens (parser :tokens)) (get tokens (inc (parser :current)) (last tokens))) (defn- advance "Advances the parser by a token" [parser] (update parser :current inc)) (defn- type "Returns the type of a token" [token] (get token :type)) (defn- check "Returns true if the parser's current token is one of the passed types" [parser type & types] (def accepts [type ;types]) (def current-type (-> parser current (get :type))) (has-value? accepts current-type)) ### Parsing functions # forward declarations (declare simple nonbinding expr toplevel synthetic) # errors # terminators are what terminate expressions (def terminators [:break :newline :semicolon :eof]) (defn- terminates? "Returns true if the current token in the parser is a terminator" [parser] (def curr (current parser)) (def ttype (type curr)) (has-value? terminators ttype)) # breakers are what terminate panics (def breaking [:break :newline :semicolon :comma :eof :then :else]) (defn- breaks? "Returns true if the current token in the parser should break a panic" [parser] (def curr (current parser)) (def ttype (type curr)) (has-value? breaking ttype)) (defn- panic "Panics the parser: starts skipping tokens until a breaking token is encountered. Adds the error to the parser's errors array, and also errors out." [parser message] (print "Panic in the parser: " message) (def origin (current parser)) (advance parser) (def skipped @[origin]) (while (not (breaks? parser)) (array/push skipped (current parser)) (advance parser)) (array/push skipped (current parser)) (def err {:type :error :data skipped :token origin :msg message}) (update parser :errors array/push err) (error err)) (defn- expected "Panics the parser with a message: expected {type} got ..." [parser ttype & ttypes] (def expected (map string [ttype ;ttypes])) (def type-msg (string/join expected " | ")) (panic parser (string "expected {" type-msg "}, got " (-> parser current type)))) (defn- expect "Panics if the parser's current token is not of type; otherwise does nothing & returns nil" [parser type & types] (if-not (check parser type ;types) (expected parser type ;types))) (defn- expect-ret "Same as expect, but captures the error, returning it as a value" [parser type & types] (try (expect parser type ;types) ([e] e))) (defn- capture "Applies the parse function to the parser, returning the parsed AST. If there is a panic, captures the panic and returns it as a value." [parse-fn parser] (try (parse-fn parser) ([e] e))) (defn- accept-one "Accepts a single token of passed type, advancing the parser if a match, doing nothing if not." [parser type & types] (if (check parser type ;types) (advance parser))) (defn- accept-many "Accepts any number of tokens of a passed type, advancing the parser on match until there are no more matches. Does nothing on no match." [parser type & types] (while (check parser type ;types) (advance parser))) # atoms (defn- bool [parser] (expect parser :bool) (def curr (-> parser current)) (def ttype (type curr)) (def value (if (= ttype :true) true false)) (advance parser) {:type :bool :data value :token curr} ) (defn- num [parser] (expect parser :number) (def curr (-> parser current)) (advance parser) {:type :number :data (curr :literal) :token curr} ) (defn- kw [parser] (expect parser :keyword) (if (= :lparen (-> parser peek type)) (break (synthetic parser))) (def curr (-> parser current)) (advance parser) {:type :keyword :data (curr :literal) :token curr} ) (defn- nill [parser] (expect parser :nil) (advance parser) {:type :nil :token (current parser)} ) (defn- str [parser] (expect parser :string) (def curr (-> parser current)) (advance parser) {:type :string :data (curr :literal) :token curr} ) # words & synthetic expressions (def separates [:break :newline :comma]) (defn- separates? [parser] (def curr (current parser)) (def ttype (type curr)) (has-value? separates ttype)) (defn- separators [parser] (if-not (separates? parser) (panic parser (string "expected separator, got " (-> parser current type)))) (while (separates? parser) (advance parser))) (def sequels [:lparen :keyword]) (defn- word [parser] (expect parser :word) (if (has-value? sequels (-> parser peek type)) (break (synthetic parser))) (def curr (-> parser current)) (advance parser) {:type :word :data (curr :lexeme) :token curr} ) (defn- args [parser] (def origin (current parser)) (advance parser) # consume the :lparen (def ast @{:type :args :data @[] :token origin :partial false}) (while (separates? parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) (def origin (current parser)) (def term (if (check parser :placeholder) (if (ast :partial) (do (def err {:type :error :data [] :token origin :msg "partially applied functions may only use one placeholder"}) (advance parser) (update parser :errors array/push err) err) (do (set (ast :partial) true) (advance parser) {:type :placeholder :token origin})) (try (nonbinding parser) ([e] e)))) (array/push (ast :data) term) (try (separators parser) ([e] (pp e) (array/push (ast :data) e)))) (advance parser) ast) (defrec synthetic [parser] (def origin (current parser)) (def ast {:type :synthetic :data @[origin] :token origin}) (advance parser) (while (has-value? sequels (-> parser current type)) (def term (case (-> parser current type) :lparen (args parser) :keyword (kw parser) )) (array/push (ast :data) term) ) ast ) # collections (defn- tup [parser] (def origin (current parser)) (advance parser) # consume the :lparen (def ast {:type :tuple :data @[] :token origin}) (while (separates? parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) (def term (try (nonbinding parser) ([e] e))) (array/push (ast :data) term) (try (separators parser) ([e] (pp e) (array/push (ast :data) e)))) (advance parser) ast) (defn- list [parser] (def origin (current parser)) (advance parser) (def ast {:type :list :data @[] :token origin}) (while (separates? parser) (advance parser)) (while (not (check parser :rbracket)) (def origin (current parser)) (def term (if (check parser :splat) (do (advance parser) (def splatted (try (word parser) ([e] e))) {:type :splat :data splatted :token origin} ) (try (nonbinding parser) ([e] e)))) (array/push (ast :data) term) (try (separators parser) ([e] (array/push (ast :data) e)))) (advance parser) ast) (defn- sett [parser] (def origin (current parser)) (advance parser) (def ast {:type :set :data @[] :token origin}) (while (separates? parser) (advance parser)) (while (not (check parser :rbrace)) (def origin (current parser)) (def term (if (check parser :splat) (do (advance parser) (def splatted (try (word parser) ([e] e))) {:type :splat :data splatted :token origin} ) (try (nonbinding parser) ([e] e)))) (array/push (ast :data) term) (try (separators parser) ([e] (array/push (ast :data) e)))) (advance parser) ast) (defn- dict [parser] (def origin (current parser)) (advance parser) (def ast {:type :dict :data @[] :token origin}) (while (separates? parser) (advance parser)) (while (not (check parser :rbrace)) (def origin (current parser)) (def term (case (type origin) :splat {:type :splat :data (try (word (advance parser)) ([e] e)) :token origin} :word (try (word parser) ([e] e)) :keyword (do (def key (try (kw parser) ([e] e))) (def value (try (nonbinding parser) ([e] e))) {:type :pair :data [key value] :token origin}) (try (panic parser (string expect "expected dict term, got " (type origin))) ([e] e)) )) (array/push (ast :data) term) (try (separators parser) ([e] (array/push (ast :data) e)))) (advance parser) ast) ### patterns ### conditional forms # if {simple} then {nonbinding} else {nonbinding} (defn- iff [parser] (def ast {:type :if :data @[] :token (current parser)}) (advance parser) #consume the if (array/push (ast :data) (capture simple parser)) (accept-many parser :newline) (if-let [err (expect-ret parser :then)] (array/push (ast :data) err) (advance parser)) (array/push (ast :data) (capture nonbinding parser)) (accept-many parser :newline) (if-let [err (expect-ret parser :else)] (array/push (ast :data) err) (advance parser)) (array/push (ast :data) (capture nonbinding parser)) ast) (defn- terminator [parser] (if-not (terminates? parser) # this line panics, captures the panic, advances the parser, and re-throws the error; solves an off-by-one error (panic parser "expected terminator")) (advance parser) (while (terminates? parser) (advance parser))) # {simple} -> {nonbinding} {terminator} (defn- when-clause [parser] (try (do (def lhs (simple parser)) (expect parser :arrow) (advance parser) (accept-many parser :newline) (def rhs (nonbinding parser)) (terminator parser) [lhs rhs]) ([err] (advance parser) # consume the breaking token (accept-many parser :newline :semicolon :break) # ...and any additional ones err))) (defn- whenn [parser] (def ast {:type :when :data @[] :origin (current parser)}) (advance parser) # consume when (if-let [err (expect-ret parser :lbrace)] (do (array/push (ast :data) err) (break ast)) # early return; just bail if we don't have { (advance parser)) (accept-many parser :newline) (while (not (check parser :rbrace :eof)) # make sure we don't roll past eof (array/push (ast :data) (capture when-clause parser))) (advance parser) ast) (defn- match [parser]) ### function forms (defn- fnn [parser]) (defn- lambda [parser]) ### compoound forms (defn- block [parser]) (defn- doo [parser]) ### expressions # four levels of expression complexity: # simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks) # nonbinding (excludes let, ref, named fn: what is allowed inside collections) # plain old exprs (anything but toplevel) # toplevel (exprs + ns, pkg, test, import, use) # simple expressions: what can go anywhere you expect an expression (defrec simple [parser] (def curr (current parser)) (case (type curr) :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) :string (str parser) :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) :word (word parser) (panic parser (string expect "expected simple expression, got " (type curr))) ) ) # non-binding expressions # the rhs of lets, clauses, inside conditional forms, etc. (defrec nonbinding [parser] (def curr (current parser)) (case (type curr) # atoms :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) # strings :string (str parser) ### TODO: interpolated strings :interpolated (unreachable) # collection literals :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) # synthetic :word (word parser) # conditional forms :if (iff parser) :when (whenn parser) :match (unreachable) :with (unreachable) # do :do (unreachable) # fn: but only lambda :fn (unreachable) # blocks :lbrace (unreachable) (panic parser (string "expected nonbinding expression, got " (type curr))) ) ) (defrec expr [parser] (def curr (current parser)) (case (type curr) :let (unreachable) :fn (unreachable) :ref (unreachable) :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) :string (str parser) :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) :word (word parser) :if (iff parser) :when (whenn parser) :match (unreachable) :with (unreachable) :do (unreachable) :lbrace (unreachable) (panic parser (string "expected expression, got " (type curr))) ) ) (defrec toplevel [parser] (def when (current parser)) (case (type curr) :pkg (unreachable) :ns (unreachable) :test (unreachable) :import (unreachable) :use (unreachable) :let (unreachable) :fn (unreachable) :ref (unreachable) :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) :string (str parser) :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) :word (word parser) :if (iff parser) :when (whenn parser) :match (unreachable) :with (unreachable) :do (unreachable) :lbrace (unreachable) (panic parser (string "expected expression, got " (type curr))) ) ) (os/cd "janet") # when repl to do relative imports (import ./scanner :as s) (do #(comment (def source `when { a -> b foo -> bar quux -> baz c -> d } `) (def scanned (s/scan source)) (def a-parser (new-parser scanned)) (def parsed (whenn a-parser)) (-> parsed) # (map (fn [t] (t :type)) (scanned :tokens)) )