### A recursive descent parser for Ludus ### First, some mutual recursion helpers (defn unreachable "A function that errors out if called." [&] (error "reached the unreachable")) (defmacro declare "Forward-declares a function name, so that it can be called in a mutually recursive manner." [& names] (def bindings @[]) (loop [name :in names] (def binding ~(var ,name unreachable)) (array/push bindings binding)) ~(upscope ,;bindings)) (defmacro defrec "Defines a function depended on by another function, that has been forward `declare`d." [name & forms] (if-not (dyn name) (error "recursive functions must be declared before they are defined")) ~(set ,name (defn ,name ,;forms))) ### Next: a data structure for a parser (defn- new-parser [tokens] @{ :tokens (tokens :tokens) :ast @[] :current 0 :errors @[] }) ### and some helper functions for interfacing with that data structure (defn- current [parser] (def curr (get (parser :tokens) (parser :current))) (if (not curr) (error "no more tokens") curr)) (defn- peek [parser] (get (parser :tokens) (inc (parser :current)))) (defn- advance [parser] (update parser :current inc)) (defn- type [token] (get token :type)) (defn- check [parser type] (def current-type (-> parser current (get :type))) (= type current-type)) ### Parsing functions (declare nonbinding binding synthetic) # errors (def terminators [:break :newline :semicolon :eof]) (defn- terminates? [parser] (def curr (current parser)) (def ttype (type curr)) (has-value? terminators ttype)) (def breaking [:break :newline :semicolon :comma :eof :then :else]) (defn- breaks? [parser] (def curr (current parser)) (def ttype (type curr)) (has-value? breaking ttype)) (defn- panic [parser message] # (print "Panic in the parser: " message) (def origin (current parser)) (advance parser) (def skipped @[origin]) (while (not (breaks? parser)) (array/push skipped (current parser)) (advance parser)) (array/push skipped (current parser)) # (advance parser) (def err {:type :error :data skipped :token origin :msg message}) (update parser :errors array/push err) (error err)) (defn- expected [parser ttype] (panic parser (string "expected " ttype ", got " (-> parser current type)))) (defn- expect [parser type] (if-not (check parser type) (expected parser type))) # atoms (defn- bool [parser] (expect parser :bool) (def curr (-> parser current)) (def ttype (type curr)) (def value (if (= ttype :true) true false)) (advance parser) {:type :bool :data value :token curr} ) (defn- num [parser] (expect parser :number) (def curr (-> parser current)) (advance parser) {:type :number :data (curr :literal) :token curr} ) (defn- kw [parser] (expect parser :keyword) (if (= :lparen (-> parser peek type)) (break (synthetic parser))) (def curr (-> parser current)) (advance parser) {:type :keyword :data (curr :literal) :token curr} ) (defn- nill [parser] (expect parser :nil) (advance parser) {:type :nil :token (current parser)} ) (defn- str [parser] (expect parser :string) (def curr (-> parser current)) (advance parser) {:type :string :data (curr :literal) :token curr} ) # words & synthetic expressions (def separates [:break :newline :comma]) (defn- separates? [parser] (def curr (current parser)) (def ttype (type curr)) (has-value? separates ttype)) (defn- separators [parser] (if-not (separates? parser) (panic parser (string "expected separator, got " (-> parser current type)))) (while (separates? parser) (advance parser))) (def sequels [:lparen :keyword]) (defn- word [parser] (expect parser :word) (if (has-value? sequels (-> parser peek type)) (break (synthetic parser))) (def curr (-> parser current)) (advance parser) {:type :word :data (curr :lexeme) :token curr} ) (defn- args [parser] (def origin (current parser)) (advance parser) # consume the :lparen (def ast @{:type :args :data @[] :token origin :partial false}) (while (separates? parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) (def origin (current parser)) (def term (if (check parser :placeholder) (if (ast :partial) (do (def err {:type :error :data [] :token origin :msg "partially applied functions may only use one placeholder"}) (advance parser) (update parser :errors array/push err) err) (do (set (ast :partial) true) (advance parser) {:type :placeholder :token origin})) (try (nonbinding parser) ([e] e)))) (array/push (ast :data) term) (try (separators parser) ([e] (pp e) (array/push (ast :data) e)))) (advance parser) ast) (defrec synthetic [parser] (def origin (current parser)) (def ast {:type :synthetic :data @[origin] :token origin}) (advance parser) (while (has-value? sequels (-> parser current type)) (def term (case (-> parser current type) :lparen (args parser) :keyword (kw parser) )) (array/push (ast :data) term) ) ast ) # collections (defn- tup [parser] (def origin (current parser)) (advance parser) # consume the :lparen (def ast {:type :tuple :data @[] :token origin}) (while (separates? parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) (def term (try (nonbinding parser) ([e] e))) (array/push (ast :data) term) (try (separators parser) ([e] (pp e) (array/push (ast :data) e)))) (advance parser) ast) (defn- list [parser] (def origin (current parser)) (advance parser) (def ast {:type :list :data @[] :token origin}) (while (separates? parser) (advance parser)) (while (not (check parser :rbracket)) (def origin (current parser)) (def term (if (check parser :splat) (do (advance parser) (def splatted (try (word parser) ([e] e))) {:type :splat :data splatted :token origin} ) (try (nonbinding parser) ([e] e)))) (array/push (ast :data) term) (try (separators parser) ([e] (array/push (ast :data) e)))) (advance parser) ast) (defn- set [parser] (def origin (current parser)) (advance parser) (def ast {:type :set :data @[] :token origin}) (while (separates? parser) (advance parser)) (while (not (check parser :rbrace)) (def origin (current parser)) (def term (if (check parser :splat) (do (advance parser) (def splatted (try (word parser) ([e] e))) {:type :splat :data splatted :token origin} ) (try (nonbinding parser) ([e] e)))) (array/push (ast :data) term) (try (separators parser) ([e] (array/push (ast :data) e)))) (advance parser) ast) (defn- dict [parser] (def origin (current parser)) (advance parser) (def ast {:type :dict :data @[] :token origin}) (while (separates? parser) (advance parser)) (while (not (check parser :rbrace)) (def origin (current parser)) (def term (case (type origin) :splat {:type :splat :data (try (word (advance parser)) ([e] e)) :token origin} :word (try (word parser) ([e] e)) :keyword (do (def key (try (kw parser) ([e] e))) (def value (try (nonbinding parser) ([e] e))) {:type :pair :data [key value] :token origin}) (try (panic parser (string expect "expected dict term, got " (type origin))) ([e] e)) )) (array/push (ast :data) term) (try (separators parser) ([e] (array/push (ast :data) e)))) (advance parser) ast) ### expressions # four levels of expression complexity: # simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks) # nonbinding (excludes let, ref, named fn: what is allowed inside collections) # plain old exprs (anything but toplevel) # toplevel (exprs + ns, pkg, test, import, use) # simple expressions: what can go anywhere you expect an expression (defrec simple [parser] (def curr (current parser)) (case (type curr) :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) :string (str parser) :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) :word (word parser) (panic parser (string expect "expected simple expression, got " (type curr))) ) ) # non-binding expressions # the rhs of lets, clauses, inside conditional forms, etc. (defrec nonbinding [parser] (def curr (current parser)) (case (type curr) # atoms :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) # strings :string (str parser) ### TODO: interpolated strings :interpolated (unreachable) # collection literals :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) # synthetic :word (word parser) # conditional forms :if (unreachable) :cond (unreachable) :match (unreachable) :with (unreachable) # do :do (unreachable) # fn: but only lambda :fn (unreachable) # blocks :lbrace (unreachable) (panic parser (string "expected nonbinding expression, got " (type curr))) ) ) (defrec expr [parser] (def curr (current parser)) (case (type curr) :let nil :fn nil :ref nil :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) :string (str parser) :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) :word (word parser) :if (unreachable) :cond (unreachable) :match (unreachable) :with (unreachable) :do (unreachable) :lbrace (unreachable) (panic parser (string "expected expression, got " (type curr))) ) ) (defrec toplevel [parser] (def curr (current parser)) (case (type curr) :pkg nil :ns nil :test nil :import nil :use nil :let nil :fn nil :ref nil :nil (nill parser) :true (bool parser) :false (bool parser) :number (num parser) :keyword (kw parser) :string (str parser) :lparen (tup parser) :lbracket (list parser) :startdict (dict parser) :startset (sett parser) :word (word parser) :if (unreachable) :cond (unreachable) :match (unreachable) :with (unreachable) :do (unreachable) :lbrace (unreachable) (panic parser (string "expected expression, got " (type curr))) ) ) (os/cd "janet") # For repl to do relative imports (import ./scanner :as s) (do #(comment (def scanned (s/scan "#{}")) (def a-parser (new-parser scanned)) (def parsed (nonbinding a-parser)) (-> parsed) )