From b6e1d0e6ec23caa62436ba1e83178dd41e304550 Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Tue, 14 May 2024 18:56:06 -0400 Subject: [PATCH] clean up files --- janet/clj-loop.janet | 49 -- janet/myparser.janet | 393 ----------- janet/parser.janet | 1520 +++++++++++++++++++++++++++++----------- janet/peg-parser.janet | 104 --- janet/recursive.janet | 1112 ----------------------------- 5 files changed, 1112 insertions(+), 2066 deletions(-) delete mode 100644 janet/clj-loop.janet delete mode 100644 janet/myparser.janet delete mode 100644 janet/peg-parser.janet delete mode 100644 janet/recursive.janet diff --git a/janet/clj-loop.janet b/janet/clj-loop.janet deleted file mode 100644 index 502f8c7..0000000 --- a/janet/clj-loop.janet +++ /dev/null @@ -1,49 +0,0 @@ -(defmacro clj-loop - ``A drop-in replacement for Clojure's loop form. Useful for the current project of converting Clojure code to Janet. - `` - [bindings & body] - (assert (even? (length bindings)) "Binding tuple must have an even number of terms") - (def names @[]) - (def args @[]) - (loop [i :range [0 (length bindings)]] - (if (even? i) - (array/push names (get bindings i)) - (array/push args (get bindings i)))) - ~(do (defn recur [,;names] ,;body) (recur ,;args))) - -(defmacro defn+ - [name & clauses] - ~(defn ,name [& args] - (print "before do") - (do - (def arities @{}) - (def clauses ,clauses) - (def bindingses (map first clauses)) - (def bodies (map |(slice $ 1) clauses)) - (print "before loop") - (loop [i :range [0 (length clauses)]] - (def bindings (get bindingses i)) - (def arity (length bindings)) - (assert (not (get arities i)) "Clauses must have different arities") - (def body (get bodies i)) - (def clause ~(fn ,name ,bindings ,;body)) - (put $arities arity clause)) - (print "before quasiquote") - (fn [& args] - (def arity (length args)) - (def clause (get arities arity)) - (assert clause "No clause with that arity") - (clause ;args))))) - -(defn+ add - ([] 0) - ([x] x) - ([x y] (+ x y))) - -(macex1 -'(defn+ add - ([] 0) - ([x] x) - ([x y] (+ x y)))) - - diff --git a/janet/myparser.janet b/janet/myparser.janet deleted file mode 100644 index 60c6d11..0000000 --- a/janet/myparser.janet +++ /dev/null @@ -1,393 +0,0 @@ -(comment -So the thing here is that this is a much, much simpler version of the rather unweildy Clojure parser framework I wrote. This leans pretty hard on the Janet PEG framework, but is also bespoke for my purposes here. - -This lets me parse not raw text, but scanned tokens, which makes parsing a much easier lift. Also, it lets me do things like insert :break tokens, which really helps manage collection types and blocks. - -At current, I do need to shift to returning a data structure rather than `nil`s and `true`s. That way, I can advance the parser by a certain amount, however much a passing test-rule would consume. - -Do I really need to optimize this? I don't think so: we can do that later. I could store various information on stacks. - -Alternately, returning data structures may well complicate things: KISS! - -The idea, however, is that Jesus: we must not have test-rule and apply-rule: combinators without a fixed advance amount can't work that way. So we have only `apply-rule`, which `advance`s the parser by however much should be consumed. BUT! We can always, and ought wherever we might, stash the origin index and roll back any changes (i.e. in `!` and `*`) - -) - -(defn pprint [x] (printf "%M" x)) - -(os/cd "janet") ### XXX: to remove, repl only -(import ./scanner :prefix "") - -(defn new-parser - `` - Creates a new parser from a list of tokens. A parser has three fields in addition to `:tokens`: - :i the index, in the list of tokens, of the current token - :errors an array of errors, treated like a stack - :captured an array of captures (tokens or arrays of tokens), treated like a stack. - `` - [{:tokens tokens}] - @{:tokens tokens :i 0 :errors @[] :captured @[]}) - -(defn pop-to - "Pops items from a stack until it contains `len` items. Returns the stack." - [stack len] - (assert (>= len 0)) - (while (< len (length stack)) (array/pop stack)) - stack) - -(defn popn - "Pops n items from a stack. Returns the stack." - [stack n] - (loop [_ :range [0 n]] (array/pop stack)) - stack) - -(defn current "Returns the current token." [{:tokens tokens :i i}] (get tokens i)) - -(defn previous "Returns the token before the current token." [{:tokens tokens :i i}] (get tokens (dec i))) - -(defn next "Returns the token after the current token." [{:tokens tokens :i i}] (get tokens (inc i))) - -(def add +) ### I `def` `+` later; stash this - -(defn advance - "Advances the parser by `count`, or 1 if count is not supplied." - [parser] (update parser :i inc)) - -(defn stash - "Stashes a parser state: captures the current index, error, and capture stacks." - [{:i i :errors errs :captured cap}] - {:i i :err-len (length errs) :cap-len (length cap)}) - -(defn restore - "Restores the parser to a stashed state: resets :i, pops any new errors and captures off their stacks." - [parser state] - (def {:i i :err-len err-len :cap-len cap-len} state) - (def {:errors errors :captured captured} parser) - (put parser :i i) - (pop-to errors err-len) - (pop-to captured cap-len) - parser) - -(defn at "Returns the token at index i." [parser i] (get (get parser :tokens) i)) - -(defn capture "Captures the last token." [parser] (update parser :captured array/push (previous parser))) - -(defn apply-keyword - "Applies a keyword-based rule. Consumes the passing token." - [kw parser] - (def curr (current parser)) - (def type (get curr :type)) - (if (= kw type) - (do - (advance parser) - true) - nil)) - -(defn apply-table - "Applies a table-based rule." - [rule parser] - (def curr (current parser)) - (def rule-fn (get rule :rule)) - (rule-fn parser)) - -(defn apply-rule - "Applies a rule: if the rule passes, consume all passing tokens and return true. If the rule fails, do nothing, returning `nil`." - [rule parser] - (case (type rule) - :keyword (apply-keyword rule parser) - :table (apply-table rule parser) - (error (string "unknown parser type: " (type rule))))) - -(defn name - "Returns the name of a rule. Use this instead of (get rule :name), since bare keywords are also valid rules." - [rule] - (case (type rule) - :keyword rule - :table (rule :name))) - -(defn rename - "Renames a rule. By convention, rule names are keywords." - [rule name] (put rule :name name)) - -### As noted above: we should not use this -### Instead, combinators which might not advance should stash state -### These are: !, *, ...? -(defn test - "Tests a rule: returns whether a rule passes but does not consume any tokens." - [rule parser] - (def origin (stash parser)) - (def result (apply-rule rule parser)) - (restore parser origin) - result) - -(defn ! - "Not: negates a rule. If the rule passes, does nothing, returns `nil`. If the rule fails, advances the parser a single token, returns true. Works well only for single token rules." - [rule] - @{:name (keyword (string "!" (name rule))) - :rule (fn !* [parser] - (def origin (stash parser)) - (def result (apply-rule rule parser)) - (restore parser origin) - (if-not result - (do (advance parser) true) - nil))}) - -(defn <- - "Capture: if the rule matches, consumes the token and pushes it to the capture stack. If the rule does not match, does nothing." - [rule] - @{:name (keyword (string "<-" (name rule))) - :rule (fn <-* [parser] - (def origin (stash parser)) - (def result (apply-rule rule parser)) - (if result (capture parser)) - result)}) - -(defn + - "Choose a rule: matches if any one of the rules is matched. Order matters: rules are evaluated in order. It's useful to put any errors last." - [& rules] - @{:name (keyword (string/join (map name rules) "+")) - :rule (fn +* [parser] - (var passing nil) - (def origin (stash parser)) - (loop [rule :in rules :while (not passing)] - (restore parser origin) - (set passing (apply-rule rule parser))) - (if-not passing (restore parser origin)) - passing)}) - -(defn unreachable [&] (error "reached the unreachable")) - -(defmacro declare [& names] - (def bindings @[]) - (loop [name :in names] - (def binding ~(var ,name @{:name ,(keyword name) :rule unreachable})) - (array/push bindings binding)) - ~(upscope ,;bindings)) - -(defmacro defp [name rule] - (if (dyn name) - ~(set ,name (put ,name :rule (,rule :rule))) - ~(var ,name (rename ,rule ,(keyword name))))) - -(comment (defn panic - [rule &opt msg] - @{:name (keyword (string "panic-" (name rule))) - :rule (fn panic* [parser] - (print "panicking!") - (def origin (current parser)) - (var passing false) - (def skipped @[]) - (while (not passing) - (array/push skipped (current parser)) - (advance parser) - (set passing (apply-rule rule parser))) - (print "looped") - (def the-error @{:type :error :token origin :msg msg :skipped skipped}) - (array/push (parser :errors) the-error) - (array/push (parser :captured) the-error) - true - )})) - -# If we wanted to be robust, we wouldn't hard-code this -# To consider: use a stack of "panic-until" on a parser to dynamically decide how far to panic -(defp stop (+ :newline :semicolon :break)) - -(defn panic - "Panics the parser, consuming all tokens until the rule matches (including the match). It also adds an error node to both the capture and the error stacks." - [parser expected] - (print "panic! in the parser") - (def origin (current parser)) - (var passing false) - (def skipped @[]) - (while (not passing) - (array/push skipped (current parser)) - (advance parser) - (set passing (apply-rule stop parser)) - (print "phew; I'm done panicking") - (pprint (current parser)) - (def the-error {:type :error :expected expected :token origin :skipped skipped}) - (array/push (parser :errors) the-error) - (array/push (parser :captured) the-error) - (error parser))) - -(defn * - "Sequences rules: matches if all rules are matched, in sequence." - [& rules] - @{:name (keyword (string/join (map name rules) "*")) - :rule (fn ** [parser] - (print "applying sequence") - (var passing true) - (def origin (stash parser)) - (pprint origin) - (var failing nil) - (var passing true) - (loop [rule :in rules :while passing] - (def pass? (apply-rule rule parser)) - (when (not pass?) - (set failing rule) - (set passing false))) - # this isn't the right thing to do: don't back out, panic! - # (when (not passing) (restore parser origin)) - (if passing - passing - (panic parser (string "expected " (name failing)))))}) -## next problem: where do we back out to, and how? -## the answer: an "anchor" point, where we `(try) something, and if it panics, start up with another line; in a block it will be line; in a script it will be toplevel` -## maybe terms in a tuple, list, dict -## it seems noteworthy that the things we want to return to are all either "any" or "some", and all either end with a newline or a break -## they're not all either any or some (empty data structures, but lines in blocks and scripts) -## also: there's a finite number of possibilities for any of them: -## another well-formed term, or the end of the item -## consider: -(comment -(defp tuple-term (nonbinding (some separator))) -(defp tuple-expr (order-1 (<- :lparen) (any separator) (any! tuple-term separator) (:rparen))) -) -## the tuple-term can be the anchor point -## also, in this case, we panic until a *separator*, not a *terminator* -## that suggests we need a panic-until parameter, rather than the hardcoded stop -## is this right? - - -(comment (defn order-1 [& rules] - @{:name (keyword (string/join (map name rules) "*")) - :rule (fn order-1 [parser] - (def result (test (first rules) parser)) - (if result - (apply-rule (order-0 ;rules)) ### compile error: unknown symbol order-0 - nil))})) - -(defn capture-group - "Takes a parser and an origin state: takes everything captured since the origin, gathers it up in a single array, pops it all off the capture stack, and pushes the gathered captures back on the stack." - [parser origin] - (def from (get origin :cap-len)) - (def captured (get parser :captured)) - (def grouped (array/slice captured from -1)) - (pop-to captured from) - (array/push captured grouped) - captured) - -(defn & - "Groups rules: pops all captured patterns and pushes them on the captured stack as a single array." - [rule] - @{:name (keyword (string "&" (name rule))) - :rule (fn &* [parser] - (def origin (stash parser)) - (def result (apply-rule rule parser)) - (if result - (do - (capture-group parser origin) - true) - (do - (restore parser origin) - nil)))}) - -(defn fn-name - "Returns the name of a function." - [f] - (def rep (string f)) - (string/slice rep 10 -2)) - -(defn / - "Substitution: takes a rule and a function, and applies the function to any captures arising from the rule; the captures are individual parameters to the function." - [rule f] - @{:name (keyword (string (name rule) "/" (fn-name f))) - :rule (fn /* [parser] - (def origin (stash parser)) - (def captured (get parser :captured)) - (def result (apply-rule (& rule) parser)) - (if result - (do - (def grouped (array/pop captured)) - (def applied (f ;grouped)) - (array/push captured applied) - true) - (do - (restore parser origin) - nil)))}) - -(defn any - "Matches zero or more instances of a rule." - [rule] - @{:name (keyword (string "any-" (name rule))) - :rule (fn any* [parser] - (var result true) - (while result - (set result (apply-rule rule parser))) - true)}) - -(defn some - "Matches one or more instances of a rule." - [rule] - @{:name (keyword (string "some-" (name rule))) - :rule (fn some* [parser] - (def origin (stash parser)) - (def result (apply-rule rule parser)) - (if result - (do - (apply-rule (any rule) parser) - true) - (do - (restore parser origin) - nil)))}) - -(defn parse [tokens rule] - (def parser (new-parser tokens)) - (try - (do (apply-rule rule parser) parser) - ([err] err))) - -(upscope #XXX for easier repl use - -(defn literal->ast [token] {:type (get token :type) :data (get token :literal) :token token}) - -(defn word->ast [token] {:type (get token :type) :data (get token :lexeme) :token token}) - -(defn tuple->ast [origin & tokens] {:type :tuple :data tokens :token origin}) - -(defn if->ast [origin & tokens] {:type :if :data tokens :token origin}) - -(declare expression simple nonbinding) - -(defp separator (some (+ :newline :comma :break))) -(defp separators? (any (+ :newline :comma :break))) -(defp terminator (some (+ :newline :semicolon :break))) -(defp terminators? (any (+ :newline :semicolon :break))) -(defp nls? (any :newline)) - -(defp atom (/ (<- (+ :nil :true :false :number :keyword :string)) literal->ast)) -(defp word (/ (<- :word) word->ast)) - -(defp tuple-term (* nonbinding separator)) -(defp tuple-literal (/ (* (<- :lparen) separators? (any tuple-term) :rparen) tuple->ast)) - -(defp if-expr (/ (* (<- :if) simple nls? :then nonbinding nls? :else nonbinding) if->ast)) - -# (defp root (<- (+ :word :keyword))) -# (defp follow (<- (+ :keyword :arg-tuple))) -# (defp synthetic (& (* root (some follow)))) - -(defp simple (+ atom word tuple-literal)) -(defp nonbinding (+ atom word tuple-literal)) -(defp expression (+ atom word tuple-literal)) -(defp line (* expression (some terminator))) -# (defp toplevel) -(defp script (some line)) - -(def source -`` -if (:)foo then :bar else :baz - -`` -) - -(-> (scan source) (parse if-expr) pprint) -) - -(comment - -Okay, so I'm trying to figure out how to manage panics in this declarative (not recursive-descent) parser. -The question is how to do the thing where we pop out of the stack up to the most relevant rule. -Given that we may have deeply nested rules, we either want: - - A stack of rules in the parser, which -) diff --git a/janet/parser.janet b/janet/parser.janet index 32c428c..a5862d3 100644 --- a/janet/parser.janet +++ b/janet/parser.janet @@ -1,408 +1,1112 @@ -### in repl, make sure to (os/cd) into the janet dir -# (os/cd "janet") - -(import ./clj-loop :prefix "") - -(defn ? [val default] (if (nil? val) default val)) - -(defn ok? [{:status status}] - (= status :ok)) - -(def failing {:err true :none true}) - -(def passing {:ok true :group true :quiet true}) - -(defn pass? [{:status status}] (get passing status)) - -(defn fail? [{:status status}] (get failing status)) - -(defn data [{:data d}] d) - -(defn remaining [{:remaining r}] r) - -(defn pname [parser] (? (get parser :name) parser)) - -(defn kw+str [kw mystr] (keyword (string kw) mystr)) - -(defn value [token] - (if (= :none (get token :literal)) (get token :lexeme) (get token :literal))) - -(defn rest [seq] - (let [len (length seq)] - (cond - (empty? seq) [] - (tuple? seq) (tuple/slice 1 len) - (array? seq) (array/slice 1 len)))) - -(defn some? [val] (not (nil? val))) - -(defn apply-kw-parser [kw tokens] - (let [token (first tokens)] - #(if (= kw (get token :type)) (println "Matched " kw)) - (if (= kw (get token :type)) - @{:status :ok - :type kw - :data (if (some? (value token)) @[(value token)] @[]) - :token token - :remaining (rest tokens)} - @{:status :none :token token :trace [kw] :remaining (rest tokens)}))) - -(defn apply-fn-parser [parser tokens] - (let [rule (get parser :rule) name (get parser :name) result (rule tokens)] - #(if (pass? result) (println "Matched " (get parser :name))) - result)) - -(defn apply-parser [parser tokens] - #(println "Applying parser " (? (get parser :name) parser)) - (let [result (cond - (keyword? parser) (apply-kw-parser parser tokens) - (get parser :rule) (apply-fn-parser parser tokens) - (function? parser) (apply-fn-parser (parser) tokens) - :else (error "`apply-parser` requires a parser"))] - #(println "Parser result " (? (get parser :name) parser) (get result :status)) - result - )) - - -(defn choice [name parsers] - {:name name - :rule (fn choice-fn [tokens] - (clj-loop [ps parsers] - (let [result (apply-parser (first ps) tokens) - rem-ts (remaining result) - rem-ps (rest ps)] - (cond - (pass? result) - {:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts} - - (= :err (get result :status)) - (update result :trace |(array/push $ name)) - - (empty? rem-ps) - {:status :none :token (first tokens) :trace [name] :remaining rem-ts} - - :else (recur rem-ps)))))}) - -(defn order-1 [name parsers] - {:name name - :rule (fn order-fn [tokens] - (let [origin (first tokens) - first-result (apply-parser (first parsers) tokens)] - (case (get first-result :status) - (:err :none) - (put (update first-result :trace |(array/push $ name)) :status :none) - - (:ok :quiet :group) - (do (defn recur [ps results ts] - (let [result (apply-parser (first ps) ts) - res-rem (remaining result)] - (if (empty? (rest ps)) - (case (get result :status) - :ok @{:status :group - :type name - :data (array/push results result) - :token origin - :remaining res-rem} - - :quiet @{:status :group - :type name - :data results - :token origin - :remaining res-rem} - - :group @{:status :group - :type name - :data (array/concat results (get result :data)) - :token origin - :remaining res-rem} - - (:err :none) - (put (update result :trace |(array/push $ name)) :status :err)) - - (case (get result :status) - :ok (recur (rest ps) (array/push results result) res-rem) - :group (recur (rest ps) - (array/concat results (get result :data)) - res-rem) - :quiet (recur (rest ps) results res-rem) - - (:err :none) - (put (update result :trace |(array/push $ name)) :status :err))))) - (recur - (get first-result :status) - (case (get first-result :status) :ok [first-result] :quiet [] :group (get first-result :data)) - (remaining first-result))))))}) - -(defn order-0 [name parsers] - {:name name - :rule (fn order-fn [tokens] - (let [origin (first tokens)] - (defn recur [ps results ts] - (let [result (apply-parser (first ps) ts) - res-rem (remaining result)] - (if (empty? (rest ps)) - ## Nothing more: return - (case (get result :status) - :ok @{:status :group - :type name - :data (array/push results result) - :token origin - :remaining res-rem} - - :quiet @{:status :group - :type name - :data results - :token origin - :remaining res-rem} - - :group @{:status :group - :type name - :data (array/concat results (get result :data)) - :token origin - :remaining res-rem} - - (:err :none) - (put (update result :trace |(array/push $ name)) :status :err)) - - ## Still parsers left in the vector: recur - (case (get result :status) - :ok (recur (rest ps) (array/push results result) res-rem) - :group (recur (rest ps) - (array/concat results (get result :data)) - res-rem) - :quiet (recur (rest ps) results res-rem) - - (:err :none) - (put (update result :trace |(array/push $ name)) :status :err) - - (error (string "Got bad result: " (get result :status))))))) - (recur parsers [] tokens)))}) - -(defn weak-order [name parsers] - {:name name - :rule (fn order-fn [tokens] - (let [origin (first tokens)] - (clj-loop [ps parsers # now we can use my handy macro - results [] - ts tokens] - (let [result (apply-parser (first ps) ts) - res-rem (remaining result)] - (if (empty? (rest ps)) - ## Nothing more: return - (case (get result :status) - :ok @{:status :group - :type name - :data (array/push results result) - :token origin - :remaining res-rem} - - :quiet @{:status :group - :type name - :data results - :token origin - :remaining res-rem} - - :group @{:status :group - :type name - :data (array/concat results (get result :data)) - :token origin - :remaining res-rem} - - (:err :none) - (update result :trace |(array/push $ name))) - - ## Still parsers left in the vector: recur - (case (get result :status) - :ok (recur (rest ps) (array/push results result) res-rem) - :group (recur (rest ps) - (array/concat results (get result :data)) - res-rem) - :quiet (recur (rest ps) results res-rem) - - (:err :none) - (update result :trace |(array/push $ name))))))))}) - -(defn quiet [parser] - {:name (kw+str (? (get parser :name) parser) "-quiet") - :rule (fn quiet-fn [tokens] - (let [result (apply-parser parser tokens)] - (if (pass? result) - (put result :status :quiet) - result)))}) - -(defn zero+/2 [name parser] - {:name (kw+str name "-zero+") - :rule (fn zero+fn [tokens] - (clj-loop [results [] - ts tokens] - (let [result (apply-parser parser ts)] - (case (get result :status) - :ok (recur (array/push results result) (remaining result)) - :group (recur (array/concat results (get result :data)) (remaining result)) - :quiet (recur results (remaining result)) - :err (update result :trace |(array/push $ name)) - :none @{:status :group - :type name - :data results - :token (first tokens) - :remaining ts}))))}) - -(defn zero+/1 [parser] (zero+/2 (pname parser) parser)) - -(defn zero+ [& args] - (def arity (length args)) - (if (= 1 arity) (zero+/1 ;args) (zero+/2 ;args))) - - # ([parser] (zero+ (pname parser) parser)) - # ([name parser] - # {:name (kw+str name "-zero+") - # :rule (fn zero+fn [tokens] - # (clj-loop [results [] - # ts tokens] - # (let [result (apply-parser parser ts)] - # (case (get result :status) - # :ok (recur (array/push results result) (remaining result)) - # :group (recur (array/concat results (get result :data)) (remaining result)) - # :quiet (recur results (remaining result)) - # :err (update result :trace |(array/push $ name)) - # :none {:status :group - # :type name - # :data results - # :token (first tokens) - # :remaining ts}))))})) - -(defn one+/2 [name parser] - {:name (kw+str name "-one+") - :rule (fn one+fn [tokens] - (let [first-result (apply-parser parser tokens) - rest-parser (zero+ name parser)] - (case (get first-result :status) - (:ok :group) - (let [rest-result (apply-parser rest-parser (remaining first-result))] - (case (get rest-result :status) - - (:ok :group :quiet) - {:status :group - :type name - :data (array/concat (get first-result :data) (data rest-result)) - :token (first tokens) - :remaining (remaining rest-result)} - - :none {:status :group :type name - :data first-result - :token (first tokens) - :remaining (remaining rest-result)} - - :err (update rest-result :trace |(array/push % name)))) - - :quiet - (let [rest-result (apply-parser rest-parser (remaining first-result))] - {:status :quiet - :type name - :data [] - :token (first tokens) - :remaining (remaining rest-result)}) - - (:err :none) first-result)))}) - -(defn one+/1 [parser] (one+/2 (pname parser) parser)) - -(defn one+ [& args] - (def arity (length args)) - (if (= 1 arity) (one+/1 ;args) (one+/2 ;args))) - -# (defn one+ -# ([parser] (one+ (pname parser) parser)) -# ([name parser] -# {:name (kw+str name "-one+") -# :rule (fn one+fn [tokens] -# (let [first-result (apply-parser parser tokens) -# rest-parser (zero+ name parser)] -# (case (get first-result :status) -# (:ok :group) -# (let [rest-result (apply-parser rest-parser (remaining first-result))] -# (case (get rest-result :status) - -# (:ok :group :quiet) -# {:status :group -# :type name -# :data (array/concat (get first-result :data) (data rest-result)) -# :token (first tokens) -# :remaining (remaining rest-result)} - -# :none {:status :group :type name -# :data first-result -# :token (first tokens) -# :remaining (remaining rest-result)} - -# :err (update rest-result :trace |(array/push % name)))) - -# :quiet -# (let [rest-result (apply-parser rest-parser (remaining first-result))] -# {:status :quiet -# :type name -# :data [] -# :token (first tokens) -# :remaining (remaining rest-result)}) - -# (:err :none) first-result)))})) - -(defn maybe/2 [name parser]) - -(defn maybe/1 [parser] (maybe/2 (pname parser) parser)) - -(defn maybe [& args] - (def arity (length args)) - (if (= 1 arity) (maybe/1 ;args) (maybe/2 ;args))) - -# (defn maybe -# ([parser] (maybe (pname parser) parser)) -# ([name parser] -# {:name (kw+str name "-maybe") -# :rule (fn maybe-fn [tokens] -# (let [result (apply-parser parser tokens)] -# (if (pass? result) -# result -# {:status :group :type name :data [] :token (first tokens) :remaining tokens} -# )))})) - -(defn flat/2 [name parser]) - -(defn flat/1 [parser] (flat/2 (pname parser) parser)) - -(defn flat [& args] - (def arity (length args)) - (if (= 1 arity) (flat/1 ;args) (flat/2 ;args))) - -# (defn flat -# ([parser] (flat (pname parser) parser)) -# ([name parser] -# {:name (kw+str name "-flat") -# :rule (fn flat-fn [tokens] -# (let [result (apply-parser parser tokens)] -# (if (pass? result) (first (get result :data)) result)))})) - -(defn group/2 [name parser]) - -(defn group/1 []) - -(defn group - ([parser] (group (pname parser) parser)) - ([name parser] - {:name (kw+str name "-group") - :rule (fn group-fn [tokens] - (let [result (apply-parser parser tokens)] - (if (= :group (get result :status)) - (put result :status :ok) - result)))})) - -(defn err-msg [{:token token :trace trace}] - (print "Unexpected token " (get token :type) " on line " (get token :line)) - (print "Expected token " (first trace))) - -(defn butlast [xs] (if (empty? xs) [] (slice xs 0 -2))) - -(defmacro defp [name & items] - (let [arg (last items) - fns (butlast items)] - ~(defn ,name [] ((apply comp ,fns) (keyword ',name) ,arg)))) +### A recursive descent parser for Ludus + +### We still need to scan some things +#(os/cd "janet") # when in repl to do relative imports +(import ./scanner :as s) + +(defmacro declare + "Forward-declares a function name, so that it can be called in a mutually recursive manner." + [& names] + (def bindings @[]) + (loop [name :in names] + (def binding ~(var ,name nil)) + (array/push bindings binding)) + ~(upscope ,;bindings)) + +(defmacro defrec + "Defines a function depended on by another function, that has been forward `declare`d." + [name & forms] + (if-not (dyn name) (error "recursive functions must be declared before they are defined")) + ~(set ,name (defn- ,name ,;forms))) + +### Next: a data structure for a parser +(defn- new-parser + "Creates a new parser data structure to pass around" + [tokens] + @{ + :tokens (tokens :tokens) + :ast @[] + :current 0 + :errors @[] + }) + +### and some helper functions for interfacing with that data structure +(defn- current + "Returns the current token of a parser. If the parser is at the last token, keeps returning the last token." + [parser] + (def tokens (parser :tokens)) + (get tokens (parser :current) (last tokens))) + +(defn- peek + "Returns the next token of the parser. If the parser is at the last token, keeps returning the last token." + [parser] + (def tokens (parser :tokens)) + (get tokens (inc (parser :current)) (last tokens))) + +(defn- advance + "Advances the parser by a token" + [parser] + (update parser :current inc)) + +(defn- type + "Returns the type of a token" + [token] + (get token :type)) + +(defn- check + "Returns true if the parser's current token is one of the passed types" + [parser type & types] + (def accepts [type ;types]) + (def current-type (-> parser current (get :type))) + (has-value? accepts current-type)) + +### Parsing functions +# forward declarations +(declare simple nonbinding expr toplevel synthetic) + +# errors +# terminators are what terminate expressions +(def terminators [:break :newline :semicolon]) + +(defn- terminates? + "Returns true if the current token in the parser is a terminator" + [parser] + (def curr (current parser)) + (def ttype (type curr)) + (has-value? terminators ttype)) + +# breakers are what terminate panics +(def breaking [:break :newline :semicolon :comma :eof :then :else]) + +(defn- breaks? + "Returns true if the current token in the parser should break a panic" + [parser] + (def curr (current parser)) + (def ttype (type curr)) + (has-value? breaking ttype)) + +(defn- panic + "Panics the parser: starts skipping tokens until a breaking token is encountered. Adds the error to the parser's errors array, and also errors out." + [parser message] + (print "Panic in the parser: " message) + (def origin (current parser)) + (advance parser) + (def skipped @[origin]) + (while (not (breaks? parser)) + (array/push skipped (current parser)) + (advance parser)) + (array/push skipped (current parser)) + (def err {:type :error :data skipped :token origin :msg message}) + (update parser :errors array/push err) + (error err)) + +(defn- expected + "Panics the parser with a message: expected {type} got ..." + [parser ttype & ttypes] + (def expected (map string [ttype ;ttypes])) + (def type-msg (string/join expected " | ")) + (panic parser (string "expected {" type-msg "}, got " (-> parser current type)))) + +(defn- expect + "Panics if the parser's current token is not of type; otherwise does nothing & returns nil" + [parser type & types] + (if-not (check parser type ;types) (expected parser type ;types))) + +(defn- expect-ret + "Same as expect, but captures the error, returning it as a value" + [parser type & types] + (try (expect parser type ;types) ([e] e))) + +(defn- capture + "Applies the parse function to the parser, returning the parsed AST. If there is a panic, captures the panic and returns it as a value." + [parse-fn parser] + (try (parse-fn parser) ([e] e))) + +(defn- accept-one + "Accepts a single token of passed type, advancing the parser if a match, doing nothing if not." + [parser type & types] + (if (check parser type ;types) (advance parser))) + +(defn- accept-many + "Accepts any number of tokens of a passed type, advancing the parser on match until there are no more matches. Does nothing on no match." + [parser type & types] + (while (check parser type ;types) (advance parser))) + +# atoms +(defn- bool [parser] + (expect parser :true :false) + (def curr (-> parser current)) + (def ttype (type curr)) + (def value (if (= ttype :true) true false)) + (advance parser) + {:type :bool :data value :token curr} + ) + +(defn- num [parser] + (expect parser :number) + (def curr (-> parser current)) + (advance parser) + {:type :number :data (curr :literal) :token curr} + ) + +(defn- kw [parser] + (expect parser :keyword) + (if (= :lparen (-> parser peek type)) (break (synthetic parser))) + (def curr (-> parser current)) + (advance parser) + {:type :keyword :data (curr :literal) :token curr} + ) + +(defn- nill [parser] + (expect parser :nil) + (def curr (current parser)) + (advance parser) + {:type :nil :token curr}) + +(defn- str [parser] + (expect parser :string) + (def curr (-> parser current)) + (advance parser) + {:type :string :data (curr :literal) :token curr}) + +# interpolated strings, which are a whole other scene +(defn- scan-interpolations [data] + (print "scanning interpolation: " data) + (when (buffer? data) (break data)) + (pp data) + (def to-scan (data :to-scan)) + (def {:tokens tokens :errors errors} (s/scan to-scan)) + (pp tokens) + (print "there are " (length tokens) " tokens") + (def first-token (first tokens)) + (cond + (first errors) (first errors) + (empty? tokens) + {:type :error :msg "string interpolations/patterns must be single words"} + (< 3 (length tokens)) + {:type :error :msg "string interpolations/patterns must be single words"} + (= :word (first-token :type)) + {:type :word :data (first-token :lexeme) :token first-token} + :else {:type :error :msg "string interpolations/patterns must be single words"})) + +(defn- is-error? [data] + (cond + (buffer? data) false + (= :error (data :type)) true + false)) + +(defn- interpolated [parser] + (expect parser :interpolated) + (def origin (current parser)) + (def source (origin :literal)) + (def data @[]) + (var curr @"") + (var interp? false) + (var escape? false) + (each code source + (def char (string/from-bytes code)) + (cond + (= char "\\") (set escape? true) + escape? (if (= char "{") + (do + (buffer/push curr "{") + (set escape? false)) + (do + (buffer/push curr "\\") + (buffer/push curr char) + (set escape? false))) + (= char "{") (do + (set interp? true) + (array/push data curr) + (set curr @"")) + (= char "}") (if interp? (do + (set interp? false) + (array/push data {:to-scan curr}) + (set curr @"")) + (buffer/push curr char)) + :else (buffer/push curr char))) + (array/push data curr) + (def interpolated (map scan-interpolations data)) + (advance parser) + (def ast {:type :interpolated :data interpolated :token origin}) + (if (some is-error? interpolated) + (do + (def err {:type :error :msg "bad interpolated string" :data ast :token origin}) + (array/push (parser :errors) err) + err) + ast)) + +# words & synthetic expressions +(def separates [:break :newline :comma]) + +(defn- separates? [parser] + (def curr (current parser)) + (def ttype (type curr)) + (has-value? separates ttype)) + +(defn- separators [parser] + (if-not (separates? parser) + (panic parser (string "expected separator, got " (-> parser current type)))) + (while (separates? parser) (advance parser))) + +(def sequels [:lparen :keyword]) + +(defn- word-expr [parser] + (expect parser :word) + (if (has-value? sequels (-> parser peek type)) (break (synthetic parser))) + (def curr (-> parser current)) + (advance parser) + {:type :word :data (curr :lexeme) :token curr}) + +(defn- word-only [parser] + (expect parser :word) + (def curr (current parser)) + (advance parser) + {:type :word :data (curr :lexeme) :token curr}) + +(defn- args [parser] + (def origin (current parser)) + (advance parser) # consume the :lparen + (def ast @{:type :args :data @[] :token origin :partial false}) + (while (separates? parser) (advance parser)) # consume any separators + (while (not (check parser :rparen)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed paren"}) + (array/push (parser :errors) err) + (error err)) + (def origin (current parser)) + (def term (if (check parser :placeholder) + (if (ast :partial) + (do + (def err {:type :error :data [] :token origin :msg "partially applied functions may only use one placeholder"}) + (advance parser) + (update parser :errors array/push err) + err) + (do + (set (ast :partial) true) + (advance parser) + {:type :placeholder :token origin})) + (capture nonbinding parser))) + (array/push (ast :data) term) + (try (separators parser) + ([e] (pp e) (array/push (ast :data) e)))) + (advance parser) + ast) + +(defn- synth-root [parser] + (print "parsing synth root") + (def origin (current parser)) + (advance parser) + (case (type origin) + :word {:type :word :data (origin :lexeme) :token origin} + :keyword {:type :keyword :data (origin :literal) :token origin} + :pkg-name {:type :pkg-name :data (origin :lexeme) :token origin} + (panic parser "expected word, keyword, or package") + ) +) + +(defrec synthetic [parser] + (print "parsing synthetic") + (def origin (current parser)) + (def ast {:type :synthetic :data @[(synth-root parser)] :token origin}) + (while (has-value? sequels (-> parser current type)) + (def term + (case (-> parser current type) + :lparen (args parser) + :keyword (kw parser) + )) + (array/push (ast :data) term) + ) + ast +) + +# collections +(defn- tup [parser] + (def origin (current parser)) + (advance parser) # consume the :lparen + (def ast {:type :tuple :data @[] :token origin}) + (while (separates? parser) (advance parser)) # consume any separators + (while (not (check parser :rparen)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed paren"}) + (array/push (parser :errors) err) + (error err)) + (def term (capture nonbinding parser)) + (array/push (ast :data) term) + (try (separators parser) + ([e] (pp e) (array/push (ast :data) e)))) + (advance parser) + ast) + +(defn- list [parser] + (def origin (current parser)) + (advance parser) + (def ast {:type :list :data @[] :token origin}) + (while (separates? parser) (advance parser)) + (while (not (check parser :rbracket)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed bracket"}) + (array/push (parser :errors) err) + (error err)) + (def origin (current parser)) + (def term (if (check parser :splat) + (do + (advance parser) + (def splatted (capture word-only parser)) + {:type :splat :data splatted :token origin} + ) + (capture nonbinding parser))) + (array/push (ast :data) term) + (try (separators parser) + ([e] (array/push (ast :data) e)))) + (advance parser) + ast) + +(defn- sett [parser] + (def origin (current parser)) + (advance parser) + (def ast {:type :set :data @[] :token origin}) + (while (separates? parser) (advance parser)) + (while (not (check parser :rbrace)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed brace"}) + (array/push (parser :errors) err) + (error err)) + (def origin (current parser)) + (def term (if (check parser :splat) + (do + (advance parser) + (def splatted (capture word-only parser)) + {:type :splat :data splatted :token origin} + ) + (capture nonbinding parser))) + (array/push (ast :data) term) + (try (separators parser) + ([e] (array/push (ast :data) e)))) + (advance parser) + ast) + +(defn- dict [parser] + (def origin (current parser)) + (advance parser) + (def ast {:type :dict :data @[] :token origin}) + (while (separates? parser) (advance parser)) + (while (not (check parser :rbrace)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed brace"}) + (array/push (parser :errors) err) + (error err)) + (def origin (current parser)) + (def term (case (type origin) + :splat {:type :splat :data (capture word-only (advance parser)) :token origin} + :word (try (word-only parser) ([e] e)) + :keyword (do + (def key (try (kw parser) ([e] e))) + (def value (capture nonbinding parser)) + {:type :pair :data [key value] :token origin}) + (try (panic parser (string "expected dict term, got " (type origin))) ([e] e)) + )) + (array/push (ast :data) term) + (try (separators parser) ([e] (array/push (ast :data) e)))) + (advance parser) + ast) + +### patterns +(declare pattern) + +(defn- placeholder [parser] + (expect parser :placeholder :ignored) + (def origin (current parser)) + (advance parser) + {:type :placeholder :token origin}) + +(defn- word-pattern [parser] + (expect parser :word) + (def origin (current parser)) + (advance parser) + (def the-word {:type :word :data (origin :lexeme) :token origin}) + (if (check parser :as) + (do + (advance parser) + (def type (kw parser)) + {:type :typed :data [type the-word] :token origin}) + the-word)) + +(defn- tup-pattern [parser] + (def origin (current parser)) + (advance parser) # consume the :lparen + (def ast {:type :tuple :data @[] :token origin}) + (while (separates? parser) (advance parser)) # consume any separators + (while (not (check parser :rparen)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed paren"}) + (array/push (parser :errors) err) + (error err)) + (def origin (current parser)) + (def term (if (check parser :splat) + (do + (advance parser) + (def splatted (when (check parser :word) (word-only parser))) + {:type :splat :data splatted :token origin}) + (capture pattern parser))) + (array/push (ast :data) term) + (try (separators parser) + ([e] (pp e) (array/push (ast :data) e)))) + (advance parser) + ast) + +(defn- list-pattern [parser] + (def origin (current parser)) + (advance parser) + (def ast {:type :list :data @[] :token origin}) + (while (separates? parser) (advance parser)) + (while (not (check parser :rbracket)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed bracket"}) + (array/push (parser :errors) err) + (error err)) + (def origin (current parser)) + (def term (if (check parser :splat) + (do + (advance parser) + (def splatted (when (check parser :word) (word-only parser))) + {:type :splat :data splatted :token origin}) + (capture pattern parser))) + (array/push (ast :data) term) + (try (separators parser) + ([e] (array/push (ast :data) e)))) + (advance parser) + ast) + +(defn- dict-pattern [parser] + (def origin (current parser)) + (advance parser) + (def ast {:type :dict :data @[] :token origin}) + (while (separates? parser) (advance parser)) + (while (not (check parser :rbrace)) + (when (check parser :eof) + (def err {:type :error :token origin :msg "unclosed brace"}) + (array/push (parser :errors) err) + (error err)) + (def origin (current parser)) + (def term (case (type origin) + :splat {:type :splat :data (when (check (advance parser) :word) (word-only parser)) :token origin} + :word (do + (def word (capture word-pattern parser)) + (def name (word :data)) + (def key {:type :keyword :data (keyword name) :token origin}) + {:type :pair :data [key word] :token origin}) + :keyword (do + (def key (capture kw parser)) + (def value (capture pattern parser)) + {:type :pair :data [key value] :token origin}) + (try (panic parser (string "expected dict term, got " (type origin))) ([e] e)) + )) + (array/push (ast :data) term) + (try (separators parser) ([e] (array/push (ast :data) e)))) + (advance parser) + ast) + +### TODO: add as patterns +(defrec pattern [parser] + (case (-> parser current type) + :nil (nill parser) + :true (bool parser) + :false (bool parser) + :keyword (kw parser) + :number (num parser) + :string (str parser) + :word (word-pattern parser) + :placeholder (placeholder parser) + :ignored (placeholder parser) + :lparen (tup-pattern parser) + :lbracket (list-pattern parser) + :startdict (dict-pattern parser) + :interpolated (interpolated parser) + (panic parser "expected pattern") + )) + +### let +# let {pattern} = {nonbinding} +(defn- lett [parser] + (def ast {:type :let :data @[] :token (current parser)}) + (advance parser) # consume the let + (array/push (ast :data) (capture pattern parser)) + (if-let [err (expect-ret parser :equals)] + (do (array/push (ast :data) err) (break ast)) + (advance parser)) + (accept-many parser :newline) + (array/push (ast :data) (capture nonbinding parser)) + ast) + +### conditional forms +# if {simple} then {nonbinding} else {nonbinding} +(defn- iff [parser] + (def ast {:type :if :data @[] :token (current parser)}) + (advance parser) #consume the if + (array/push (ast :data) (capture simple parser)) + (accept-many parser :newline) + (if-let [err (expect-ret parser :then)] + (array/push (ast :data) err) + (advance parser)) + (array/push (ast :data) (capture nonbinding parser)) + (accept-many parser :newline) + (if-let [err (expect-ret parser :else)] + (array/push (ast :data) err) + (advance parser)) + (array/push (ast :data) (capture nonbinding parser)) + ast) + +(defn- terminator [parser] + (if-not (terminates? parser) + # this line panics, captures the panic, advances the parser, and re-throws the error; solves an off-by-one error + (panic parser "expected terminator")) + (advance parser) + (while (terminates? parser) (advance parser))) + +# {simple} -> {nonbinding} {terminator} +### TODO: add placeholder as valid lhs +(defn- when-clause [parser] + (try + (do + (def lhs (simple parser)) + (expect parser :arrow) + (advance parser) + (accept-many parser :newline) + (def rhs (nonbinding parser)) + (terminator parser) + [lhs rhs]) + ([err] + (advance parser) # consume the breaking token + (accept-many parser :newline :semicolon :break) # ...and any additional ones + err))) + +# when { {when-clause}+ } +(defn- whenn [parser] + (def origin (current parser)) + (def ast {:type :when :data @[] :token origin}) + (advance parser) # consume when + (if-let [err (expect-ret parser :lbrace)] + (do + (array/push (ast :data) err) + (break ast)) # early return; just bail if we don't have { + (advance parser)) + (accept-many parser :newline) + (while (not (check parser :rbrace )) # make sure we don't roll past eof + (when (check parser :eof) (error {:type :error :token origin :data ast :msg "unclosed brace"})) + (array/push (ast :data) (capture when-clause parser))) + (advance parser) + ast) + +### TODO: add guards to patterns +(defn- match-clause [parser] + (try + (do + (def ast {:type :clause :data @[] :origin (current parser)}) + (def lhs (pattern parser)) + (def guard (when (check parser :if) + (advance parser) + (simple parser))) + (expect parser :arrow) + (advance parser) + (accept-many parser :newline) + (def rhs (nonbinding parser)) + (terminator parser) + [lhs guard rhs]) + ([err] + (accept-many parser ;terminators) + err))) + +(defn- matchh [parser] + (def origin (current parser)) + (def ast {:type :match :data @[] :token origin}) + (expect parser :match) + (advance parser) + (try + (do + (simple parser) + (expect parser :with) (advance parser) + (expect parser :lbrace) (advance parser) + (accept-many parser :newline) + (while (not (check parser :rbrace)) + (when (check parser :eof) (error {:type :error :token origin :data ast :msg "unclosed brace"})) + (array/push (ast :data) (match-clause parser))) + (advance parser) + ast) + ([err] err))) + +# {pattern} = {nonbinding} {terminators} +(defn- with-clause [parser] + (try + (do + (def lhs (pattern parser)) + (def guard (when (check parser :if) + (advance parser) + (simple parser))) + (expect parser :equals) (advance parser) + (def rhs (nonbinding parser)) + (terminator parser) + [lhs guard rhs] + ) + ([err] + (accept-many parser ;terminators) + err) + ) +) + +# with { {clauses}+ } {terminators}? then {nonbinding} {terminators}? else {nonbinding} +(defn- withh [parser] + (def origin (current parser)) + (expect parser :with) (advance parser) + (try + (do + (expect parser :lbrace) (var lbrace (current parser)) (advance parser) + (accept-many parser ;terminators) + (def clauses @[]) + (array/push clauses (with-clause parser)) + (accept-many parser ;terminators) + (while (not (check parser :rbrace)) + (if (check parser :eof) + (error {:type :error :data [clauses] :token lbrace :msg "unclosed brace"})) + (array/push clauses (with-clause parser)) + (accept-many parser ;terminators)) + (advance parser) # consume closing brace + (accept-many parser :newline) + (expect parser :then) (advance parser) + (def then (nonbinding parser)) + (accept-many parser :newline) + (expect parser :else) (advance parser) + (expect parser :lbrace) (set lbrace (current parser)) (advance parser) + (accept-many parser ;terminators) + (def else @[]) + (while (not (check parser :rbrace)) + (when (check parser :eof) (error {:type :error :token lbrace :data [else] :msg "unclosed brace"})) + (array/push else (match-clause parser))) + (advance parser) + {:type :with :data [clauses then else] :token origin}) + ([err] err) + ) +) + +### function forms +(defn- fn-simple [parser] + (print "parsing simple function body") + (try + (do + (def lhs (tup-pattern parser)) + (print "parsed lhs") + (def guard (when (check parser :if) + (advance parser) + (simple parser))) + (print "parsed guard") + (expect parser :arrow) (advance parser) + (print "parsed arrow") + (accept-many parser :newline) + (def rhs (nonbinding parser)) + (print "parsed rhs") + [[lhs guard rhs]] + ) + ([err] err) + ) +) + +(defn- fn-clause [parser] + (def origin (current parser)) + (try + (do + (def lhs (tup-pattern parser)) + (def guard (when (check parser :if) + (advance parser) + (simple parser))) + (expect parser :arrow) (advance parser) + (accept-many parser :newline) + (def rhs (nonbinding parser)) + (terminator parser) + [lhs guard rhs]) + ([err] + (advance parser) + (accept-many parser ;terminators) + err + ) + ) +) + +(defn- fn-clauses [parser] + (print "parsing fn clauses") + (def origin (current parser)) + (expect parser :lbrace) (advance parser) + (accept-many parser ;terminators) + (def data @[]) + (while (not (check parser :rbrace)) + (if (check parser :eof) + (error {:type :error :token origin :data data :msg "unclosed brace"})) + (array/push data (capture fn-clause parser))) + (advance parser) + data) + +(defn- lambda [parser] + (def origin (current parser)) + (expect parser :fn) (advance parser) + @{:type :fn :data (fn-simple parser) :token origin}) + +(defn- fnn [parser] + (if (= :lparen (-> parser peek type)) (break (lambda parser))) + (try + (do + (print "parsing named function") + (def origin (current parser)) + (expect parser :fn) (advance parser) + (print "consumed `fn`") + (print "next token: ") + (pp (current parser)) + (def name (-> parser word-only (get :data))) + (print "function name: ") + (pp name) + (def data (case (-> parser current type) + :lbrace (fn-clauses parser) + :lparen (fn-simple parser) + (panic parser (string "expected clause or clauses, got " (-> current parser type))))) + @{:type :fn :name name :data data :token origin}) + ([err] err))) + +### compoound forms +(defn- block [parser] + (def origin (current parser)) + (expect parser :lbrace) (advance parser) + (accept-many parser ;terminators) + (def data @[]) + (while (not (check parser :rbrace)) + (if (check parser :eof) + (error {:type :error :token origin :data data :msg "unclosed brace"})) + (array/push data (capture expr parser)) + (terminator parser)) + (advance parser) + {:type :block :data data :token origin}) + +### TODO: decide whether this design works +# newlines are allowed AFTER pipelines, but not before +# eg. `do foo > \n bar > \n baz` +# but not `do foo \n > bar \n > baz` +# Otherwise, this isn't LR +(defn- doo [parser] + (def origin (current parser)) + (expect parser :do) (advance parser) + (def data @[]) + (array/push data (capture simple parser)) + (print "added first expression. current token:") + (pp (current parser)) + (while (check parser :pipeline) + (advance parser) + (accept-many parser :newline) + (array/push data (capture simple parser))) + {:type :do :data data :token origin}) + +### refs, pkgs, nses, etc. +(defn- ref [parser] + (def origin (current parser)) + (expect parser :ref) (advance parser) + (try + (do + (def name (-> parser word-only (get :data))) + (expect parser :equals) (advance parser) + (def value (nonbinding parser)) + {:type :ref :data value :name name :token origin}) + ([err] err))) + +(defn- pkg-name [parser] + (expect parser :pkg-name) + (def origin (current parser)) + (if (= :keyword (-> parser peek type)) (break (synthetic parser))) + (advance parser) + {:type :pkg-name :data (origin :lexeme) :token origin}) + +(defn- usee [parser] + (def origin (current parser)) + (expect parser :use) (advance parser) + (try + (do + {:type :use :data (pkg-name parser) :token origin}) + ([err] err))) + +(defn- pkg [parser] + (try + (do + (def origin (current parser)) + (expect parser :pkg) (advance parser) + (def name (-> parser pkg-name (get :data))) + (expect parser :lbrace) (advance parser) + (accept-many parser ;terminators) + (def data @[]) + (while (not (check parser :rbrace)) + (when (check parser :eof) + (def err {:type :error :token origin :data data :msg "unclosed brace"}) + (array/push (parser :errors) err) + (error err)) + (case (-> parser current type) + :keyword (do + (def origin (current parser)) + (def key (capture kw parser)) + (def value (capture simple parser)) + (array/push data {:type :pair :data [key value] :token origin})) + :word (do + (def value (word-only parser)) + (def key (keyword (value :data))) + (def kw-ast {:type :keyword :data key :token origin}) + (array/push data {:type :pair :data [key value] :token origin})) + (panic parser "expected pkg term")) + (terminator parser)) + (advance parser) + @{:type :pkg :data data :token origin :name name}) + ([err] err))) + +(defn- ns [parser] + (try + (do + (def origin (current parser)) + (expect parser :ns) (advance parser) + (def name (-> parser pkg-name (get :data))) + (def body (block parser)) + @{:type :ns :data body :name name :token origin}) + ([err] err))) + +(defn- importt [parser] + (def origin (current parser)) + (expect parser :import) (advance parser) + (def path (str parser)) + (expect parser :as) (advance parser) + (def name-parser (if (check parser :pkg-name) pkg-name word-only)) + (def name + (-> parser name-parser (get :data))) + {:type :import :data path :name name :token origin}) + +### tests +(defn- testt [parser] + (def origin (current parser)) + (expect parser :test) (advance parser) + (def desc (str parser)) + (def body (nonbinding parser)) + {:type :test :data [desc body] :token origin}) + +### loops and repeates +(defn- loopp [parser] + (def origin (current parser)) + (expect parser :loop) (advance parser) + (def args (tup parser)) + (expect parser :with) (advance parser) + (def clauses (case (-> parser current type) + :lparen (fn-simple parser) + :lbrace (fn-clauses parser) + )) + @{:type :loop :data [args clauses] :token origin}) + +(defn- recur [parser] + (def origin (current parser)) + (expect parser :recur) (advance parser) + (def args (tup parser)) + {:type :recur :data args :token origin}) + +(defn- repeatt [parser] + (def origin (current parser)) + (advance parser) + (def times (case (-> parser current type) + :number (num parser) + :word (word-only parser) + (panic parser "expected number or word") + )) + (def body (block parser)) + {:type :repeat :data [times body] :token origin}) + +### panics +(defn- panicc [parser] + (def origin (current parser)) + (expect parser :panic) (advance parser) + {:type :panic :data (nonbinding parser) :token origin}) + +### expressions +# four levels of expression complexity: +# simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks) +# nonbinding (excludes let, ref, named fn: what is allowed inside collections) +# plain old exprs (anything but toplevel) +# toplevel (exprs + ns, pkg, test, import, use) + +# simple expressions: what can go anywhere you expect an expression +(defrec simple [parser] + (def curr (current parser)) + (case (type curr) + :nil (nill parser) + :true (bool parser) + :false (bool parser) + :number (num parser) + :keyword (kw parser) + :string (str parser) + :interpolated (interpolated parser) + :lparen (tup parser) + :lbracket (list parser) + :startdict (dict parser) + :startset (sett parser) + :word (word-expr parser) + :pkg-name (pkg-name parser) + :recur (recur parser) + :panic (panicc parser) + (panic parser (string "expected simple expression, got " (type curr))) + ) +) + +# non-binding expressions +# the rhs of lets, clauses, inside conditional forms, etc. +# any form that does not bind a name +(defrec nonbinding [parser] + (def curr (current parser)) + (case (type curr) + # atoms + :nil (nill parser) + :true (bool parser) + :false (bool parser) + :number (num parser) + :keyword (kw parser) + + # strings + :string (str parser) + ### TODO: interpolated strings + :interpolated (interpolated parser) + + # collection literals + :lparen (tup parser) + :lbracket (list parser) + :startdict (dict parser) + :startset (sett parser) + + # synthetic + :word (word-expr parser) + :pkg-name (pkg-name parser) + :recur (recur parser) + + # conditional forms + :if (iff parser) + :when (whenn parser) + :match (matchh parser) + :with (withh parser) + + # do + :do (doo parser) + + # fn: but only lambda + :fn (lambda parser) + + # blocks + :lbrace (block parser) + + # looping forms + :loop (loopp parser) + :repeat (repeatt parser) + + # panic! + :panic (panicc parser) + + (panic parser (string "expected nonbinding expression, got " (type curr))) + ) +) + +(defrec expr [parser] + (def curr (current parser)) + (case (type curr) + # binding forms + :let (lett parser) + :fn (fnn parser) + :ref (ref parser) + + # nonbinding forms + :nil (nill parser) + :true (bool parser) + :false (bool parser) + :number (num parser) + :keyword (kw parser) + :string (str parser) + :interpolated (interpolated parser) + :lparen (tup parser) + :lbracket (list parser) + :startdict (dict parser) + :startset (sett parser) + :word (word-expr parser) + :pkg-name (pkg-name parser) + :recur (recur parser) + :if (iff parser) + :when (whenn parser) + :match (matchh parser) + :with (withh parser) + :do (doo parser) + :lbrace (block parser) + :loop (loopp parser) + :repeat (repeatt parser) + :panic (panicc parser) + (panic parser (string "expected expression, got " (type curr))) + ) +) + +(defrec toplevel [parser] + (def curr (current parser)) + (case (type curr) + # toplevel-only + :pkg (pkg parser) + :ns (ns parser) + :test (testt parser) + :import (importt parser) + :use (usee parser) + + # all the other expressions + (expr parser) + ) +) + +(defn- script [parser] + (def origin (current parser)) + (def lines @[]) + (while (not (check parser :eof)) + (array/push lines (capture toplevel parser)) + (capture terminator parser)) + {:type :script :data lines :token origin}) + +(defn parse [scanned] + (def parser (new-parser scanned)) + (def ast (script parser)) + (set (parser :ast) ast) + parser) + +(defn- indent-by [n] + (def indentation @"") + (repeat n (buffer/push indentation "..")) + indentation) + +(defn- pp-ast [ast &opt indent] + (default indent 0) + (def {:type t :name n :data d :msg m} ast) + (string (indent-by indent) t ": " n m + (if (indexed? d) + (string "\n" (string/join (map (fn [a] (pp-ast a (inc indent))) d))) + d + ) + "\n" + ) +) + + +(do +#(comment +(def source ` +loop (1, 2) with (x, y) -> :bar +`) +(def scanned (s/scan source)) +(print "\n***NEW PARSE***\n") +(def a-parser (new-parser scanned)) +(def parsed (toplevel a-parser)) + +# (print (pp-ast parsed)) +(pp scanned) +(pp parsed) +) + + +# FIXME: +# TODO: +# DECIDE: +# - when to use a flat try/catch format, and when to use capture/expect-ret to get values instead of errors diff --git a/janet/peg-parser.janet b/janet/peg-parser.janet deleted file mode 100644 index 7241178..0000000 --- a/janet/peg-parser.janet +++ /dev/null @@ -1,104 +0,0 @@ -(defn ->kw [kw-str] - (keyword (slice kw-str 1))) - -(defn word [str] [:word str]) - -(defn num [num] [:number (scan-number num)]) - -(defn kw [_ kw] [:keyword (->kw kw)]) - -(defn str [str] [:string str]) - -(defn bool [bool] (if (= bool "true") [:boolean true] [:boolean false])) - -(defn nill [_] [:nil nil]) - -(defn not-empty? [x] (not (empty? x))) - -(defn ifl [args] [:if ;args]) - -(defn letl [args] [:let ;args]) - -(defn block [args] [:block args]) - -(def errors @[]) - -(defn ? [x default] (if (nil? x) default x)) - -(defn panic [& args] - (def info (filter |(not= "" $) args)) - (def [msg line col source] info) - (def error {:msg msg :line line :col col :source (? source "")}) - (array/push errors error) - [:error error] -) - -(do -(def ludus-grammar - ~{:nil (cmt (<- "nil") ,nill) - :true (cmt (<- "true") ,bool) - :false (cmt (<- "false") ,bool) - :comment (* "&" (any (if-not (+ "\n" -1) 1))) - :wordchars (+ :w (set "_-/*?!")) - :reserved (+ "if" "then" "else" "let") - :word (cmt (<- (if-not :reserved (* (range "az") (any :wordchars)))) ,word) - :keyword (cmt (<- (* ":" :word)) ,kw) - :hex (range "09" "af" "AF") - :escape (* "\\" (+ (set `"'0?\abefnrtvz`) - (* "x" :hex :hex) - (* "u" [4 :hex]) - (* "U" [6 :hex]) - (error (constant "bad escape")))) - :string (cmt (* "\"" (<- (any (+ :escape (if-not "\"" 1)))) "\"") ,str) - :int (* (? "-") (range "19") (any (+ "_" (range "09")))) - :float (* (? "-") (+ :int "0") "." (any (+ "_" (range "09")))) - :zero "0" - :number (cmt (<- (+ :float :int :zero)) ,num) - :comment (* "&" (any (if-not (+ "\n" -1) 1))) - :atom (+ :nil :true :false :keyword :number :word) - :newline "\n" - :terminator (* (any :ws) (some (+ :newline ";"))) - :separator (* (any :ws) (some (+ :newline ","))) - :ws (any (+ :comment (set " \t"))) - :simple (+ :atom :string :word (* (constant "expected simple" :err-msg) :panic)) - :then (+ (* (+ (some :ws) (any :newline)) (any :ws) "then" (some :ws)) (* (constant "expected then" :err-msg) :panic)) - :else (+ (* (+ (some :ws) (any :newline)) (any :ws) "else" (some :ws)) (* (constant "expected else" :err-msg) :panic)) - :panic (cmt (* (<- (-> :err-msg)) (<- (line)) (<- (column)) (<- (* (any (if-not (+ "\n" -1) 1))))) ,panic) - :non-binding (+ :atom :word :string :if :block - (* (constant "expected non-binding" :err-msg) :panic)) - :tuple-term (* :number (some :separator)) - :tuple (* "(" (any :separator) (any :tuple-term) ")") - :block (cmt (group (* "{" (some :line) (? :expression) "}")) ,block) - :if (cmt (group (* "if" (some :ws) :simple :then :non-binding :else :non-binding)) ,ifl) - :pattern (+ :atom :string (* (constant "expected pattern") :panic)) - :equals (* (any :ws) "=" (any :ws)) - :let (cmt (group (* "let" (some :ws) :pattern :equals :non-binding)) ,letl) - :expression (+ :atom :string :block :if :let - (* (constant "expected expression" :err-msg) :panic)) - :empty (* :ws :terminator) - :line (* :ws :expression :ws :terminator) - :main :tuple}) - -(def simple-tuple ~{ - :value (<- (range "az")) - :separator (+ "," "\n") - :ws (any (set " \t")) - :term (* :ws :value :ws :separator) - :tuple (group (* "(" (any :term) (? :value) :ws ")")) - :main :tuple -}) - - -# :empty/:line/:block are still giving me grief -# also, somehow line & column numbers ar -(def source -`` -( a, b ,c,d ) -`` -) - - -(peg/match simple-tuple source) -) - - diff --git a/janet/recursive.janet b/janet/recursive.janet deleted file mode 100644 index a5862d3..0000000 --- a/janet/recursive.janet +++ /dev/null @@ -1,1112 +0,0 @@ -### A recursive descent parser for Ludus - -### We still need to scan some things -#(os/cd "janet") # when in repl to do relative imports -(import ./scanner :as s) - -(defmacro declare - "Forward-declares a function name, so that it can be called in a mutually recursive manner." - [& names] - (def bindings @[]) - (loop [name :in names] - (def binding ~(var ,name nil)) - (array/push bindings binding)) - ~(upscope ,;bindings)) - -(defmacro defrec - "Defines a function depended on by another function, that has been forward `declare`d." - [name & forms] - (if-not (dyn name) (error "recursive functions must be declared before they are defined")) - ~(set ,name (defn- ,name ,;forms))) - -### Next: a data structure for a parser -(defn- new-parser - "Creates a new parser data structure to pass around" - [tokens] - @{ - :tokens (tokens :tokens) - :ast @[] - :current 0 - :errors @[] - }) - -### and some helper functions for interfacing with that data structure -(defn- current - "Returns the current token of a parser. If the parser is at the last token, keeps returning the last token." - [parser] - (def tokens (parser :tokens)) - (get tokens (parser :current) (last tokens))) - -(defn- peek - "Returns the next token of the parser. If the parser is at the last token, keeps returning the last token." - [parser] - (def tokens (parser :tokens)) - (get tokens (inc (parser :current)) (last tokens))) - -(defn- advance - "Advances the parser by a token" - [parser] - (update parser :current inc)) - -(defn- type - "Returns the type of a token" - [token] - (get token :type)) - -(defn- check - "Returns true if the parser's current token is one of the passed types" - [parser type & types] - (def accepts [type ;types]) - (def current-type (-> parser current (get :type))) - (has-value? accepts current-type)) - -### Parsing functions -# forward declarations -(declare simple nonbinding expr toplevel synthetic) - -# errors -# terminators are what terminate expressions -(def terminators [:break :newline :semicolon]) - -(defn- terminates? - "Returns true if the current token in the parser is a terminator" - [parser] - (def curr (current parser)) - (def ttype (type curr)) - (has-value? terminators ttype)) - -# breakers are what terminate panics -(def breaking [:break :newline :semicolon :comma :eof :then :else]) - -(defn- breaks? - "Returns true if the current token in the parser should break a panic" - [parser] - (def curr (current parser)) - (def ttype (type curr)) - (has-value? breaking ttype)) - -(defn- panic - "Panics the parser: starts skipping tokens until a breaking token is encountered. Adds the error to the parser's errors array, and also errors out." - [parser message] - (print "Panic in the parser: " message) - (def origin (current parser)) - (advance parser) - (def skipped @[origin]) - (while (not (breaks? parser)) - (array/push skipped (current parser)) - (advance parser)) - (array/push skipped (current parser)) - (def err {:type :error :data skipped :token origin :msg message}) - (update parser :errors array/push err) - (error err)) - -(defn- expected - "Panics the parser with a message: expected {type} got ..." - [parser ttype & ttypes] - (def expected (map string [ttype ;ttypes])) - (def type-msg (string/join expected " | ")) - (panic parser (string "expected {" type-msg "}, got " (-> parser current type)))) - -(defn- expect - "Panics if the parser's current token is not of type; otherwise does nothing & returns nil" - [parser type & types] - (if-not (check parser type ;types) (expected parser type ;types))) - -(defn- expect-ret - "Same as expect, but captures the error, returning it as a value" - [parser type & types] - (try (expect parser type ;types) ([e] e))) - -(defn- capture - "Applies the parse function to the parser, returning the parsed AST. If there is a panic, captures the panic and returns it as a value." - [parse-fn parser] - (try (parse-fn parser) ([e] e))) - -(defn- accept-one - "Accepts a single token of passed type, advancing the parser if a match, doing nothing if not." - [parser type & types] - (if (check parser type ;types) (advance parser))) - -(defn- accept-many - "Accepts any number of tokens of a passed type, advancing the parser on match until there are no more matches. Does nothing on no match." - [parser type & types] - (while (check parser type ;types) (advance parser))) - -# atoms -(defn- bool [parser] - (expect parser :true :false) - (def curr (-> parser current)) - (def ttype (type curr)) - (def value (if (= ttype :true) true false)) - (advance parser) - {:type :bool :data value :token curr} - ) - -(defn- num [parser] - (expect parser :number) - (def curr (-> parser current)) - (advance parser) - {:type :number :data (curr :literal) :token curr} - ) - -(defn- kw [parser] - (expect parser :keyword) - (if (= :lparen (-> parser peek type)) (break (synthetic parser))) - (def curr (-> parser current)) - (advance parser) - {:type :keyword :data (curr :literal) :token curr} - ) - -(defn- nill [parser] - (expect parser :nil) - (def curr (current parser)) - (advance parser) - {:type :nil :token curr}) - -(defn- str [parser] - (expect parser :string) - (def curr (-> parser current)) - (advance parser) - {:type :string :data (curr :literal) :token curr}) - -# interpolated strings, which are a whole other scene -(defn- scan-interpolations [data] - (print "scanning interpolation: " data) - (when (buffer? data) (break data)) - (pp data) - (def to-scan (data :to-scan)) - (def {:tokens tokens :errors errors} (s/scan to-scan)) - (pp tokens) - (print "there are " (length tokens) " tokens") - (def first-token (first tokens)) - (cond - (first errors) (first errors) - (empty? tokens) - {:type :error :msg "string interpolations/patterns must be single words"} - (< 3 (length tokens)) - {:type :error :msg "string interpolations/patterns must be single words"} - (= :word (first-token :type)) - {:type :word :data (first-token :lexeme) :token first-token} - :else {:type :error :msg "string interpolations/patterns must be single words"})) - -(defn- is-error? [data] - (cond - (buffer? data) false - (= :error (data :type)) true - false)) - -(defn- interpolated [parser] - (expect parser :interpolated) - (def origin (current parser)) - (def source (origin :literal)) - (def data @[]) - (var curr @"") - (var interp? false) - (var escape? false) - (each code source - (def char (string/from-bytes code)) - (cond - (= char "\\") (set escape? true) - escape? (if (= char "{") - (do - (buffer/push curr "{") - (set escape? false)) - (do - (buffer/push curr "\\") - (buffer/push curr char) - (set escape? false))) - (= char "{") (do - (set interp? true) - (array/push data curr) - (set curr @"")) - (= char "}") (if interp? (do - (set interp? false) - (array/push data {:to-scan curr}) - (set curr @"")) - (buffer/push curr char)) - :else (buffer/push curr char))) - (array/push data curr) - (def interpolated (map scan-interpolations data)) - (advance parser) - (def ast {:type :interpolated :data interpolated :token origin}) - (if (some is-error? interpolated) - (do - (def err {:type :error :msg "bad interpolated string" :data ast :token origin}) - (array/push (parser :errors) err) - err) - ast)) - -# words & synthetic expressions -(def separates [:break :newline :comma]) - -(defn- separates? [parser] - (def curr (current parser)) - (def ttype (type curr)) - (has-value? separates ttype)) - -(defn- separators [parser] - (if-not (separates? parser) - (panic parser (string "expected separator, got " (-> parser current type)))) - (while (separates? parser) (advance parser))) - -(def sequels [:lparen :keyword]) - -(defn- word-expr [parser] - (expect parser :word) - (if (has-value? sequels (-> parser peek type)) (break (synthetic parser))) - (def curr (-> parser current)) - (advance parser) - {:type :word :data (curr :lexeme) :token curr}) - -(defn- word-only [parser] - (expect parser :word) - (def curr (current parser)) - (advance parser) - {:type :word :data (curr :lexeme) :token curr}) - -(defn- args [parser] - (def origin (current parser)) - (advance parser) # consume the :lparen - (def ast @{:type :args :data @[] :token origin :partial false}) - (while (separates? parser) (advance parser)) # consume any separators - (while (not (check parser :rparen)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed paren"}) - (array/push (parser :errors) err) - (error err)) - (def origin (current parser)) - (def term (if (check parser :placeholder) - (if (ast :partial) - (do - (def err {:type :error :data [] :token origin :msg "partially applied functions may only use one placeholder"}) - (advance parser) - (update parser :errors array/push err) - err) - (do - (set (ast :partial) true) - (advance parser) - {:type :placeholder :token origin})) - (capture nonbinding parser))) - (array/push (ast :data) term) - (try (separators parser) - ([e] (pp e) (array/push (ast :data) e)))) - (advance parser) - ast) - -(defn- synth-root [parser] - (print "parsing synth root") - (def origin (current parser)) - (advance parser) - (case (type origin) - :word {:type :word :data (origin :lexeme) :token origin} - :keyword {:type :keyword :data (origin :literal) :token origin} - :pkg-name {:type :pkg-name :data (origin :lexeme) :token origin} - (panic parser "expected word, keyword, or package") - ) -) - -(defrec synthetic [parser] - (print "parsing synthetic") - (def origin (current parser)) - (def ast {:type :synthetic :data @[(synth-root parser)] :token origin}) - (while (has-value? sequels (-> parser current type)) - (def term - (case (-> parser current type) - :lparen (args parser) - :keyword (kw parser) - )) - (array/push (ast :data) term) - ) - ast -) - -# collections -(defn- tup [parser] - (def origin (current parser)) - (advance parser) # consume the :lparen - (def ast {:type :tuple :data @[] :token origin}) - (while (separates? parser) (advance parser)) # consume any separators - (while (not (check parser :rparen)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed paren"}) - (array/push (parser :errors) err) - (error err)) - (def term (capture nonbinding parser)) - (array/push (ast :data) term) - (try (separators parser) - ([e] (pp e) (array/push (ast :data) e)))) - (advance parser) - ast) - -(defn- list [parser] - (def origin (current parser)) - (advance parser) - (def ast {:type :list :data @[] :token origin}) - (while (separates? parser) (advance parser)) - (while (not (check parser :rbracket)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed bracket"}) - (array/push (parser :errors) err) - (error err)) - (def origin (current parser)) - (def term (if (check parser :splat) - (do - (advance parser) - (def splatted (capture word-only parser)) - {:type :splat :data splatted :token origin} - ) - (capture nonbinding parser))) - (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) - (advance parser) - ast) - -(defn- sett [parser] - (def origin (current parser)) - (advance parser) - (def ast {:type :set :data @[] :token origin}) - (while (separates? parser) (advance parser)) - (while (not (check parser :rbrace)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed brace"}) - (array/push (parser :errors) err) - (error err)) - (def origin (current parser)) - (def term (if (check parser :splat) - (do - (advance parser) - (def splatted (capture word-only parser)) - {:type :splat :data splatted :token origin} - ) - (capture nonbinding parser))) - (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) - (advance parser) - ast) - -(defn- dict [parser] - (def origin (current parser)) - (advance parser) - (def ast {:type :dict :data @[] :token origin}) - (while (separates? parser) (advance parser)) - (while (not (check parser :rbrace)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed brace"}) - (array/push (parser :errors) err) - (error err)) - (def origin (current parser)) - (def term (case (type origin) - :splat {:type :splat :data (capture word-only (advance parser)) :token origin} - :word (try (word-only parser) ([e] e)) - :keyword (do - (def key (try (kw parser) ([e] e))) - (def value (capture nonbinding parser)) - {:type :pair :data [key value] :token origin}) - (try (panic parser (string "expected dict term, got " (type origin))) ([e] e)) - )) - (array/push (ast :data) term) - (try (separators parser) ([e] (array/push (ast :data) e)))) - (advance parser) - ast) - -### patterns -(declare pattern) - -(defn- placeholder [parser] - (expect parser :placeholder :ignored) - (def origin (current parser)) - (advance parser) - {:type :placeholder :token origin}) - -(defn- word-pattern [parser] - (expect parser :word) - (def origin (current parser)) - (advance parser) - (def the-word {:type :word :data (origin :lexeme) :token origin}) - (if (check parser :as) - (do - (advance parser) - (def type (kw parser)) - {:type :typed :data [type the-word] :token origin}) - the-word)) - -(defn- tup-pattern [parser] - (def origin (current parser)) - (advance parser) # consume the :lparen - (def ast {:type :tuple :data @[] :token origin}) - (while (separates? parser) (advance parser)) # consume any separators - (while (not (check parser :rparen)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed paren"}) - (array/push (parser :errors) err) - (error err)) - (def origin (current parser)) - (def term (if (check parser :splat) - (do - (advance parser) - (def splatted (when (check parser :word) (word-only parser))) - {:type :splat :data splatted :token origin}) - (capture pattern parser))) - (array/push (ast :data) term) - (try (separators parser) - ([e] (pp e) (array/push (ast :data) e)))) - (advance parser) - ast) - -(defn- list-pattern [parser] - (def origin (current parser)) - (advance parser) - (def ast {:type :list :data @[] :token origin}) - (while (separates? parser) (advance parser)) - (while (not (check parser :rbracket)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed bracket"}) - (array/push (parser :errors) err) - (error err)) - (def origin (current parser)) - (def term (if (check parser :splat) - (do - (advance parser) - (def splatted (when (check parser :word) (word-only parser))) - {:type :splat :data splatted :token origin}) - (capture pattern parser))) - (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) - (advance parser) - ast) - -(defn- dict-pattern [parser] - (def origin (current parser)) - (advance parser) - (def ast {:type :dict :data @[] :token origin}) - (while (separates? parser) (advance parser)) - (while (not (check parser :rbrace)) - (when (check parser :eof) - (def err {:type :error :token origin :msg "unclosed brace"}) - (array/push (parser :errors) err) - (error err)) - (def origin (current parser)) - (def term (case (type origin) - :splat {:type :splat :data (when (check (advance parser) :word) (word-only parser)) :token origin} - :word (do - (def word (capture word-pattern parser)) - (def name (word :data)) - (def key {:type :keyword :data (keyword name) :token origin}) - {:type :pair :data [key word] :token origin}) - :keyword (do - (def key (capture kw parser)) - (def value (capture pattern parser)) - {:type :pair :data [key value] :token origin}) - (try (panic parser (string "expected dict term, got " (type origin))) ([e] e)) - )) - (array/push (ast :data) term) - (try (separators parser) ([e] (array/push (ast :data) e)))) - (advance parser) - ast) - -### TODO: add as patterns -(defrec pattern [parser] - (case (-> parser current type) - :nil (nill parser) - :true (bool parser) - :false (bool parser) - :keyword (kw parser) - :number (num parser) - :string (str parser) - :word (word-pattern parser) - :placeholder (placeholder parser) - :ignored (placeholder parser) - :lparen (tup-pattern parser) - :lbracket (list-pattern parser) - :startdict (dict-pattern parser) - :interpolated (interpolated parser) - (panic parser "expected pattern") - )) - -### let -# let {pattern} = {nonbinding} -(defn- lett [parser] - (def ast {:type :let :data @[] :token (current parser)}) - (advance parser) # consume the let - (array/push (ast :data) (capture pattern parser)) - (if-let [err (expect-ret parser :equals)] - (do (array/push (ast :data) err) (break ast)) - (advance parser)) - (accept-many parser :newline) - (array/push (ast :data) (capture nonbinding parser)) - ast) - -### conditional forms -# if {simple} then {nonbinding} else {nonbinding} -(defn- iff [parser] - (def ast {:type :if :data @[] :token (current parser)}) - (advance parser) #consume the if - (array/push (ast :data) (capture simple parser)) - (accept-many parser :newline) - (if-let [err (expect-ret parser :then)] - (array/push (ast :data) err) - (advance parser)) - (array/push (ast :data) (capture nonbinding parser)) - (accept-many parser :newline) - (if-let [err (expect-ret parser :else)] - (array/push (ast :data) err) - (advance parser)) - (array/push (ast :data) (capture nonbinding parser)) - ast) - -(defn- terminator [parser] - (if-not (terminates? parser) - # this line panics, captures the panic, advances the parser, and re-throws the error; solves an off-by-one error - (panic parser "expected terminator")) - (advance parser) - (while (terminates? parser) (advance parser))) - -# {simple} -> {nonbinding} {terminator} -### TODO: add placeholder as valid lhs -(defn- when-clause [parser] - (try - (do - (def lhs (simple parser)) - (expect parser :arrow) - (advance parser) - (accept-many parser :newline) - (def rhs (nonbinding parser)) - (terminator parser) - [lhs rhs]) - ([err] - (advance parser) # consume the breaking token - (accept-many parser :newline :semicolon :break) # ...and any additional ones - err))) - -# when { {when-clause}+ } -(defn- whenn [parser] - (def origin (current parser)) - (def ast {:type :when :data @[] :token origin}) - (advance parser) # consume when - (if-let [err (expect-ret parser :lbrace)] - (do - (array/push (ast :data) err) - (break ast)) # early return; just bail if we don't have { - (advance parser)) - (accept-many parser :newline) - (while (not (check parser :rbrace )) # make sure we don't roll past eof - (when (check parser :eof) (error {:type :error :token origin :data ast :msg "unclosed brace"})) - (array/push (ast :data) (capture when-clause parser))) - (advance parser) - ast) - -### TODO: add guards to patterns -(defn- match-clause [parser] - (try - (do - (def ast {:type :clause :data @[] :origin (current parser)}) - (def lhs (pattern parser)) - (def guard (when (check parser :if) - (advance parser) - (simple parser))) - (expect parser :arrow) - (advance parser) - (accept-many parser :newline) - (def rhs (nonbinding parser)) - (terminator parser) - [lhs guard rhs]) - ([err] - (accept-many parser ;terminators) - err))) - -(defn- matchh [parser] - (def origin (current parser)) - (def ast {:type :match :data @[] :token origin}) - (expect parser :match) - (advance parser) - (try - (do - (simple parser) - (expect parser :with) (advance parser) - (expect parser :lbrace) (advance parser) - (accept-many parser :newline) - (while (not (check parser :rbrace)) - (when (check parser :eof) (error {:type :error :token origin :data ast :msg "unclosed brace"})) - (array/push (ast :data) (match-clause parser))) - (advance parser) - ast) - ([err] err))) - -# {pattern} = {nonbinding} {terminators} -(defn- with-clause [parser] - (try - (do - (def lhs (pattern parser)) - (def guard (when (check parser :if) - (advance parser) - (simple parser))) - (expect parser :equals) (advance parser) - (def rhs (nonbinding parser)) - (terminator parser) - [lhs guard rhs] - ) - ([err] - (accept-many parser ;terminators) - err) - ) -) - -# with { {clauses}+ } {terminators}? then {nonbinding} {terminators}? else {nonbinding} -(defn- withh [parser] - (def origin (current parser)) - (expect parser :with) (advance parser) - (try - (do - (expect parser :lbrace) (var lbrace (current parser)) (advance parser) - (accept-many parser ;terminators) - (def clauses @[]) - (array/push clauses (with-clause parser)) - (accept-many parser ;terminators) - (while (not (check parser :rbrace)) - (if (check parser :eof) - (error {:type :error :data [clauses] :token lbrace :msg "unclosed brace"})) - (array/push clauses (with-clause parser)) - (accept-many parser ;terminators)) - (advance parser) # consume closing brace - (accept-many parser :newline) - (expect parser :then) (advance parser) - (def then (nonbinding parser)) - (accept-many parser :newline) - (expect parser :else) (advance parser) - (expect parser :lbrace) (set lbrace (current parser)) (advance parser) - (accept-many parser ;terminators) - (def else @[]) - (while (not (check parser :rbrace)) - (when (check parser :eof) (error {:type :error :token lbrace :data [else] :msg "unclosed brace"})) - (array/push else (match-clause parser))) - (advance parser) - {:type :with :data [clauses then else] :token origin}) - ([err] err) - ) -) - -### function forms -(defn- fn-simple [parser] - (print "parsing simple function body") - (try - (do - (def lhs (tup-pattern parser)) - (print "parsed lhs") - (def guard (when (check parser :if) - (advance parser) - (simple parser))) - (print "parsed guard") - (expect parser :arrow) (advance parser) - (print "parsed arrow") - (accept-many parser :newline) - (def rhs (nonbinding parser)) - (print "parsed rhs") - [[lhs guard rhs]] - ) - ([err] err) - ) -) - -(defn- fn-clause [parser] - (def origin (current parser)) - (try - (do - (def lhs (tup-pattern parser)) - (def guard (when (check parser :if) - (advance parser) - (simple parser))) - (expect parser :arrow) (advance parser) - (accept-many parser :newline) - (def rhs (nonbinding parser)) - (terminator parser) - [lhs guard rhs]) - ([err] - (advance parser) - (accept-many parser ;terminators) - err - ) - ) -) - -(defn- fn-clauses [parser] - (print "parsing fn clauses") - (def origin (current parser)) - (expect parser :lbrace) (advance parser) - (accept-many parser ;terminators) - (def data @[]) - (while (not (check parser :rbrace)) - (if (check parser :eof) - (error {:type :error :token origin :data data :msg "unclosed brace"})) - (array/push data (capture fn-clause parser))) - (advance parser) - data) - -(defn- lambda [parser] - (def origin (current parser)) - (expect parser :fn) (advance parser) - @{:type :fn :data (fn-simple parser) :token origin}) - -(defn- fnn [parser] - (if (= :lparen (-> parser peek type)) (break (lambda parser))) - (try - (do - (print "parsing named function") - (def origin (current parser)) - (expect parser :fn) (advance parser) - (print "consumed `fn`") - (print "next token: ") - (pp (current parser)) - (def name (-> parser word-only (get :data))) - (print "function name: ") - (pp name) - (def data (case (-> parser current type) - :lbrace (fn-clauses parser) - :lparen (fn-simple parser) - (panic parser (string "expected clause or clauses, got " (-> current parser type))))) - @{:type :fn :name name :data data :token origin}) - ([err] err))) - -### compoound forms -(defn- block [parser] - (def origin (current parser)) - (expect parser :lbrace) (advance parser) - (accept-many parser ;terminators) - (def data @[]) - (while (not (check parser :rbrace)) - (if (check parser :eof) - (error {:type :error :token origin :data data :msg "unclosed brace"})) - (array/push data (capture expr parser)) - (terminator parser)) - (advance parser) - {:type :block :data data :token origin}) - -### TODO: decide whether this design works -# newlines are allowed AFTER pipelines, but not before -# eg. `do foo > \n bar > \n baz` -# but not `do foo \n > bar \n > baz` -# Otherwise, this isn't LR -(defn- doo [parser] - (def origin (current parser)) - (expect parser :do) (advance parser) - (def data @[]) - (array/push data (capture simple parser)) - (print "added first expression. current token:") - (pp (current parser)) - (while (check parser :pipeline) - (advance parser) - (accept-many parser :newline) - (array/push data (capture simple parser))) - {:type :do :data data :token origin}) - -### refs, pkgs, nses, etc. -(defn- ref [parser] - (def origin (current parser)) - (expect parser :ref) (advance parser) - (try - (do - (def name (-> parser word-only (get :data))) - (expect parser :equals) (advance parser) - (def value (nonbinding parser)) - {:type :ref :data value :name name :token origin}) - ([err] err))) - -(defn- pkg-name [parser] - (expect parser :pkg-name) - (def origin (current parser)) - (if (= :keyword (-> parser peek type)) (break (synthetic parser))) - (advance parser) - {:type :pkg-name :data (origin :lexeme) :token origin}) - -(defn- usee [parser] - (def origin (current parser)) - (expect parser :use) (advance parser) - (try - (do - {:type :use :data (pkg-name parser) :token origin}) - ([err] err))) - -(defn- pkg [parser] - (try - (do - (def origin (current parser)) - (expect parser :pkg) (advance parser) - (def name (-> parser pkg-name (get :data))) - (expect parser :lbrace) (advance parser) - (accept-many parser ;terminators) - (def data @[]) - (while (not (check parser :rbrace)) - (when (check parser :eof) - (def err {:type :error :token origin :data data :msg "unclosed brace"}) - (array/push (parser :errors) err) - (error err)) - (case (-> parser current type) - :keyword (do - (def origin (current parser)) - (def key (capture kw parser)) - (def value (capture simple parser)) - (array/push data {:type :pair :data [key value] :token origin})) - :word (do - (def value (word-only parser)) - (def key (keyword (value :data))) - (def kw-ast {:type :keyword :data key :token origin}) - (array/push data {:type :pair :data [key value] :token origin})) - (panic parser "expected pkg term")) - (terminator parser)) - (advance parser) - @{:type :pkg :data data :token origin :name name}) - ([err] err))) - -(defn- ns [parser] - (try - (do - (def origin (current parser)) - (expect parser :ns) (advance parser) - (def name (-> parser pkg-name (get :data))) - (def body (block parser)) - @{:type :ns :data body :name name :token origin}) - ([err] err))) - -(defn- importt [parser] - (def origin (current parser)) - (expect parser :import) (advance parser) - (def path (str parser)) - (expect parser :as) (advance parser) - (def name-parser (if (check parser :pkg-name) pkg-name word-only)) - (def name - (-> parser name-parser (get :data))) - {:type :import :data path :name name :token origin}) - -### tests -(defn- testt [parser] - (def origin (current parser)) - (expect parser :test) (advance parser) - (def desc (str parser)) - (def body (nonbinding parser)) - {:type :test :data [desc body] :token origin}) - -### loops and repeates -(defn- loopp [parser] - (def origin (current parser)) - (expect parser :loop) (advance parser) - (def args (tup parser)) - (expect parser :with) (advance parser) - (def clauses (case (-> parser current type) - :lparen (fn-simple parser) - :lbrace (fn-clauses parser) - )) - @{:type :loop :data [args clauses] :token origin}) - -(defn- recur [parser] - (def origin (current parser)) - (expect parser :recur) (advance parser) - (def args (tup parser)) - {:type :recur :data args :token origin}) - -(defn- repeatt [parser] - (def origin (current parser)) - (advance parser) - (def times (case (-> parser current type) - :number (num parser) - :word (word-only parser) - (panic parser "expected number or word") - )) - (def body (block parser)) - {:type :repeat :data [times body] :token origin}) - -### panics -(defn- panicc [parser] - (def origin (current parser)) - (expect parser :panic) (advance parser) - {:type :panic :data (nonbinding parser) :token origin}) - -### expressions -# four levels of expression complexity: -# simple (atoms, collections, synthetic expressions; no conditionals or binding or blocks) -# nonbinding (excludes let, ref, named fn: what is allowed inside collections) -# plain old exprs (anything but toplevel) -# toplevel (exprs + ns, pkg, test, import, use) - -# simple expressions: what can go anywhere you expect an expression -(defrec simple [parser] - (def curr (current parser)) - (case (type curr) - :nil (nill parser) - :true (bool parser) - :false (bool parser) - :number (num parser) - :keyword (kw parser) - :string (str parser) - :interpolated (interpolated parser) - :lparen (tup parser) - :lbracket (list parser) - :startdict (dict parser) - :startset (sett parser) - :word (word-expr parser) - :pkg-name (pkg-name parser) - :recur (recur parser) - :panic (panicc parser) - (panic parser (string "expected simple expression, got " (type curr))) - ) -) - -# non-binding expressions -# the rhs of lets, clauses, inside conditional forms, etc. -# any form that does not bind a name -(defrec nonbinding [parser] - (def curr (current parser)) - (case (type curr) - # atoms - :nil (nill parser) - :true (bool parser) - :false (bool parser) - :number (num parser) - :keyword (kw parser) - - # strings - :string (str parser) - ### TODO: interpolated strings - :interpolated (interpolated parser) - - # collection literals - :lparen (tup parser) - :lbracket (list parser) - :startdict (dict parser) - :startset (sett parser) - - # synthetic - :word (word-expr parser) - :pkg-name (pkg-name parser) - :recur (recur parser) - - # conditional forms - :if (iff parser) - :when (whenn parser) - :match (matchh parser) - :with (withh parser) - - # do - :do (doo parser) - - # fn: but only lambda - :fn (lambda parser) - - # blocks - :lbrace (block parser) - - # looping forms - :loop (loopp parser) - :repeat (repeatt parser) - - # panic! - :panic (panicc parser) - - (panic parser (string "expected nonbinding expression, got " (type curr))) - ) -) - -(defrec expr [parser] - (def curr (current parser)) - (case (type curr) - # binding forms - :let (lett parser) - :fn (fnn parser) - :ref (ref parser) - - # nonbinding forms - :nil (nill parser) - :true (bool parser) - :false (bool parser) - :number (num parser) - :keyword (kw parser) - :string (str parser) - :interpolated (interpolated parser) - :lparen (tup parser) - :lbracket (list parser) - :startdict (dict parser) - :startset (sett parser) - :word (word-expr parser) - :pkg-name (pkg-name parser) - :recur (recur parser) - :if (iff parser) - :when (whenn parser) - :match (matchh parser) - :with (withh parser) - :do (doo parser) - :lbrace (block parser) - :loop (loopp parser) - :repeat (repeatt parser) - :panic (panicc parser) - (panic parser (string "expected expression, got " (type curr))) - ) -) - -(defrec toplevel [parser] - (def curr (current parser)) - (case (type curr) - # toplevel-only - :pkg (pkg parser) - :ns (ns parser) - :test (testt parser) - :import (importt parser) - :use (usee parser) - - # all the other expressions - (expr parser) - ) -) - -(defn- script [parser] - (def origin (current parser)) - (def lines @[]) - (while (not (check parser :eof)) - (array/push lines (capture toplevel parser)) - (capture terminator parser)) - {:type :script :data lines :token origin}) - -(defn parse [scanned] - (def parser (new-parser scanned)) - (def ast (script parser)) - (set (parser :ast) ast) - parser) - -(defn- indent-by [n] - (def indentation @"") - (repeat n (buffer/push indentation "..")) - indentation) - -(defn- pp-ast [ast &opt indent] - (default indent 0) - (def {:type t :name n :data d :msg m} ast) - (string (indent-by indent) t ": " n m - (if (indexed? d) - (string "\n" (string/join (map (fn [a] (pp-ast a (inc indent))) d))) - d - ) - "\n" - ) -) - - -(do -#(comment -(def source ` -loop (1, 2) with (x, y) -> :bar -`) -(def scanned (s/scan source)) -(print "\n***NEW PARSE***\n") -(def a-parser (new-parser scanned)) -(def parsed (toplevel a-parser)) - -# (print (pp-ast parsed)) -(pp scanned) -(pp parsed) -) - - -# FIXME: -# TODO: -# DECIDE: -# - when to use a flat try/catch format, and when to use capture/expect-ret to get values instead of errors