(comment So the thing here is that this is a much, much simpler version of the rather unweildy Clojure parser framework I wrote. This leans pretty hard on the Janet PEG framework, but is also bespoke for my purposes here. This lets me parse not raw text, but scanned tokens, which makes parsing a much easier lift. Also, it lets me do things like insert :break tokens, which really helps manage collection types and blocks. At current, I do need to shift to returning a data structure rather than `nil`s and `true`s. That way, I can advance the parser by a certain amount, however much a passing test-rule would consume. Do I really need to optimize this? I don't think so: we can do that later. I could store various information on stacks. Alternately, returning data structures may well complicate things: KISS! The idea, however, is that Jesus: we must not have test-rule and apply-rule: combinators without a fixed advance amount can't work that way. So we have only `apply-rule`, which `advance`s the parser by however much should be consumed. BUT! We can always, and ought wherever we might, stash the origin index and roll back any changes (i.e. in `!` and `*`) ) (defn pprint [x] (printf "%M" x)) (os/cd "janet") ### XXX: to remove, repl only (import ./scanner :prefix "") (defn new-parser `` Creates a new parser from a list of tokens. A parser has three fields in addition to `:tokens`: :i the index, in the list of tokens, of the current token :errors an array of errors, treated like a stack :captured an array of captures (tokens or arrays of tokens), treated like a stack. `` [{:tokens tokens}] @{:tokens tokens :i 0 :errors @[] :captured @[]}) (defn pop-to "Pops items from a stack until it contains `len` items. Returns the stack." [stack len] (assert (>= len 0)) (while (< len (length stack)) (array/pop stack)) stack) (defn popn "Pops n items from a stack. Returns the stack." [stack n] (loop [_ :range [0 n]] (array/pop stack)) stack) (defn current "Returns the current token." [{:tokens tokens :i i}] (get tokens i)) (defn previous "Returns the token before the current token." [{:tokens tokens :i i}] (get tokens (dec i))) (defn next "Returns the token after the current token." [{:tokens tokens :i i}] (get tokens (inc i))) (def add +) ### I `def` `+` later; stash this (defn advance "Advances the parser by `count`, or 1 if count is not supplied." [parser] (update parser :i inc)) (defn stash "Stashes a parser state: captures the current index, error, and capture stacks." [{:i i :errors errs :captured cap}] {:i i :err-len (length errs) :cap-len (length cap)}) (defn restore "Restores the parser to a stashed state: resets :i, pops any new errors and captures off their stacks." [parser state] (def {:i i :err-len err-len :cap-len cap-len} state) (def {:errors errors :captured captured} parser) (put parser :i i) (pop-to errors err-len) (pop-to captured cap-len) parser) (defn at "Returns the token at index i." [parser i] (get (get parser :tokens) i)) (defn capture "Captures the last token." [parser] (update parser :captured array/push (previous parser))) (defn apply-keyword "Applies a keyword-based rule. Consumes the passing token." [kw parser] (def curr (current parser)) (def type (get curr :type)) (if (= kw type) (do (advance parser) true) nil)) (defn apply-table "Applies a table-based rule." [rule parser] (def curr (current parser)) (def rule-fn (get rule :rule)) (rule-fn parser)) (defn apply-rule "Applies a rule: if the rule passes, consume all passing tokens and return true. If the rule fails, do nothing, returning `nil`." [rule parser] (case (type rule) :keyword (apply-keyword rule parser) :table (apply-table rule parser) (error (string "unknown parser type: " (type rule))))) (defn name "Returns the name of a rule. Use this instead of (get rule :name), since bare keywords are also valid rules." [rule] (case (type rule) :keyword rule :table (rule :name))) (defn rename "Renames a rule. By convention, rule names are keywords." [rule name] (put rule :name name)) ### As noted above: we should not use this ### Instead, combinators which might not advance should stash state ### These are: !, *, ...? (defn test "Tests a rule: returns whether a rule passes but does not consume any tokens." [rule parser] (def origin (stash parser)) (def result (apply-rule rule parser)) (restore parser origin) result) (defn ! "Not: negates a rule. If the rule passes, does nothing, returns `nil`. If the rule fails, advances the parser a single token, returns true. Works well only for single token rules." [rule] @{:name (keyword (string "!" (name rule))) :rule (fn !* [parser] (def origin (stash parser)) (def result (apply-rule rule parser)) (restore parser origin) (if-not result (do (advance parser) true) nil))}) (defn <- "Capture: if the rule matches, consumes the token and pushes it to the capture stack. If the rule does not match, does nothing." [rule] @{:name (keyword (string "<-" (name rule))) :rule (fn <-* [parser] (def origin (stash parser)) (def result (apply-rule rule parser)) (if result (capture parser)) result)}) (defn + "Choose a rule: matches if any one of the rules is matched. Order matters: rules are evaluated in order. It's useful to put any errors last." [& rules] @{:name (keyword (string/join (map name rules) "+")) :rule (fn +* [parser] (var passing nil) (def origin (stash parser)) (loop [rule :in rules :while (not passing)] (restore parser origin) (set passing (apply-rule rule parser))) (if-not passing (restore parser origin)) passing)}) (defn unreachable [&] (error "reached the unreachable")) (defmacro declare [& names] (def bindings @[]) (loop [name :in names] (def binding ~(var ,name @{:name ,(keyword name) :rule unreachable})) (array/push bindings binding)) ~(upscope ,;bindings)) (defmacro defp [name rule] (if (dyn name) ~(set ,name (put ,name :rule (,rule :rule))) ~(var ,name (rename ,rule ,(keyword name))))) (comment (defn panic [rule &opt msg] @{:name (keyword (string "panic-" (name rule))) :rule (fn panic* [parser] (print "panicking!") (def origin (current parser)) (var passing false) (def skipped @[]) (while (not passing) (array/push skipped (current parser)) (advance parser) (set passing (apply-rule rule parser))) (print "looped") (def the-error @{:type :error :token origin :msg msg :skipped skipped}) (array/push (parser :errors) the-error) (array/push (parser :captured) the-error) true )})) # If we wanted to be robust, we wouldn't hard-code this # To consider: use a stack of "panic-until" on a parser to dynamically decide how far to panic (defp stop (+ :newline :semicolon :break)) (defn panic "Panics the parser, consuming all tokens until the rule matches (including the match). It also adds an error node to both the capture and the error stacks." [parser expected] (print "panic! in the parser") (def origin (current parser)) (var passing false) (def skipped @[]) (while (not passing) (array/push skipped (current parser)) (advance parser) (set passing (apply-rule stop parser)) (print "phew; I'm done panicking") (pprint (current parser)) (def the-error {:type :error :expected expected :token origin :skipped skipped}) (array/push (parser :errors) the-error) (array/push (parser :captured) the-error) (error parser))) (defn * "Sequences rules: matches if all rules are matched, in sequence." [& rules] @{:name (keyword (string/join (map name rules) "*")) :rule (fn ** [parser] (print "applying sequence") (var passing true) (def origin (stash parser)) (pprint origin) (var failing nil) (var passing true) (loop [rule :in rules :while passing] (def pass? (apply-rule rule parser)) (when (not pass?) (set failing rule) (set passing false))) # this isn't the right thing to do: don't back out, panic! # (when (not passing) (restore parser origin)) (if passing passing (panic parser (string "expected " (name failing)))))}) ## next problem: where do we back out to, and how? ## the answer: an "anchor" point, where we `(try) something, and if it panics, start up with another line; in a block it will be line; in a script it will be toplevel` ## maybe terms in a tuple, list, dict ## it seems noteworthy that the things we want to return to are all either "any" or "some", and all either end with a newline or a break ## they're not all either any or some (empty data structures, but lines in blocks and scripts) ## also: there's a finite number of possibilities for any of them: ## another well-formed term, or the end of the item ## consider: (comment (defp tuple-term (nonbinding (some separator))) (defp tuple-expr (order-1 (<- :lparen) (any separator) (any! tuple-term separator) (:rparen))) ) ## the tuple-term can be the anchor point ## also, in this case, we panic until a *separator*, not a *terminator* ## that suggests we need a panic-until parameter, rather than the hardcoded stop ## is this right? (comment (defn order-1 [& rules] @{:name (keyword (string/join (map name rules) "*")) :rule (fn order-1 [parser] (def result (test (first rules) parser)) (if result (apply-rule (order-0 ;rules)) ### compile error: unknown symbol order-0 nil))})) (defn capture-group "Takes a parser and an origin state: takes everything captured since the origin, gathers it up in a single array, pops it all off the capture stack, and pushes the gathered captures back on the stack." [parser origin] (def from (get origin :cap-len)) (def captured (get parser :captured)) (def grouped (array/slice captured from -1)) (pop-to captured from) (array/push captured grouped) captured) (defn & "Groups rules: pops all captured patterns and pushes them on the captured stack as a single array." [rule] @{:name (keyword (string "&" (name rule))) :rule (fn &* [parser] (def origin (stash parser)) (def result (apply-rule rule parser)) (if result (do (capture-group parser origin) true) (do (restore parser origin) nil)))}) (defn fn-name "Returns the name of a function." [f] (def rep (string f)) (string/slice rep 10 -2)) (defn / "Substitution: takes a rule and a function, and applies the function to any captures arising from the rule; the captures are individual parameters to the function." [rule f] @{:name (keyword (string (name rule) "/" (fn-name f))) :rule (fn /* [parser] (def origin (stash parser)) (def captured (get parser :captured)) (def result (apply-rule (& rule) parser)) (if result (do (def grouped (array/pop captured)) (def applied (f ;grouped)) (array/push captured applied) true) (do (restore parser origin) nil)))}) (defn any "Matches zero or more instances of a rule." [rule] @{:name (keyword (string "any-" (name rule))) :rule (fn any* [parser] (var result true) (while result (set result (apply-rule rule parser))) true)}) (defn some "Matches one or more instances of a rule." [rule] @{:name (keyword (string "some-" (name rule))) :rule (fn some* [parser] (def origin (stash parser)) (def result (apply-rule rule parser)) (if result (do (apply-rule (any rule) parser) true) (do (restore parser origin) nil)))}) (defn parse [tokens rule] (def parser (new-parser tokens)) (try (do (apply-rule rule parser) parser) ([err] err))) (upscope #XXX for easier repl use (defn literal->ast [token] {:type (get token :type) :data (get token :literal) :token token}) (defn word->ast [token] {:type (get token :type) :data (get token :lexeme) :token token}) (defn tuple->ast [origin & tokens] {:type :tuple :data tokens :token origin}) (defn if->ast [origin & tokens] {:type :if :data tokens :token origin}) (declare expression simple nonbinding) (defp separator (some (+ :newline :comma :break))) (defp separators? (any (+ :newline :comma :break))) (defp terminator (some (+ :newline :semicolon :break))) (defp terminators? (any (+ :newline :semicolon :break))) (defp nls? (any :newline)) (defp atom (/ (<- (+ :nil :true :false :number :keyword :string)) literal->ast)) (defp word (/ (<- :word) word->ast)) (defp tuple-term (* nonbinding separator)) (defp tuple-literal (/ (* (<- :lparen) separators? (any tuple-term) :rparen) tuple->ast)) (defp if-expr (/ (* (<- :if) simple nls? :then nonbinding nls? :else nonbinding) if->ast)) # (defp root (<- (+ :word :keyword))) # (defp follow (<- (+ :keyword :arg-tuple))) # (defp synthetic (& (* root (some follow)))) (defp simple (+ atom word tuple-literal)) (defp nonbinding (+ atom word tuple-literal)) (defp expression (+ atom word tuple-literal)) (defp line (* expression (some terminator))) # (defp toplevel) (defp script (some line)) (def source `` if (:)foo then :bar else :baz `` ) (-> (scan source) (parse if-expr) pprint) ) (comment Okay, so I'm trying to figure out how to manage panics in this declarative (not recursive-descent) parser. The question is how to )