From efffbafdba1dde0a954127b58936799d977d38ad Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Mon, 25 Mar 2024 16:04:54 -0400 Subject: [PATCH] Save some work. --- janet/myparser.janet | 380 +++++++++++++++++++++++++++++++++++++++++ janet/parser.janet | 4 +- janet/peg-parser.janet | 104 +++++++++++ janet/test.janet | 7 + 4 files changed, 494 insertions(+), 1 deletion(-) create mode 100644 janet/myparser.janet create mode 100644 janet/peg-parser.janet create mode 100644 janet/test.janet diff --git a/janet/myparser.janet b/janet/myparser.janet new file mode 100644 index 0000000..7ae9342 --- /dev/null +++ b/janet/myparser.janet @@ -0,0 +1,380 @@ +(comment +So the thing here is that this is a much, much simpler version of the rather unweildy Clojure parser framework I wrote. This leans pretty hard on the Janet PEG framework, but is also bespoke for my purposes here. + +This lets me parse not raw text, but scanned tokens, which makes parsing a much easier lift. Also, it lets me do things like insert :break tokens, which really helps manage collection types and blocks. + +At current, I do need to shift to returning a data structure rather than `nil`s and `true`s. That way, I can advance the parser by a certain amount, however much a passing test-rule would consume. + +Do I really need to optimize this? I don't think so: we can do that later. I could store various information on stacks. + +Alternately, returning data structures may well complicate things: KISS! + +The idea, however, is that Jesus: we must not have test-rule and apply-rule: combinators without a fixed advance amount can't work that way. So we have only `apply-rule`, which `advance`s the parser by however much should be consumed. BUT! We can always, and ought wherever we might, stash the origin index and roll back any changes (i.e. in `!` and `*`) + +) + +(defn pprint [x] (printf "%M" x)) + +(os/cd "janet") ### XXX: to remove, repl only +(import ./scanner :prefix "") + +(defn new-parser + `` + Creates a new parser from a list of tokens. A parser has three fields in addition to `:tokens`: + :i the index, in the list of tokens, of the current token + :errors an array of errors, treated like a stack + :captured an array of captures (tokens or arrays of tokens), treated like a stack. + `` + [{:tokens tokens}] + @{:tokens tokens :i 0 :errors @[] :captured @[]}) + +(defn pop-to + "Pops items from a stack until it contains `len` items. Returns the stack." + [stack len] + (assert (>= len 0)) + (while (< len (length stack)) (array/pop stack)) + stack) + +(defn popn + "Pops n items from a stack. Returns the stack." + [stack n] + (loop [_ :range [0 n]] (array/pop stack)) + stack) + +(defn current "Returns the current token." [{:tokens tokens :i i}] (get tokens i)) + +(defn previous "Returns the token before the current token." [{:tokens tokens :i i}] (get tokens (dec i))) + +(defn next "Returns the token after the current token." [{:tokens tokens :i i}] (get tokens (inc i))) + +(def add +) ### I `def` `+` later; stash this + +(defn advance + "Advances the parser by `count`, or 1 if count is not supplied." + [parser] (update parser :i inc)) + +(defn stash + "Stashes a parser state: captures the current index, error, and capture stacks." + [{:i i :errors errs :captured cap}] + {:i i :err-len (length errs) :cap-len (length cap)}) + +(defn restore + "Restores the parser to a stashed state: resets :i, pops any new errors and captures off their stacks." + [parser state] + (def {:i i :err-len err-len :cap-len cap-len} state) + (def {:errors errors :captured captured} parser) + (put parser :i i) + (pop-to errors err-len) + (pop-to captured cap-len) + parser) + +(defn at "Returns the token at index i." [parser i] (get (get parser :tokens) i)) + +(defn capture "Captures the last token." [parser] (update parser :captured array/push (previous parser))) + +(defn apply-keyword + "Applies a keyword-based rule. Consumes the passing token." + [kw parser] + (def curr (current parser)) + (def type (get curr :type)) + (if (= kw type) + (do + (advance parser) + true) + nil)) + +(defn apply-table + "Applies a struct-based rule." + [rule parser] + (def curr (current parser)) + (def rule-fn (get rule :rule)) + (rule-fn parser)) + +(defn apply-rule + "Applies a rule: if the rule passes, consume all passing tokens and return true. If the rule fails, do nothing, returning `nil`." + [rule parser] + (case (type rule) + :keyword (apply-keyword rule parser) + :table (apply-table rule parser) + (error (string "unknown parser type: " (type rule))))) + +(defn name + "Returns the name of a rule. Use this instead of (get rule :name), since bare keywords are also valid rules." + [rule] + (case (type rule) + :keyword rule + :table (rule :name))) + +(defn rename + "Renames a rule. By convention, rule names are keywords." + [rule name] (put rule :name name)) + +(defn test + "Tests a rule: returns whether a rule passes but does not consume any tokens." + [rule parser] + (def origin (stash parser)) + (def result (apply-rule rule parser)) + (restore parser origin) + result) + +(defn ! + "Not: negates a rule. If the rule passes, does nothing, returns `nil`. If the rule fails, advances the parser a single token, returns true. Works well only for single token rules." + [rule] + @{:name (keyword (string "!" (name rule))) + :rule (fn !* [parser] + (def result (test rule parser)) + (if-not result + (do (advance parser) true) + nil))}) + +(defn <- + "Capture: if the rule matches, consumes the token and pushes it to the capture stack. If the rule does not match, does nothing." + [rule] + @{:name (keyword (string "<-" (name rule))) + :rule (fn <-* [parser] + (def origin (stash parser)) + (def result (apply-rule rule parser)) + (if result (capture parser)) + result)}) + +(defn + + "Choose a rule: matches if any one of the rules is matched. Order matters: rules are evaluated in order. It's useful to put any errors last." + [& rules] + @{:name (keyword (string/join (map name rules) "+")) + :rule (fn +* [parser] + (var passing nil) + (def origin (stash parser)) + (loop [rule :in rules :while (not passing)] + (restore parser origin) + (set passing (apply-rule rule parser))) + (if-not passing (restore parser origin)) + passing)}) + +(defn unreachable [&] (error "reached the unreachable")) + +(defmacro declare [& names] + (def bindings @[]) + (loop [name :in names] + (def binding ~(var ,name @{:name ,(keyword name) :rule unreachable})) + (array/push bindings binding)) + ~(upscope ,;bindings)) + +(defmacro defp [name rule] + (if (dyn name) + ~(set ,name (put ,name :rule (,rule :rule))) + ~(var ,name (rename ,rule ,(keyword name))))) + +# If we wanted to be robust, we wouldn't hard-code this +(defp stop (+ :newline :semicolon :break)) + +(defn panic + [rule &opt msg] + @{:name (keyword (string "panic-" (name rule))) + :rule (fn panic* [parser] + (print "panicking!") + (def origin (current parser)) + (var passing false) + (def skipped @[]) + (while (not passing) + (array/push skipped (current parser)) + (advance parser) + (set passing (apply-rule rule parser))) + (print "looped") + (def the-error @{:type :error :token origin :msg msg :skipped skipped}) + (array/push (parser :errors) the-error) + (array/push (parser :captured) the-error) + true + )}) + +(defn panic + "Panics the parser, consuming all tokens until the rule matches (including the match). It also adds an error node to both the capture and the error stacks." + [parser expected] + (print "panic! in the parser") + (def origin (current parser)) + (var passing false) + (def skipped @[]) + (while (not passing) + (array/push skipped (current parser)) + (advance parser) + (set passing (apply-rule rule parser)) + (print "phew; I'm done panicking") + (pprint (current parser)) + (def the-error {:type :error :expected expected :token origin :skipped skipped}) + (array/push (parser :errors) the-error) + (array/push (parser :captured) the-error) + (error parser))) + +(defn * + "Sequences rules: matches if all rules are matched, in sequence." + [& rules] + @{:name (keyword (string/join (map name rules) "*")) + :rule (fn ** [parser] + (print "applying sequence") + (var passing true) + (def origin (stash parser)) + (pprint origin) + (set failing nil) + (loop [rule :in rules :while passing] + (def pass? (apply-rule rule parser)) + (when (not pass?) + (set failing rule) + (set passing false))) + # this isn't the right thing to do: don't back out, panic! + # (when (not passing) (restore parser origin)) + (if passing + passing + (panic parser (string "expected " (name failing)))))}) +## next problem: where do we back out to, and how? +## the answer: an "anchor" point, where we `(try) something, and if it panics, start up with another line; in a block it will be line; in a script it will be toplevel` +## maybe terms in a tuple, list, dict +## it seems noteworthy that the things we want to return to are all either "any" or "some", and all either end with a newline or a break +## they're not all either any or some (empty data structures, but lines in blocks and scripts) +## also: there's a finite number of possibilities for any of them: +## another well-formed term, or the end of the item +## consider: +(comment +(defp tuple-term (nonbinding (some separator))) +(defp tuple-expr (order-1 (<- :lparen) (any separator) (any! tuple-term separator) (:rparen))) +) +## the tuple-term can be the anchor point +## also, in this case, we panic until a *separator*, not a *terminator* +## that suggests we need a panic-until parameter, rather than the hardcoded stop +## is this right? + + +(defn order-1 [& rules] + @{:name (keyword (string/join (map name rules) "*")) + :rule (fn order-1 [parser] + (def result (test (first rules) parser)) + (if result + (apply-rule (order-0 ;rules)) + nil))}) + +(defn capture-group + "Takes a parser and an origin state: takes everything captured since the origin, gathers it up in a single array, pops it all off the capture stack, and pushes the gathered captures back on the stack." + [parser origin] + (def from (get origin :cap-len)) + (def captured (get parser :captured)) + (def grouped (array/slice captured from -1)) + (pop-to captured from) + (array/push captured grouped) + captured) + +(defn & + "Groups rules: pops all captured patterns and pushes them on the captured stack as a single array." + [rule] + @{:name (keyword (string "&" (name rule))) + :rule (fn &* [parser] + (def origin (stash parser)) + (def result (apply-rule rule parser)) + (if result + (do + (capture-group parser origin) + true) + (do + (restore parser origin) + nil)))}) + +(defn fn-name + "Returns the name of a function." + [f] + (def rep (string f)) + (string/slice rep 10 -2)) + +(defn / + "Substitution: takes a rule and a function, and applies the function to any captures arising from the rule; the captures are individual parameters to the function." + [rule f] + @{:name (keyword (string (name rule) "/" (fn-name f))) + :rule (fn /* [parser] + (def origin (stash parser)) + (def captured (get parser :captured)) + (def result (apply-rule (& rule) parser)) + (if result + (do + (def grouped (array/pop captured)) + (def applied (f ;grouped)) + (array/push captured applied) + true) + (do + (restore parser origin) + nil)))}) + +(defn any + "Matches zero or more instances of a rule." + [rule] + @{:name (keyword (string "any-" (name rule))) + :rule (fn any* [parser] + (var result true) + (while result + (set result (apply-rule rule parser))) + true)}) + +(defn some + "Matches one or more instances of a rule." + [rule] + @{:name (keyword (string "some-" (name rule))) + :rule (fn some* [parser] + (def origin (stash parser)) + (def result (apply-rule rule parser)) + (if result + (do + (apply-rule (any rule) parser) + true) + (do + (restore parser origin) + nil)))}) + +(defn parse [tokens rule] + (def parser (new-parser tokens)) + (try + (do (apply-rule rule parser) parser) + ([err] err)) + +(upscope + +(defn literal->ast [token] {:type (get token :type) :data (get token :literal) :token token}) + +(defn word->ast [token] {:type (get token :type) :data (get token :lexeme) :token token}) + +(defn tuple->ast [origin & tokens] {:type :tuple :data tokens :token origin}) + +(defn if->ast [origin & tokens] {:type :if :data tokens :token origin}) + +(declare expression simple nonbinding) + +(defp separator (+ :newline :comma :break)) +(defp separators? (any (+ :newline :comma :break))) +(defp terminator (some (+ :newline :semicolon :break))) +(defp terminators? (any (+ :newline :semicolon :break))) +(defp nls? (any :newline)) + +(defp atom (/ (<- (+ :nil :true :false :number :keyword :string)) literal->ast)) +(defp word (/ (<- :word) word->ast)) + +(defp tuple-term (* nonbinding separator)) +(defp tuple-literal (/ (* (<- :lparen) separators? (any tuple-term) :rparen) tuple->ast)) + +(defp if-expr (/ (* (<- :if) simple nls? :then nonbinding nls? :else nonbinding) if->ast)) + +# (defp root (<- (+ :word :keyword))) +# (defp follow (<- (+ :keyword :arg-tuple))) +# (defp synthetic (& (* root (some follow)))) + +(defp simple (+ atom word tuple-literal)) +(defp nonbinding (+ atom word tuple-literal)) +(defp expression (+ atom word tuple-literal)) +(defp line (* expression (some terminator))) +# (defp toplevel) +(defp script (some line)) + +(def source +`` +if (:)foo then :bar else :baz + +`` +) + +(-> (scan source) (parse if-expr) pprint) +) + + diff --git a/janet/parser.janet b/janet/parser.janet index 4f35553..32c428c 100644 --- a/janet/parser.janet +++ b/janet/parser.janet @@ -1,4 +1,6 @@ -### in repl, make sure to (os/cwd) into the janet dir +### in repl, make sure to (os/cd) into the janet dir +# (os/cd "janet") + (import ./clj-loop :prefix "") (defn ? [val default] (if (nil? val) default val)) diff --git a/janet/peg-parser.janet b/janet/peg-parser.janet new file mode 100644 index 0000000..7241178 --- /dev/null +++ b/janet/peg-parser.janet @@ -0,0 +1,104 @@ +(defn ->kw [kw-str] + (keyword (slice kw-str 1))) + +(defn word [str] [:word str]) + +(defn num [num] [:number (scan-number num)]) + +(defn kw [_ kw] [:keyword (->kw kw)]) + +(defn str [str] [:string str]) + +(defn bool [bool] (if (= bool "true") [:boolean true] [:boolean false])) + +(defn nill [_] [:nil nil]) + +(defn not-empty? [x] (not (empty? x))) + +(defn ifl [args] [:if ;args]) + +(defn letl [args] [:let ;args]) + +(defn block [args] [:block args]) + +(def errors @[]) + +(defn ? [x default] (if (nil? x) default x)) + +(defn panic [& args] + (def info (filter |(not= "" $) args)) + (def [msg line col source] info) + (def error {:msg msg :line line :col col :source (? source "")}) + (array/push errors error) + [:error error] +) + +(do +(def ludus-grammar + ~{:nil (cmt (<- "nil") ,nill) + :true (cmt (<- "true") ,bool) + :false (cmt (<- "false") ,bool) + :comment (* "&" (any (if-not (+ "\n" -1) 1))) + :wordchars (+ :w (set "_-/*?!")) + :reserved (+ "if" "then" "else" "let") + :word (cmt (<- (if-not :reserved (* (range "az") (any :wordchars)))) ,word) + :keyword (cmt (<- (* ":" :word)) ,kw) + :hex (range "09" "af" "AF") + :escape (* "\\" (+ (set `"'0?\abefnrtvz`) + (* "x" :hex :hex) + (* "u" [4 :hex]) + (* "U" [6 :hex]) + (error (constant "bad escape")))) + :string (cmt (* "\"" (<- (any (+ :escape (if-not "\"" 1)))) "\"") ,str) + :int (* (? "-") (range "19") (any (+ "_" (range "09")))) + :float (* (? "-") (+ :int "0") "." (any (+ "_" (range "09")))) + :zero "0" + :number (cmt (<- (+ :float :int :zero)) ,num) + :comment (* "&" (any (if-not (+ "\n" -1) 1))) + :atom (+ :nil :true :false :keyword :number :word) + :newline "\n" + :terminator (* (any :ws) (some (+ :newline ";"))) + :separator (* (any :ws) (some (+ :newline ","))) + :ws (any (+ :comment (set " \t"))) + :simple (+ :atom :string :word (* (constant "expected simple" :err-msg) :panic)) + :then (+ (* (+ (some :ws) (any :newline)) (any :ws) "then" (some :ws)) (* (constant "expected then" :err-msg) :panic)) + :else (+ (* (+ (some :ws) (any :newline)) (any :ws) "else" (some :ws)) (* (constant "expected else" :err-msg) :panic)) + :panic (cmt (* (<- (-> :err-msg)) (<- (line)) (<- (column)) (<- (* (any (if-not (+ "\n" -1) 1))))) ,panic) + :non-binding (+ :atom :word :string :if :block + (* (constant "expected non-binding" :err-msg) :panic)) + :tuple-term (* :number (some :separator)) + :tuple (* "(" (any :separator) (any :tuple-term) ")") + :block (cmt (group (* "{" (some :line) (? :expression) "}")) ,block) + :if (cmt (group (* "if" (some :ws) :simple :then :non-binding :else :non-binding)) ,ifl) + :pattern (+ :atom :string (* (constant "expected pattern") :panic)) + :equals (* (any :ws) "=" (any :ws)) + :let (cmt (group (* "let" (some :ws) :pattern :equals :non-binding)) ,letl) + :expression (+ :atom :string :block :if :let + (* (constant "expected expression" :err-msg) :panic)) + :empty (* :ws :terminator) + :line (* :ws :expression :ws :terminator) + :main :tuple}) + +(def simple-tuple ~{ + :value (<- (range "az")) + :separator (+ "," "\n") + :ws (any (set " \t")) + :term (* :ws :value :ws :separator) + :tuple (group (* "(" (any :term) (? :value) :ws ")")) + :main :tuple +}) + + +# :empty/:line/:block are still giving me grief +# also, somehow line & column numbers ar +(def source +`` +( a, b ,c,d ) +`` +) + + +(peg/match simple-tuple source) +) + + diff --git a/janet/test.janet b/janet/test.janet new file mode 100644 index 0000000..4057c58 --- /dev/null +++ b/janet/test.janet @@ -0,0 +1,7 @@ +(def myodd? nil) + +(defn myeven? [x] (if (= 0 x) true (myodd? (dec x)))) + +(defn myodd? [x] (if (= 0 x) false (myeven? (dec x)))) + +(myeven? 2)