Save some work.

This commit is contained in:
Scott Richmond 2024-03-25 16:04:54 -04:00
parent d253dc3d3e
commit efffbafdba
4 changed files with 494 additions and 1 deletions

380
janet/myparser.janet Normal file
View File

@ -0,0 +1,380 @@
(comment
So the thing here is that this is a much, much simpler version of the rather unweildy Clojure parser framework I wrote. This leans pretty hard on the Janet PEG framework, but is also bespoke for my purposes here.
This lets me parse not raw text, but scanned tokens, which makes parsing a much easier lift. Also, it lets me do things like insert :break tokens, which really helps manage collection types and blocks.
At current, I do need to shift to returning a data structure rather than `nil`s and `true`s. That way, I can advance the parser by a certain amount, however much a passing test-rule would consume.
Do I really need to optimize this? I don't think so: we can do that later. I could store various information on stacks.
Alternately, returning data structures may well complicate things: KISS!
The idea, however, is that Jesus: we must not have test-rule and apply-rule: combinators without a fixed advance amount can't work that way. So we have only `apply-rule`, which `advance`s the parser by however much should be consumed. BUT! We can always, and ought wherever we might, stash the origin index and roll back any changes (i.e. in `!` and `*`)
)
(defn pprint [x] (printf "%M" x))
(os/cd "janet") ### XXX: to remove, repl only
(import ./scanner :prefix "")
(defn new-parser
``
Creates a new parser from a list of tokens. A parser has three fields in addition to `:tokens`:
:i the index, in the list of tokens, of the current token
:errors an array of errors, treated like a stack
:captured an array of captures (tokens or arrays of tokens), treated like a stack.
``
[{:tokens tokens}]
@{:tokens tokens :i 0 :errors @[] :captured @[]})
(defn pop-to
"Pops items from a stack until it contains `len` items. Returns the stack."
[stack len]
(assert (>= len 0))
(while (< len (length stack)) (array/pop stack))
stack)
(defn popn
"Pops n items from a stack. Returns the stack."
[stack n]
(loop [_ :range [0 n]] (array/pop stack))
stack)
(defn current "Returns the current token." [{:tokens tokens :i i}] (get tokens i))
(defn previous "Returns the token before the current token." [{:tokens tokens :i i}] (get tokens (dec i)))
(defn next "Returns the token after the current token." [{:tokens tokens :i i}] (get tokens (inc i)))
(def add +) ### I `def` `+` later; stash this
(defn advance
"Advances the parser by `count`, or 1 if count is not supplied."
[parser] (update parser :i inc))
(defn stash
"Stashes a parser state: captures the current index, error, and capture stacks."
[{:i i :errors errs :captured cap}]
{:i i :err-len (length errs) :cap-len (length cap)})
(defn restore
"Restores the parser to a stashed state: resets :i, pops any new errors and captures off their stacks."
[parser state]
(def {:i i :err-len err-len :cap-len cap-len} state)
(def {:errors errors :captured captured} parser)
(put parser :i i)
(pop-to errors err-len)
(pop-to captured cap-len)
parser)
(defn at "Returns the token at index i." [parser i] (get (get parser :tokens) i))
(defn capture "Captures the last token." [parser] (update parser :captured array/push (previous parser)))
(defn apply-keyword
"Applies a keyword-based rule. Consumes the passing token."
[kw parser]
(def curr (current parser))
(def type (get curr :type))
(if (= kw type)
(do
(advance parser)
true)
nil))
(defn apply-table
"Applies a struct-based rule."
[rule parser]
(def curr (current parser))
(def rule-fn (get rule :rule))
(rule-fn parser))
(defn apply-rule
"Applies a rule: if the rule passes, consume all passing tokens and return true. If the rule fails, do nothing, returning `nil`."
[rule parser]
(case (type rule)
:keyword (apply-keyword rule parser)
:table (apply-table rule parser)
(error (string "unknown parser type: " (type rule)))))
(defn name
"Returns the name of a rule. Use this instead of (get rule :name), since bare keywords are also valid rules."
[rule]
(case (type rule)
:keyword rule
:table (rule :name)))
(defn rename
"Renames a rule. By convention, rule names are keywords."
[rule name] (put rule :name name))
(defn test
"Tests a rule: returns whether a rule passes but does not consume any tokens."
[rule parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(restore parser origin)
result)
(defn !
"Not: negates a rule. If the rule passes, does nothing, returns `nil`. If the rule fails, advances the parser a single token, returns true. Works well only for single token rules."
[rule]
@{:name (keyword (string "!" (name rule)))
:rule (fn !* [parser]
(def result (test rule parser))
(if-not result
(do (advance parser) true)
nil))})
(defn <-
"Capture: if the rule matches, consumes the token and pushes it to the capture stack. If the rule does not match, does nothing."
[rule]
@{:name (keyword (string "<-" (name rule)))
:rule (fn <-* [parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(if result (capture parser))
result)})
(defn +
"Choose a rule: matches if any one of the rules is matched. Order matters: rules are evaluated in order. It's useful to put any errors last."
[& rules]
@{:name (keyword (string/join (map name rules) "+"))
:rule (fn +* [parser]
(var passing nil)
(def origin (stash parser))
(loop [rule :in rules :while (not passing)]
(restore parser origin)
(set passing (apply-rule rule parser)))
(if-not passing (restore parser origin))
passing)})
(defn unreachable [&] (error "reached the unreachable"))
(defmacro declare [& names]
(def bindings @[])
(loop [name :in names]
(def binding ~(var ,name @{:name ,(keyword name) :rule unreachable}))
(array/push bindings binding))
~(upscope ,;bindings))
(defmacro defp [name rule]
(if (dyn name)
~(set ,name (put ,name :rule (,rule :rule)))
~(var ,name (rename ,rule ,(keyword name)))))
# If we wanted to be robust, we wouldn't hard-code this
(defp stop (+ :newline :semicolon :break))
(defn panic
[rule &opt msg]
@{:name (keyword (string "panic-" (name rule)))
:rule (fn panic* [parser]
(print "panicking!")
(def origin (current parser))
(var passing false)
(def skipped @[])
(while (not passing)
(array/push skipped (current parser))
(advance parser)
(set passing (apply-rule rule parser)))
(print "looped")
(def the-error @{:type :error :token origin :msg msg :skipped skipped})
(array/push (parser :errors) the-error)
(array/push (parser :captured) the-error)
true
)})
(defn panic
"Panics the parser, consuming all tokens until the rule matches (including the match). It also adds an error node to both the capture and the error stacks."
[parser expected]
(print "panic! in the parser")
(def origin (current parser))
(var passing false)
(def skipped @[])
(while (not passing)
(array/push skipped (current parser))
(advance parser)
(set passing (apply-rule rule parser))
(print "phew; I'm done panicking")
(pprint (current parser))
(def the-error {:type :error :expected expected :token origin :skipped skipped})
(array/push (parser :errors) the-error)
(array/push (parser :captured) the-error)
(error parser)))
(defn *
"Sequences rules: matches if all rules are matched, in sequence."
[& rules]
@{:name (keyword (string/join (map name rules) "*"))
:rule (fn ** [parser]
(print "applying sequence")
(var passing true)
(def origin (stash parser))
(pprint origin)
(set failing nil)
(loop [rule :in rules :while passing]
(def pass? (apply-rule rule parser))
(when (not pass?)
(set failing rule)
(set passing false)))
# this isn't the right thing to do: don't back out, panic!
# (when (not passing) (restore parser origin))
(if passing
passing
(panic parser (string "expected " (name failing)))))})
## next problem: where do we back out to, and how?
## the answer: an "anchor" point, where we `(try) something, and if it panics, start up with another line; in a block it will be line; in a script it will be toplevel`
## maybe terms in a tuple, list, dict
## it seems noteworthy that the things we want to return to are all either "any" or "some", and all either end with a newline or a break
## they're not all either any or some (empty data structures, but lines in blocks and scripts)
## also: there's a finite number of possibilities for any of them:
## another well-formed term, or the end of the item
## consider:
(comment
(defp tuple-term (nonbinding (some separator)))
(defp tuple-expr (order-1 (<- :lparen) (any separator) (any! tuple-term separator) (:rparen)))
)
## the tuple-term can be the anchor point
## also, in this case, we panic until a *separator*, not a *terminator*
## that suggests we need a panic-until parameter, rather than the hardcoded stop
## is this right?
(defn order-1 [& rules]
@{:name (keyword (string/join (map name rules) "*"))
:rule (fn order-1 [parser]
(def result (test (first rules) parser))
(if result
(apply-rule (order-0 ;rules))
nil))})
(defn capture-group
"Takes a parser and an origin state: takes everything captured since the origin, gathers it up in a single array, pops it all off the capture stack, and pushes the gathered captures back on the stack."
[parser origin]
(def from (get origin :cap-len))
(def captured (get parser :captured))
(def grouped (array/slice captured from -1))
(pop-to captured from)
(array/push captured grouped)
captured)
(defn &
"Groups rules: pops all captured patterns and pushes them on the captured stack as a single array."
[rule]
@{:name (keyword (string "&" (name rule)))
:rule (fn &* [parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(if result
(do
(capture-group parser origin)
true)
(do
(restore parser origin)
nil)))})
(defn fn-name
"Returns the name of a function."
[f]
(def rep (string f))
(string/slice rep 10 -2))
(defn /
"Substitution: takes a rule and a function, and applies the function to any captures arising from the rule; the captures are individual parameters to the function."
[rule f]
@{:name (keyword (string (name rule) "/" (fn-name f)))
:rule (fn /* [parser]
(def origin (stash parser))
(def captured (get parser :captured))
(def result (apply-rule (& rule) parser))
(if result
(do
(def grouped (array/pop captured))
(def applied (f ;grouped))
(array/push captured applied)
true)
(do
(restore parser origin)
nil)))})
(defn any
"Matches zero or more instances of a rule."
[rule]
@{:name (keyword (string "any-" (name rule)))
:rule (fn any* [parser]
(var result true)
(while result
(set result (apply-rule rule parser)))
true)})
(defn some
"Matches one or more instances of a rule."
[rule]
@{:name (keyword (string "some-" (name rule)))
:rule (fn some* [parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(if result
(do
(apply-rule (any rule) parser)
true)
(do
(restore parser origin)
nil)))})
(defn parse [tokens rule]
(def parser (new-parser tokens))
(try
(do (apply-rule rule parser) parser)
([err] err))
(upscope
(defn literal->ast [token] {:type (get token :type) :data (get token :literal) :token token})
(defn word->ast [token] {:type (get token :type) :data (get token :lexeme) :token token})
(defn tuple->ast [origin & tokens] {:type :tuple :data tokens :token origin})
(defn if->ast [origin & tokens] {:type :if :data tokens :token origin})
(declare expression simple nonbinding)
(defp separator (+ :newline :comma :break))
(defp separators? (any (+ :newline :comma :break)))
(defp terminator (some (+ :newline :semicolon :break)))
(defp terminators? (any (+ :newline :semicolon :break)))
(defp nls? (any :newline))
(defp atom (/ (<- (+ :nil :true :false :number :keyword :string)) literal->ast))
(defp word (/ (<- :word) word->ast))
(defp tuple-term (* nonbinding separator))
(defp tuple-literal (/ (* (<- :lparen) separators? (any tuple-term) :rparen) tuple->ast))
(defp if-expr (/ (* (<- :if) simple nls? :then nonbinding nls? :else nonbinding) if->ast))
# (defp root (<- (+ :word :keyword)))
# (defp follow (<- (+ :keyword :arg-tuple)))
# (defp synthetic (& (* root (some follow))))
(defp simple (+ atom word tuple-literal))
(defp nonbinding (+ atom word tuple-literal))
(defp expression (+ atom word tuple-literal))
(defp line (* expression (some terminator)))
# (defp toplevel)
(defp script (some line))
(def source
``
if (:)foo then :bar else :baz
``
)
(-> (scan source) (parse if-expr) pprint)
)

View File

@ -1,4 +1,6 @@
### in repl, make sure to (os/cwd) into the janet dir
### in repl, make sure to (os/cd) into the janet dir
# (os/cd "janet")
(import ./clj-loop :prefix "")
(defn ? [val default] (if (nil? val) default val))

104
janet/peg-parser.janet Normal file
View File

@ -0,0 +1,104 @@
(defn ->kw [kw-str]
(keyword (slice kw-str 1)))
(defn word [str] [:word str])
(defn num [num] [:number (scan-number num)])
(defn kw [_ kw] [:keyword (->kw kw)])
(defn str [str] [:string str])
(defn bool [bool] (if (= bool "true") [:boolean true] [:boolean false]))
(defn nill [_] [:nil nil])
(defn not-empty? [x] (not (empty? x)))
(defn ifl [args] [:if ;args])
(defn letl [args] [:let ;args])
(defn block [args] [:block args])
(def errors @[])
(defn ? [x default] (if (nil? x) default x))
(defn panic [& args]
(def info (filter |(not= "" $) args))
(def [msg line col source] info)
(def error {:msg msg :line line :col col :source (? source "")})
(array/push errors error)
[:error error]
)
(do
(def ludus-grammar
~{:nil (cmt (<- "nil") ,nill)
:true (cmt (<- "true") ,bool)
:false (cmt (<- "false") ,bool)
:comment (* "&" (any (if-not (+ "\n" -1) 1)))
:wordchars (+ :w (set "_-/*?!"))
:reserved (+ "if" "then" "else" "let")
:word (cmt (<- (if-not :reserved (* (range "az") (any :wordchars)))) ,word)
:keyword (cmt (<- (* ":" :word)) ,kw)
:hex (range "09" "af" "AF")
:escape (* "\\" (+ (set `"'0?\abefnrtvz`)
(* "x" :hex :hex)
(* "u" [4 :hex])
(* "U" [6 :hex])
(error (constant "bad escape"))))
:string (cmt (* "\"" (<- (any (+ :escape (if-not "\"" 1)))) "\"") ,str)
:int (* (? "-") (range "19") (any (+ "_" (range "09"))))
:float (* (? "-") (+ :int "0") "." (any (+ "_" (range "09"))))
:zero "0"
:number (cmt (<- (+ :float :int :zero)) ,num)
:comment (* "&" (any (if-not (+ "\n" -1) 1)))
:atom (+ :nil :true :false :keyword :number :word)
:newline "\n"
:terminator (* (any :ws) (some (+ :newline ";")))
:separator (* (any :ws) (some (+ :newline ",")))
:ws (any (+ :comment (set " \t")))
:simple (+ :atom :string :word (* (constant "expected simple" :err-msg) :panic))
:then (+ (* (+ (some :ws) (any :newline)) (any :ws) "then" (some :ws)) (* (constant "expected then" :err-msg) :panic))
:else (+ (* (+ (some :ws) (any :newline)) (any :ws) "else" (some :ws)) (* (constant "expected else" :err-msg) :panic))
:panic (cmt (* (<- (-> :err-msg)) (<- (line)) (<- (column)) (<- (* (any (if-not (+ "\n" -1) 1))))) ,panic)
:non-binding (+ :atom :word :string :if :block
(* (constant "expected non-binding" :err-msg) :panic))
:tuple-term (* :number (some :separator))
:tuple (* "(" (any :separator) (any :tuple-term) ")")
:block (cmt (group (* "{" (some :line) (? :expression) "}")) ,block)
:if (cmt (group (* "if" (some :ws) :simple :then :non-binding :else :non-binding)) ,ifl)
:pattern (+ :atom :string (* (constant "expected pattern") :panic))
:equals (* (any :ws) "=" (any :ws))
:let (cmt (group (* "let" (some :ws) :pattern :equals :non-binding)) ,letl)
:expression (+ :atom :string :block :if :let
(* (constant "expected expression" :err-msg) :panic))
:empty (* :ws :terminator)
:line (* :ws :expression :ws :terminator)
:main :tuple})
(def simple-tuple ~{
:value (<- (range "az"))
:separator (+ "," "\n")
:ws (any (set " \t"))
:term (* :ws :value :ws :separator)
:tuple (group (* "(" (any :term) (? :value) :ws ")"))
:main :tuple
})
# :empty/:line/:block are still giving me grief
# also, somehow line & column numbers ar
(def source
``
( a, b ,c,d )
``
)
(peg/match simple-tuple source)
)

7
janet/test.janet Normal file
View File

@ -0,0 +1,7 @@
(def myodd? nil)
(defn myeven? [x] (if (= 0 x) true (myodd? (dec x))))
(defn myodd? [x] (if (= 0 x) false (myeven? (dec x))))
(myeven? 2)