clean up files
This commit is contained in:
parent
bc17fe5006
commit
b6e1d0e6ec
|
@ -1,49 +0,0 @@
|
|||
(defmacro clj-loop
|
||||
``A drop-in replacement for Clojure's loop form. Useful for the current project of converting Clojure code to Janet.
|
||||
``
|
||||
[bindings & body]
|
||||
(assert (even? (length bindings)) "Binding tuple must have an even number of terms")
|
||||
(def names @[])
|
||||
(def args @[])
|
||||
(loop [i :range [0 (length bindings)]]
|
||||
(if (even? i)
|
||||
(array/push names (get bindings i))
|
||||
(array/push args (get bindings i))))
|
||||
~(do (defn recur [,;names] ,;body) (recur ,;args)))
|
||||
|
||||
(defmacro defn+
|
||||
[name & clauses]
|
||||
~(defn ,name [& args]
|
||||
(print "before do")
|
||||
(do
|
||||
(def arities @{})
|
||||
(def clauses ,clauses)
|
||||
(def bindingses (map first clauses))
|
||||
(def bodies (map |(slice $ 1) clauses))
|
||||
(print "before loop")
|
||||
(loop [i :range [0 (length clauses)]]
|
||||
(def bindings (get bindingses i))
|
||||
(def arity (length bindings))
|
||||
(assert (not (get arities i)) "Clauses must have different arities")
|
||||
(def body (get bodies i))
|
||||
(def clause ~(fn ,name ,bindings ,;body))
|
||||
(put $arities arity clause))
|
||||
(print "before quasiquote")
|
||||
(fn [& args]
|
||||
(def arity (length args))
|
||||
(def clause (get arities arity))
|
||||
(assert clause "No clause with that arity")
|
||||
(clause ;args)))))
|
||||
|
||||
(defn+ add
|
||||
([] 0)
|
||||
([x] x)
|
||||
([x y] (+ x y)))
|
||||
|
||||
(macex1
|
||||
'(defn+ add
|
||||
([] 0)
|
||||
([x] x)
|
||||
([x y] (+ x y))))
|
||||
|
||||
|
|
@ -1,393 +0,0 @@
|
|||
(comment
|
||||
So the thing here is that this is a much, much simpler version of the rather unweildy Clojure parser framework I wrote. This leans pretty hard on the Janet PEG framework, but is also bespoke for my purposes here.
|
||||
|
||||
This lets me parse not raw text, but scanned tokens, which makes parsing a much easier lift. Also, it lets me do things like insert :break tokens, which really helps manage collection types and blocks.
|
||||
|
||||
At current, I do need to shift to returning a data structure rather than `nil`s and `true`s. That way, I can advance the parser by a certain amount, however much a passing test-rule would consume.
|
||||
|
||||
Do I really need to optimize this? I don't think so: we can do that later. I could store various information on stacks.
|
||||
|
||||
Alternately, returning data structures may well complicate things: KISS!
|
||||
|
||||
The idea, however, is that Jesus: we must not have test-rule and apply-rule: combinators without a fixed advance amount can't work that way. So we have only `apply-rule`, which `advance`s the parser by however much should be consumed. BUT! We can always, and ought wherever we might, stash the origin index and roll back any changes (i.e. in `!` and `*`)
|
||||
|
||||
)
|
||||
|
||||
(defn pprint [x] (printf "%M" x))
|
||||
|
||||
(os/cd "janet") ### XXX: to remove, repl only
|
||||
(import ./scanner :prefix "")
|
||||
|
||||
(defn new-parser
|
||||
``
|
||||
Creates a new parser from a list of tokens. A parser has three fields in addition to `:tokens`:
|
||||
:i the index, in the list of tokens, of the current token
|
||||
:errors an array of errors, treated like a stack
|
||||
:captured an array of captures (tokens or arrays of tokens), treated like a stack.
|
||||
``
|
||||
[{:tokens tokens}]
|
||||
@{:tokens tokens :i 0 :errors @[] :captured @[]})
|
||||
|
||||
(defn pop-to
|
||||
"Pops items from a stack until it contains `len` items. Returns the stack."
|
||||
[stack len]
|
||||
(assert (>= len 0))
|
||||
(while (< len (length stack)) (array/pop stack))
|
||||
stack)
|
||||
|
||||
(defn popn
|
||||
"Pops n items from a stack. Returns the stack."
|
||||
[stack n]
|
||||
(loop [_ :range [0 n]] (array/pop stack))
|
||||
stack)
|
||||
|
||||
(defn current "Returns the current token." [{:tokens tokens :i i}] (get tokens i))
|
||||
|
||||
(defn previous "Returns the token before the current token." [{:tokens tokens :i i}] (get tokens (dec i)))
|
||||
|
||||
(defn next "Returns the token after the current token." [{:tokens tokens :i i}] (get tokens (inc i)))
|
||||
|
||||
(def add +) ### I `def` `+` later; stash this
|
||||
|
||||
(defn advance
|
||||
"Advances the parser by `count`, or 1 if count is not supplied."
|
||||
[parser] (update parser :i inc))
|
||||
|
||||
(defn stash
|
||||
"Stashes a parser state: captures the current index, error, and capture stacks."
|
||||
[{:i i :errors errs :captured cap}]
|
||||
{:i i :err-len (length errs) :cap-len (length cap)})
|
||||
|
||||
(defn restore
|
||||
"Restores the parser to a stashed state: resets :i, pops any new errors and captures off their stacks."
|
||||
[parser state]
|
||||
(def {:i i :err-len err-len :cap-len cap-len} state)
|
||||
(def {:errors errors :captured captured} parser)
|
||||
(put parser :i i)
|
||||
(pop-to errors err-len)
|
||||
(pop-to captured cap-len)
|
||||
parser)
|
||||
|
||||
(defn at "Returns the token at index i." [parser i] (get (get parser :tokens) i))
|
||||
|
||||
(defn capture "Captures the last token." [parser] (update parser :captured array/push (previous parser)))
|
||||
|
||||
(defn apply-keyword
|
||||
"Applies a keyword-based rule. Consumes the passing token."
|
||||
[kw parser]
|
||||
(def curr (current parser))
|
||||
(def type (get curr :type))
|
||||
(if (= kw type)
|
||||
(do
|
||||
(advance parser)
|
||||
true)
|
||||
nil))
|
||||
|
||||
(defn apply-table
|
||||
"Applies a table-based rule."
|
||||
[rule parser]
|
||||
(def curr (current parser))
|
||||
(def rule-fn (get rule :rule))
|
||||
(rule-fn parser))
|
||||
|
||||
(defn apply-rule
|
||||
"Applies a rule: if the rule passes, consume all passing tokens and return true. If the rule fails, do nothing, returning `nil`."
|
||||
[rule parser]
|
||||
(case (type rule)
|
||||
:keyword (apply-keyword rule parser)
|
||||
:table (apply-table rule parser)
|
||||
(error (string "unknown parser type: " (type rule)))))
|
||||
|
||||
(defn name
|
||||
"Returns the name of a rule. Use this instead of (get rule :name), since bare keywords are also valid rules."
|
||||
[rule]
|
||||
(case (type rule)
|
||||
:keyword rule
|
||||
:table (rule :name)))
|
||||
|
||||
(defn rename
|
||||
"Renames a rule. By convention, rule names are keywords."
|
||||
[rule name] (put rule :name name))
|
||||
|
||||
### As noted above: we should not use this
|
||||
### Instead, combinators which might not advance should stash state
|
||||
### These are: !, *, ...?
|
||||
(defn test
|
||||
"Tests a rule: returns whether a rule passes but does not consume any tokens."
|
||||
[rule parser]
|
||||
(def origin (stash parser))
|
||||
(def result (apply-rule rule parser))
|
||||
(restore parser origin)
|
||||
result)
|
||||
|
||||
(defn !
|
||||
"Not: negates a rule. If the rule passes, does nothing, returns `nil`. If the rule fails, advances the parser a single token, returns true. Works well only for single token rules."
|
||||
[rule]
|
||||
@{:name (keyword (string "!" (name rule)))
|
||||
:rule (fn !* [parser]
|
||||
(def origin (stash parser))
|
||||
(def result (apply-rule rule parser))
|
||||
(restore parser origin)
|
||||
(if-not result
|
||||
(do (advance parser) true)
|
||||
nil))})
|
||||
|
||||
(defn <-
|
||||
"Capture: if the rule matches, consumes the token and pushes it to the capture stack. If the rule does not match, does nothing."
|
||||
[rule]
|
||||
@{:name (keyword (string "<-" (name rule)))
|
||||
:rule (fn <-* [parser]
|
||||
(def origin (stash parser))
|
||||
(def result (apply-rule rule parser))
|
||||
(if result (capture parser))
|
||||
result)})
|
||||
|
||||
(defn +
|
||||
"Choose a rule: matches if any one of the rules is matched. Order matters: rules are evaluated in order. It's useful to put any errors last."
|
||||
[& rules]
|
||||
@{:name (keyword (string/join (map name rules) "+"))
|
||||
:rule (fn +* [parser]
|
||||
(var passing nil)
|
||||
(def origin (stash parser))
|
||||
(loop [rule :in rules :while (not passing)]
|
||||
(restore parser origin)
|
||||
(set passing (apply-rule rule parser)))
|
||||
(if-not passing (restore parser origin))
|
||||
passing)})
|
||||
|
||||
(defn unreachable [&] (error "reached the unreachable"))
|
||||
|
||||
(defmacro declare [& names]
|
||||
(def bindings @[])
|
||||
(loop [name :in names]
|
||||
(def binding ~(var ,name @{:name ,(keyword name) :rule unreachable}))
|
||||
(array/push bindings binding))
|
||||
~(upscope ,;bindings))
|
||||
|
||||
(defmacro defp [name rule]
|
||||
(if (dyn name)
|
||||
~(set ,name (put ,name :rule (,rule :rule)))
|
||||
~(var ,name (rename ,rule ,(keyword name)))))
|
||||
|
||||
(comment (defn panic
|
||||
[rule &opt msg]
|
||||
@{:name (keyword (string "panic-" (name rule)))
|
||||
:rule (fn panic* [parser]
|
||||
(print "panicking!")
|
||||
(def origin (current parser))
|
||||
(var passing false)
|
||||
(def skipped @[])
|
||||
(while (not passing)
|
||||
(array/push skipped (current parser))
|
||||
(advance parser)
|
||||
(set passing (apply-rule rule parser)))
|
||||
(print "looped")
|
||||
(def the-error @{:type :error :token origin :msg msg :skipped skipped})
|
||||
(array/push (parser :errors) the-error)
|
||||
(array/push (parser :captured) the-error)
|
||||
true
|
||||
)}))
|
||||
|
||||
# If we wanted to be robust, we wouldn't hard-code this
|
||||
# To consider: use a stack of "panic-until" on a parser to dynamically decide how far to panic
|
||||
(defp stop (+ :newline :semicolon :break))
|
||||
|
||||
(defn panic
|
||||
"Panics the parser, consuming all tokens until the rule matches (including the match). It also adds an error node to both the capture and the error stacks."
|
||||
[parser expected]
|
||||
(print "panic! in the parser")
|
||||
(def origin (current parser))
|
||||
(var passing false)
|
||||
(def skipped @[])
|
||||
(while (not passing)
|
||||
(array/push skipped (current parser))
|
||||
(advance parser)
|
||||
(set passing (apply-rule stop parser))
|
||||
(print "phew; I'm done panicking")
|
||||
(pprint (current parser))
|
||||
(def the-error {:type :error :expected expected :token origin :skipped skipped})
|
||||
(array/push (parser :errors) the-error)
|
||||
(array/push (parser :captured) the-error)
|
||||
(error parser)))
|
||||
|
||||
(defn *
|
||||
"Sequences rules: matches if all rules are matched, in sequence."
|
||||
[& rules]
|
||||
@{:name (keyword (string/join (map name rules) "*"))
|
||||
:rule (fn ** [parser]
|
||||
(print "applying sequence")
|
||||
(var passing true)
|
||||
(def origin (stash parser))
|
||||
(pprint origin)
|
||||
(var failing nil)
|
||||
(var passing true)
|
||||
(loop [rule :in rules :while passing]
|
||||
(def pass? (apply-rule rule parser))
|
||||
(when (not pass?)
|
||||
(set failing rule)
|
||||
(set passing false)))
|
||||
# this isn't the right thing to do: don't back out, panic!
|
||||
# (when (not passing) (restore parser origin))
|
||||
(if passing
|
||||
passing
|
||||
(panic parser (string "expected " (name failing)))))})
|
||||
## next problem: where do we back out to, and how?
|
||||
## the answer: an "anchor" point, where we `(try) something, and if it panics, start up with another line; in a block it will be line; in a script it will be toplevel`
|
||||
## maybe terms in a tuple, list, dict
|
||||
## it seems noteworthy that the things we want to return to are all either "any" or "some", and all either end with a newline or a break
|
||||
## they're not all either any or some (empty data structures, but lines in blocks and scripts)
|
||||
## also: there's a finite number of possibilities for any of them:
|
||||
## another well-formed term, or the end of the item
|
||||
## consider:
|
||||
(comment
|
||||
(defp tuple-term (nonbinding (some separator)))
|
||||
(defp tuple-expr (order-1 (<- :lparen) (any separator) (any! tuple-term separator) (:rparen)))
|
||||
)
|
||||
## the tuple-term can be the anchor point
|
||||
## also, in this case, we panic until a *separator*, not a *terminator*
|
||||
## that suggests we need a panic-until parameter, rather than the hardcoded stop
|
||||
## is this right?
|
||||
|
||||
|
||||
(comment (defn order-1 [& rules]
|
||||
@{:name (keyword (string/join (map name rules) "*"))
|
||||
:rule (fn order-1 [parser]
|
||||
(def result (test (first rules) parser))
|
||||
(if result
|
||||
(apply-rule (order-0 ;rules)) ### compile error: unknown symbol order-0
|
||||
nil))}))
|
||||
|
||||
(defn capture-group
|
||||
"Takes a parser and an origin state: takes everything captured since the origin, gathers it up in a single array, pops it all off the capture stack, and pushes the gathered captures back on the stack."
|
||||
[parser origin]
|
||||
(def from (get origin :cap-len))
|
||||
(def captured (get parser :captured))
|
||||
(def grouped (array/slice captured from -1))
|
||||
(pop-to captured from)
|
||||
(array/push captured grouped)
|
||||
captured)
|
||||
|
||||
(defn &
|
||||
"Groups rules: pops all captured patterns and pushes them on the captured stack as a single array."
|
||||
[rule]
|
||||
@{:name (keyword (string "&" (name rule)))
|
||||
:rule (fn &* [parser]
|
||||
(def origin (stash parser))
|
||||
(def result (apply-rule rule parser))
|
||||
(if result
|
||||
(do
|
||||
(capture-group parser origin)
|
||||
true)
|
||||
(do
|
||||
(restore parser origin)
|
||||
nil)))})
|
||||
|
||||
(defn fn-name
|
||||
"Returns the name of a function."
|
||||
[f]
|
||||
(def rep (string f))
|
||||
(string/slice rep 10 -2))
|
||||
|
||||
(defn /
|
||||
"Substitution: takes a rule and a function, and applies the function to any captures arising from the rule; the captures are individual parameters to the function."
|
||||
[rule f]
|
||||
@{:name (keyword (string (name rule) "/" (fn-name f)))
|
||||
:rule (fn /* [parser]
|
||||
(def origin (stash parser))
|
||||
(def captured (get parser :captured))
|
||||
(def result (apply-rule (& rule) parser))
|
||||
(if result
|
||||
(do
|
||||
(def grouped (array/pop captured))
|
||||
(def applied (f ;grouped))
|
||||
(array/push captured applied)
|
||||
true)
|
||||
(do
|
||||
(restore parser origin)
|
||||
nil)))})
|
||||
|
||||
(defn any
|
||||
"Matches zero or more instances of a rule."
|
||||
[rule]
|
||||
@{:name (keyword (string "any-" (name rule)))
|
||||
:rule (fn any* [parser]
|
||||
(var result true)
|
||||
(while result
|
||||
(set result (apply-rule rule parser)))
|
||||
true)})
|
||||
|
||||
(defn some
|
||||
"Matches one or more instances of a rule."
|
||||
[rule]
|
||||
@{:name (keyword (string "some-" (name rule)))
|
||||
:rule (fn some* [parser]
|
||||
(def origin (stash parser))
|
||||
(def result (apply-rule rule parser))
|
||||
(if result
|
||||
(do
|
||||
(apply-rule (any rule) parser)
|
||||
true)
|
||||
(do
|
||||
(restore parser origin)
|
||||
nil)))})
|
||||
|
||||
(defn parse [tokens rule]
|
||||
(def parser (new-parser tokens))
|
||||
(try
|
||||
(do (apply-rule rule parser) parser)
|
||||
([err] err)))
|
||||
|
||||
(upscope #XXX for easier repl use
|
||||
|
||||
(defn literal->ast [token] {:type (get token :type) :data (get token :literal) :token token})
|
||||
|
||||
(defn word->ast [token] {:type (get token :type) :data (get token :lexeme) :token token})
|
||||
|
||||
(defn tuple->ast [origin & tokens] {:type :tuple :data tokens :token origin})
|
||||
|
||||
(defn if->ast [origin & tokens] {:type :if :data tokens :token origin})
|
||||
|
||||
(declare expression simple nonbinding)
|
||||
|
||||
(defp separator (some (+ :newline :comma :break)))
|
||||
(defp separators? (any (+ :newline :comma :break)))
|
||||
(defp terminator (some (+ :newline :semicolon :break)))
|
||||
(defp terminators? (any (+ :newline :semicolon :break)))
|
||||
(defp nls? (any :newline))
|
||||
|
||||
(defp atom (/ (<- (+ :nil :true :false :number :keyword :string)) literal->ast))
|
||||
(defp word (/ (<- :word) word->ast))
|
||||
|
||||
(defp tuple-term (* nonbinding separator))
|
||||
(defp tuple-literal (/ (* (<- :lparen) separators? (any tuple-term) :rparen) tuple->ast))
|
||||
|
||||
(defp if-expr (/ (* (<- :if) simple nls? :then nonbinding nls? :else nonbinding) if->ast))
|
||||
|
||||
# (defp root (<- (+ :word :keyword)))
|
||||
# (defp follow (<- (+ :keyword :arg-tuple)))
|
||||
# (defp synthetic (& (* root (some follow))))
|
||||
|
||||
(defp simple (+ atom word tuple-literal))
|
||||
(defp nonbinding (+ atom word tuple-literal))
|
||||
(defp expression (+ atom word tuple-literal))
|
||||
(defp line (* expression (some terminator)))
|
||||
# (defp toplevel)
|
||||
(defp script (some line))
|
||||
|
||||
(def source
|
||||
``
|
||||
if (:)foo then :bar else :baz
|
||||
|
||||
``
|
||||
)
|
||||
|
||||
(-> (scan source) (parse if-expr) pprint)
|
||||
)
|
||||
|
||||
(comment
|
||||
|
||||
Okay, so I'm trying to figure out how to manage panics in this declarative (not recursive-descent) parser.
|
||||
The question is how to do the thing where we pop out of the stack up to the most relevant rule.
|
||||
Given that we may have deeply nested rules, we either want:
|
||||
- A stack of rules in the parser, which
|
||||
)
|
1376
janet/parser.janet
1376
janet/parser.janet
File diff suppressed because it is too large
Load Diff
|
@ -1,104 +0,0 @@
|
|||
(defn ->kw [kw-str]
|
||||
(keyword (slice kw-str 1)))
|
||||
|
||||
(defn word [str] [:word str])
|
||||
|
||||
(defn num [num] [:number (scan-number num)])
|
||||
|
||||
(defn kw [_ kw] [:keyword (->kw kw)])
|
||||
|
||||
(defn str [str] [:string str])
|
||||
|
||||
(defn bool [bool] (if (= bool "true") [:boolean true] [:boolean false]))
|
||||
|
||||
(defn nill [_] [:nil nil])
|
||||
|
||||
(defn not-empty? [x] (not (empty? x)))
|
||||
|
||||
(defn ifl [args] [:if ;args])
|
||||
|
||||
(defn letl [args] [:let ;args])
|
||||
|
||||
(defn block [args] [:block args])
|
||||
|
||||
(def errors @[])
|
||||
|
||||
(defn ? [x default] (if (nil? x) default x))
|
||||
|
||||
(defn panic [& args]
|
||||
(def info (filter |(not= "" $) args))
|
||||
(def [msg line col source] info)
|
||||
(def error {:msg msg :line line :col col :source (? source "")})
|
||||
(array/push errors error)
|
||||
[:error error]
|
||||
)
|
||||
|
||||
(do
|
||||
(def ludus-grammar
|
||||
~{:nil (cmt (<- "nil") ,nill)
|
||||
:true (cmt (<- "true") ,bool)
|
||||
:false (cmt (<- "false") ,bool)
|
||||
:comment (* "&" (any (if-not (+ "\n" -1) 1)))
|
||||
:wordchars (+ :w (set "_-/*?!"))
|
||||
:reserved (+ "if" "then" "else" "let")
|
||||
:word (cmt (<- (if-not :reserved (* (range "az") (any :wordchars)))) ,word)
|
||||
:keyword (cmt (<- (* ":" :word)) ,kw)
|
||||
:hex (range "09" "af" "AF")
|
||||
:escape (* "\\" (+ (set `"'0?\abefnrtvz`)
|
||||
(* "x" :hex :hex)
|
||||
(* "u" [4 :hex])
|
||||
(* "U" [6 :hex])
|
||||
(error (constant "bad escape"))))
|
||||
:string (cmt (* "\"" (<- (any (+ :escape (if-not "\"" 1)))) "\"") ,str)
|
||||
:int (* (? "-") (range "19") (any (+ "_" (range "09"))))
|
||||
:float (* (? "-") (+ :int "0") "." (any (+ "_" (range "09"))))
|
||||
:zero "0"
|
||||
:number (cmt (<- (+ :float :int :zero)) ,num)
|
||||
:comment (* "&" (any (if-not (+ "\n" -1) 1)))
|
||||
:atom (+ :nil :true :false :keyword :number :word)
|
||||
:newline "\n"
|
||||
:terminator (* (any :ws) (some (+ :newline ";")))
|
||||
:separator (* (any :ws) (some (+ :newline ",")))
|
||||
:ws (any (+ :comment (set " \t")))
|
||||
:simple (+ :atom :string :word (* (constant "expected simple" :err-msg) :panic))
|
||||
:then (+ (* (+ (some :ws) (any :newline)) (any :ws) "then" (some :ws)) (* (constant "expected then" :err-msg) :panic))
|
||||
:else (+ (* (+ (some :ws) (any :newline)) (any :ws) "else" (some :ws)) (* (constant "expected else" :err-msg) :panic))
|
||||
:panic (cmt (* (<- (-> :err-msg)) (<- (line)) (<- (column)) (<- (* (any (if-not (+ "\n" -1) 1))))) ,panic)
|
||||
:non-binding (+ :atom :word :string :if :block
|
||||
(* (constant "expected non-binding" :err-msg) :panic))
|
||||
:tuple-term (* :number (some :separator))
|
||||
:tuple (* "(" (any :separator) (any :tuple-term) ")")
|
||||
:block (cmt (group (* "{" (some :line) (? :expression) "}")) ,block)
|
||||
:if (cmt (group (* "if" (some :ws) :simple :then :non-binding :else :non-binding)) ,ifl)
|
||||
:pattern (+ :atom :string (* (constant "expected pattern") :panic))
|
||||
:equals (* (any :ws) "=" (any :ws))
|
||||
:let (cmt (group (* "let" (some :ws) :pattern :equals :non-binding)) ,letl)
|
||||
:expression (+ :atom :string :block :if :let
|
||||
(* (constant "expected expression" :err-msg) :panic))
|
||||
:empty (* :ws :terminator)
|
||||
:line (* :ws :expression :ws :terminator)
|
||||
:main :tuple})
|
||||
|
||||
(def simple-tuple ~{
|
||||
:value (<- (range "az"))
|
||||
:separator (+ "," "\n")
|
||||
:ws (any (set " \t"))
|
||||
:term (* :ws :value :ws :separator)
|
||||
:tuple (group (* "(" (any :term) (? :value) :ws ")"))
|
||||
:main :tuple
|
||||
})
|
||||
|
||||
|
||||
# :empty/:line/:block are still giving me grief
|
||||
# also, somehow line & column numbers ar
|
||||
(def source
|
||||
``
|
||||
( a, b ,c,d )
|
||||
``
|
||||
)
|
||||
|
||||
|
||||
(peg/match simple-tuple source)
|
||||
)
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user