Compare commits

...

3 Commits

Author SHA1 Message Date
Scott Richmond
3e9f38ef5c update notes 2024-05-14 18:56:18 -04:00
Scott Richmond
b6e1d0e6ec clean up files 2024-05-14 18:56:06 -04:00
Scott Richmond
bc17fe5006 successfully flag tail calls 2024-05-14 18:52:11 -04:00
6 changed files with 1134 additions and 2081 deletions

View File

@ -1,49 +0,0 @@
(defmacro clj-loop
``A drop-in replacement for Clojure's loop form. Useful for the current project of converting Clojure code to Janet.
``
[bindings & body]
(assert (even? (length bindings)) "Binding tuple must have an even number of terms")
(def names @[])
(def args @[])
(loop [i :range [0 (length bindings)]]
(if (even? i)
(array/push names (get bindings i))
(array/push args (get bindings i))))
~(do (defn recur [,;names] ,;body) (recur ,;args)))
(defmacro defn+
[name & clauses]
~(defn ,name [& args]
(print "before do")
(do
(def arities @{})
(def clauses ,clauses)
(def bindingses (map first clauses))
(def bodies (map |(slice $ 1) clauses))
(print "before loop")
(loop [i :range [0 (length clauses)]]
(def bindings (get bindingses i))
(def arity (length bindings))
(assert (not (get arities i)) "Clauses must have different arities")
(def body (get bodies i))
(def clause ~(fn ,name ,bindings ,;body))
(put $arities arity clause))
(print "before quasiquote")
(fn [& args]
(def arity (length args))
(def clause (get arities arity))
(assert clause "No clause with that arity")
(clause ;args)))))
(defn+ add
([] 0)
([x] x)
([x y] (+ x y)))
(macex1
'(defn+ add
([] 0)
([x] x)
([x y] (+ x y))))

View File

@ -1,393 +0,0 @@
(comment
So the thing here is that this is a much, much simpler version of the rather unweildy Clojure parser framework I wrote. This leans pretty hard on the Janet PEG framework, but is also bespoke for my purposes here.
This lets me parse not raw text, but scanned tokens, which makes parsing a much easier lift. Also, it lets me do things like insert :break tokens, which really helps manage collection types and blocks.
At current, I do need to shift to returning a data structure rather than `nil`s and `true`s. That way, I can advance the parser by a certain amount, however much a passing test-rule would consume.
Do I really need to optimize this? I don't think so: we can do that later. I could store various information on stacks.
Alternately, returning data structures may well complicate things: KISS!
The idea, however, is that Jesus: we must not have test-rule and apply-rule: combinators without a fixed advance amount can't work that way. So we have only `apply-rule`, which `advance`s the parser by however much should be consumed. BUT! We can always, and ought wherever we might, stash the origin index and roll back any changes (i.e. in `!` and `*`)
)
(defn pprint [x] (printf "%M" x))
(os/cd "janet") ### XXX: to remove, repl only
(import ./scanner :prefix "")
(defn new-parser
``
Creates a new parser from a list of tokens. A parser has three fields in addition to `:tokens`:
:i the index, in the list of tokens, of the current token
:errors an array of errors, treated like a stack
:captured an array of captures (tokens or arrays of tokens), treated like a stack.
``
[{:tokens tokens}]
@{:tokens tokens :i 0 :errors @[] :captured @[]})
(defn pop-to
"Pops items from a stack until it contains `len` items. Returns the stack."
[stack len]
(assert (>= len 0))
(while (< len (length stack)) (array/pop stack))
stack)
(defn popn
"Pops n items from a stack. Returns the stack."
[stack n]
(loop [_ :range [0 n]] (array/pop stack))
stack)
(defn current "Returns the current token." [{:tokens tokens :i i}] (get tokens i))
(defn previous "Returns the token before the current token." [{:tokens tokens :i i}] (get tokens (dec i)))
(defn next "Returns the token after the current token." [{:tokens tokens :i i}] (get tokens (inc i)))
(def add +) ### I `def` `+` later; stash this
(defn advance
"Advances the parser by `count`, or 1 if count is not supplied."
[parser] (update parser :i inc))
(defn stash
"Stashes a parser state: captures the current index, error, and capture stacks."
[{:i i :errors errs :captured cap}]
{:i i :err-len (length errs) :cap-len (length cap)})
(defn restore
"Restores the parser to a stashed state: resets :i, pops any new errors and captures off their stacks."
[parser state]
(def {:i i :err-len err-len :cap-len cap-len} state)
(def {:errors errors :captured captured} parser)
(put parser :i i)
(pop-to errors err-len)
(pop-to captured cap-len)
parser)
(defn at "Returns the token at index i." [parser i] (get (get parser :tokens) i))
(defn capture "Captures the last token." [parser] (update parser :captured array/push (previous parser)))
(defn apply-keyword
"Applies a keyword-based rule. Consumes the passing token."
[kw parser]
(def curr (current parser))
(def type (get curr :type))
(if (= kw type)
(do
(advance parser)
true)
nil))
(defn apply-table
"Applies a table-based rule."
[rule parser]
(def curr (current parser))
(def rule-fn (get rule :rule))
(rule-fn parser))
(defn apply-rule
"Applies a rule: if the rule passes, consume all passing tokens and return true. If the rule fails, do nothing, returning `nil`."
[rule parser]
(case (type rule)
:keyword (apply-keyword rule parser)
:table (apply-table rule parser)
(error (string "unknown parser type: " (type rule)))))
(defn name
"Returns the name of a rule. Use this instead of (get rule :name), since bare keywords are also valid rules."
[rule]
(case (type rule)
:keyword rule
:table (rule :name)))
(defn rename
"Renames a rule. By convention, rule names are keywords."
[rule name] (put rule :name name))
### As noted above: we should not use this
### Instead, combinators which might not advance should stash state
### These are: !, *, ...?
(defn test
"Tests a rule: returns whether a rule passes but does not consume any tokens."
[rule parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(restore parser origin)
result)
(defn !
"Not: negates a rule. If the rule passes, does nothing, returns `nil`. If the rule fails, advances the parser a single token, returns true. Works well only for single token rules."
[rule]
@{:name (keyword (string "!" (name rule)))
:rule (fn !* [parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(restore parser origin)
(if-not result
(do (advance parser) true)
nil))})
(defn <-
"Capture: if the rule matches, consumes the token and pushes it to the capture stack. If the rule does not match, does nothing."
[rule]
@{:name (keyword (string "<-" (name rule)))
:rule (fn <-* [parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(if result (capture parser))
result)})
(defn +
"Choose a rule: matches if any one of the rules is matched. Order matters: rules are evaluated in order. It's useful to put any errors last."
[& rules]
@{:name (keyword (string/join (map name rules) "+"))
:rule (fn +* [parser]
(var passing nil)
(def origin (stash parser))
(loop [rule :in rules :while (not passing)]
(restore parser origin)
(set passing (apply-rule rule parser)))
(if-not passing (restore parser origin))
passing)})
(defn unreachable [&] (error "reached the unreachable"))
(defmacro declare [& names]
(def bindings @[])
(loop [name :in names]
(def binding ~(var ,name @{:name ,(keyword name) :rule unreachable}))
(array/push bindings binding))
~(upscope ,;bindings))
(defmacro defp [name rule]
(if (dyn name)
~(set ,name (put ,name :rule (,rule :rule)))
~(var ,name (rename ,rule ,(keyword name)))))
(comment (defn panic
[rule &opt msg]
@{:name (keyword (string "panic-" (name rule)))
:rule (fn panic* [parser]
(print "panicking!")
(def origin (current parser))
(var passing false)
(def skipped @[])
(while (not passing)
(array/push skipped (current parser))
(advance parser)
(set passing (apply-rule rule parser)))
(print "looped")
(def the-error @{:type :error :token origin :msg msg :skipped skipped})
(array/push (parser :errors) the-error)
(array/push (parser :captured) the-error)
true
)}))
# If we wanted to be robust, we wouldn't hard-code this
# To consider: use a stack of "panic-until" on a parser to dynamically decide how far to panic
(defp stop (+ :newline :semicolon :break))
(defn panic
"Panics the parser, consuming all tokens until the rule matches (including the match). It also adds an error node to both the capture and the error stacks."
[parser expected]
(print "panic! in the parser")
(def origin (current parser))
(var passing false)
(def skipped @[])
(while (not passing)
(array/push skipped (current parser))
(advance parser)
(set passing (apply-rule stop parser))
(print "phew; I'm done panicking")
(pprint (current parser))
(def the-error {:type :error :expected expected :token origin :skipped skipped})
(array/push (parser :errors) the-error)
(array/push (parser :captured) the-error)
(error parser)))
(defn *
"Sequences rules: matches if all rules are matched, in sequence."
[& rules]
@{:name (keyword (string/join (map name rules) "*"))
:rule (fn ** [parser]
(print "applying sequence")
(var passing true)
(def origin (stash parser))
(pprint origin)
(var failing nil)
(var passing true)
(loop [rule :in rules :while passing]
(def pass? (apply-rule rule parser))
(when (not pass?)
(set failing rule)
(set passing false)))
# this isn't the right thing to do: don't back out, panic!
# (when (not passing) (restore parser origin))
(if passing
passing
(panic parser (string "expected " (name failing)))))})
## next problem: where do we back out to, and how?
## the answer: an "anchor" point, where we `(try) something, and if it panics, start up with another line; in a block it will be line; in a script it will be toplevel`
## maybe terms in a tuple, list, dict
## it seems noteworthy that the things we want to return to are all either "any" or "some", and all either end with a newline or a break
## they're not all either any or some (empty data structures, but lines in blocks and scripts)
## also: there's a finite number of possibilities for any of them:
## another well-formed term, or the end of the item
## consider:
(comment
(defp tuple-term (nonbinding (some separator)))
(defp tuple-expr (order-1 (<- :lparen) (any separator) (any! tuple-term separator) (:rparen)))
)
## the tuple-term can be the anchor point
## also, in this case, we panic until a *separator*, not a *terminator*
## that suggests we need a panic-until parameter, rather than the hardcoded stop
## is this right?
(comment (defn order-1 [& rules]
@{:name (keyword (string/join (map name rules) "*"))
:rule (fn order-1 [parser]
(def result (test (first rules) parser))
(if result
(apply-rule (order-0 ;rules)) ### compile error: unknown symbol order-0
nil))}))
(defn capture-group
"Takes a parser and an origin state: takes everything captured since the origin, gathers it up in a single array, pops it all off the capture stack, and pushes the gathered captures back on the stack."
[parser origin]
(def from (get origin :cap-len))
(def captured (get parser :captured))
(def grouped (array/slice captured from -1))
(pop-to captured from)
(array/push captured grouped)
captured)
(defn &
"Groups rules: pops all captured patterns and pushes them on the captured stack as a single array."
[rule]
@{:name (keyword (string "&" (name rule)))
:rule (fn &* [parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(if result
(do
(capture-group parser origin)
true)
(do
(restore parser origin)
nil)))})
(defn fn-name
"Returns the name of a function."
[f]
(def rep (string f))
(string/slice rep 10 -2))
(defn /
"Substitution: takes a rule and a function, and applies the function to any captures arising from the rule; the captures are individual parameters to the function."
[rule f]
@{:name (keyword (string (name rule) "/" (fn-name f)))
:rule (fn /* [parser]
(def origin (stash parser))
(def captured (get parser :captured))
(def result (apply-rule (& rule) parser))
(if result
(do
(def grouped (array/pop captured))
(def applied (f ;grouped))
(array/push captured applied)
true)
(do
(restore parser origin)
nil)))})
(defn any
"Matches zero or more instances of a rule."
[rule]
@{:name (keyword (string "any-" (name rule)))
:rule (fn any* [parser]
(var result true)
(while result
(set result (apply-rule rule parser)))
true)})
(defn some
"Matches one or more instances of a rule."
[rule]
@{:name (keyword (string "some-" (name rule)))
:rule (fn some* [parser]
(def origin (stash parser))
(def result (apply-rule rule parser))
(if result
(do
(apply-rule (any rule) parser)
true)
(do
(restore parser origin)
nil)))})
(defn parse [tokens rule]
(def parser (new-parser tokens))
(try
(do (apply-rule rule parser) parser)
([err] err)))
(upscope #XXX for easier repl use
(defn literal->ast [token] {:type (get token :type) :data (get token :literal) :token token})
(defn word->ast [token] {:type (get token :type) :data (get token :lexeme) :token token})
(defn tuple->ast [origin & tokens] {:type :tuple :data tokens :token origin})
(defn if->ast [origin & tokens] {:type :if :data tokens :token origin})
(declare expression simple nonbinding)
(defp separator (some (+ :newline :comma :break)))
(defp separators? (any (+ :newline :comma :break)))
(defp terminator (some (+ :newline :semicolon :break)))
(defp terminators? (any (+ :newline :semicolon :break)))
(defp nls? (any :newline))
(defp atom (/ (<- (+ :nil :true :false :number :keyword :string)) literal->ast))
(defp word (/ (<- :word) word->ast))
(defp tuple-term (* nonbinding separator))
(defp tuple-literal (/ (* (<- :lparen) separators? (any tuple-term) :rparen) tuple->ast))
(defp if-expr (/ (* (<- :if) simple nls? :then nonbinding nls? :else nonbinding) if->ast))
# (defp root (<- (+ :word :keyword)))
# (defp follow (<- (+ :keyword :arg-tuple)))
# (defp synthetic (& (* root (some follow))))
(defp simple (+ atom word tuple-literal))
(defp nonbinding (+ atom word tuple-literal))
(defp expression (+ atom word tuple-literal))
(defp line (* expression (some terminator)))
# (defp toplevel)
(defp script (some line))
(def source
``
if (:)foo then :bar else :baz
``
)
(-> (scan source) (parse if-expr) pprint)
)
(comment
Okay, so I'm trying to figure out how to manage panics in this declarative (not recursive-descent) parser.
The question is how to do the thing where we pop out of the stack up to the most relevant rule.
Given that we may have deeply nested rules, we either want:
- A stack of rules in the parser, which
)

File diff suppressed because it is too large Load Diff

View File

@ -1,104 +0,0 @@
(defn ->kw [kw-str]
(keyword (slice kw-str 1)))
(defn word [str] [:word str])
(defn num [num] [:number (scan-number num)])
(defn kw [_ kw] [:keyword (->kw kw)])
(defn str [str] [:string str])
(defn bool [bool] (if (= bool "true") [:boolean true] [:boolean false]))
(defn nill [_] [:nil nil])
(defn not-empty? [x] (not (empty? x)))
(defn ifl [args] [:if ;args])
(defn letl [args] [:let ;args])
(defn block [args] [:block args])
(def errors @[])
(defn ? [x default] (if (nil? x) default x))
(defn panic [& args]
(def info (filter |(not= "" $) args))
(def [msg line col source] info)
(def error {:msg msg :line line :col col :source (? source "")})
(array/push errors error)
[:error error]
)
(do
(def ludus-grammar
~{:nil (cmt (<- "nil") ,nill)
:true (cmt (<- "true") ,bool)
:false (cmt (<- "false") ,bool)
:comment (* "&" (any (if-not (+ "\n" -1) 1)))
:wordchars (+ :w (set "_-/*?!"))
:reserved (+ "if" "then" "else" "let")
:word (cmt (<- (if-not :reserved (* (range "az") (any :wordchars)))) ,word)
:keyword (cmt (<- (* ":" :word)) ,kw)
:hex (range "09" "af" "AF")
:escape (* "\\" (+ (set `"'0?\abefnrtvz`)
(* "x" :hex :hex)
(* "u" [4 :hex])
(* "U" [6 :hex])
(error (constant "bad escape"))))
:string (cmt (* "\"" (<- (any (+ :escape (if-not "\"" 1)))) "\"") ,str)
:int (* (? "-") (range "19") (any (+ "_" (range "09"))))
:float (* (? "-") (+ :int "0") "." (any (+ "_" (range "09"))))
:zero "0"
:number (cmt (<- (+ :float :int :zero)) ,num)
:comment (* "&" (any (if-not (+ "\n" -1) 1)))
:atom (+ :nil :true :false :keyword :number :word)
:newline "\n"
:terminator (* (any :ws) (some (+ :newline ";")))
:separator (* (any :ws) (some (+ :newline ",")))
:ws (any (+ :comment (set " \t")))
:simple (+ :atom :string :word (* (constant "expected simple" :err-msg) :panic))
:then (+ (* (+ (some :ws) (any :newline)) (any :ws) "then" (some :ws)) (* (constant "expected then" :err-msg) :panic))
:else (+ (* (+ (some :ws) (any :newline)) (any :ws) "else" (some :ws)) (* (constant "expected else" :err-msg) :panic))
:panic (cmt (* (<- (-> :err-msg)) (<- (line)) (<- (column)) (<- (* (any (if-not (+ "\n" -1) 1))))) ,panic)
:non-binding (+ :atom :word :string :if :block
(* (constant "expected non-binding" :err-msg) :panic))
:tuple-term (* :number (some :separator))
:tuple (* "(" (any :separator) (any :tuple-term) ")")
:block (cmt (group (* "{" (some :line) (? :expression) "}")) ,block)
:if (cmt (group (* "if" (some :ws) :simple :then :non-binding :else :non-binding)) ,ifl)
:pattern (+ :atom :string (* (constant "expected pattern") :panic))
:equals (* (any :ws) "=" (any :ws))
:let (cmt (group (* "let" (some :ws) :pattern :equals :non-binding)) ,letl)
:expression (+ :atom :string :block :if :let
(* (constant "expected expression" :err-msg) :panic))
:empty (* :ws :terminator)
:line (* :ws :expression :ws :terminator)
:main :tuple})
(def simple-tuple ~{
:value (<- (range "az"))
:separator (+ "," "\n")
:ws (any (set " \t"))
:term (* :ws :value :ws :separator)
:tuple (group (* "(" (any :term) (? :value) :ws ")"))
:main :tuple
})
# :empty/:line/:block are still giving me grief
# also, somehow line & column numbers ar
(def source
``
( a, b ,c,d )
``
)
(peg/match simple-tuple source)
)

File diff suppressed because it is too large Load Diff

View File

@ -4,21 +4,21 @@
Tracking here, before I start writing this code, the kinds of validation we're hoping to accomplish:
* [x] splats come at the end of list, tuple, and dict patterns
* [x] no unbound names
* [x] no re-bound names
* [x] no unbound names with `use` forms
* [ ] first-level property access with pkg, e.g. `Foo :bar`--bar must be on Foo
* [x] recur in tail position in `loop` forms
* [x] recur not called outside of `loop` forms
- [ ] accept pkg-kws
* [x] `loop` form arity checking
* [x] arity checking of explicit named function calls
* [ ] flag tail calls
* [x] flag tail calls
* [x] no re-bound names
* [x] no unbound names
* [x] no unbound names with `use` forms
* [x] recur in tail position in `loop` forms
* [x] recur not called outside of `loop` forms
* [x] splats come at the end of list, tuple, and dict patterns
Imports are for a later iteration of Ludus:
* [ ] no circular imports DEFERRED
* [ ] correct imports DEFERRED
)
@ -290,7 +290,14 @@ Imports are for a later iteration of Ludus:
(defn- pkg-root [validator])
# * [ ] flag tail calls (where last term is not-partial args)
(defn- tail-call [validator])
(defn- tail-call [validator]
(def ast (validator :ast))
(when (ast :partial) (break validator))
(def status (validator :status))
(when (not (status :tail)) (break validator))
(def data (ast :data))
(def args (last data))
(set (args :tail-call) true))
# * [ ] arity checking if first term is name that resolves to a function and args aren't partial
# XXX: now just check number of args against arity map
@ -579,16 +586,16 @@ Imports are for a later iteration of Ludus:
(do
#(comment
(def source `
fn foo {
() -> :bar
(x) -> :foo
(x, y, ...z) -> :baz
fn bar () -> :bar
fn foo () -> match :foo with {
a -> bar ()
b -> :baz
}
foo (4)
`)
(def scanned (s/scan source))
(def parsed (p/parse scanned))
(def validator (new-validator parsed))
(pp validator)
((validate validator) :errors)
(validate validator)
(pp parsed)
)