ludus/janet/validate.janet
2024-06-24 22:03:57 -04:00

786 lines
21 KiB
Plaintext

### A validator for a Ludus AST
(comment
Tracking here, before I start writing this code, the kinds of validation we're hoping to accomplish:
* [x] ensure called keywords are only called w/ one arg
* [x] first-level property access with pkg, e.g. `Foo :bar`--bar must be on Foo
- [x] accept pkg-kws
* [x] validate dict patterns
* [x] compile string-patterns
* [x] `loop` form arity checking
* [x] arity checking of explicit named function calls
* [x] flag tail calls
* [x] no re-bound names
* [x] no unbound names
* [x] no unbound names with `use` forms
* [x] recur in tail position in `loop` forms
* [x] recur not called outside of `loop` forms
* [x] splats come at the end of list, tuple, and dict patterns
Deferred until a later iteration of Ludus:
* [ ] no circular imports DEFERRED
* [ ] correct imports DEFERRED
* [ ] validate `with` forms
)
(def- package-registry @{})
# (try (os/cd "janet") ([_] nil))
(import ./scanner :as s)
(import ./parser :as p)
(defn- new-validator [parser]
(def ast (parser :ast))
@{:ast ast
:errors @[]
:ctx @{}
:status @{}}
)
(var validate nil)
(def terminals [:number :string :bool :nil :placeholder])
(def simple-colls [:list :tuple :set :args])
(defn- simple-coll [validator]
(def ast (validator :ast))
(def data (ast :data))
(each node data
(set (validator :ast) node)
(validate validator))
validator)
(defn- iff [validator]
(def ast (validator :ast))
(def data (ast :data))
(each node data
(set (validator :ast) node)
(validate validator))
validator)
(defn- script [validator]
(def ast (validator :ast))
(def data (ast :data))
(def status (validator :status))
(set (status :toplevel) true)
(each node data
(set (validator :ast) node)
(validate validator))
validator)
(defn- block [validator]
(def ast (validator :ast))
(def data (ast :data))
(when (= 0 (length data))
(array/push (validator :errors)
{:node ast :msg "blocks may not be empty"})
(break validator))
(def status (validator :status))
(set (status :toplevel) nil)
(def tail? (status :tail))
(set (status :tail) false)
(def parent (validator :ctx))
(def ctx @{:^parent parent})
(set (validator :ctx) ctx)
(for i 0 (-> data length dec)
(set (validator :ast) (data i))
(validate validator))
(set (status :tail) tail?)
(set (validator :ast) (last data))
(validate validator)
(set (validator :ctx) parent)
validator)
(defn- resolve-local [ctx name]
(get ctx name))
(defn- resolve-name [ctx name]
(when (nil? ctx) (break nil))
(def node (get ctx name))
(if node node (resolve-name (get ctx :^parent) name)))
(defn- word [validator]
(def ast (validator :ast))
(def name (ast :data))
(def ctx (validator :ctx))
(def resolved (resolve-name ctx name))
(when (not resolved)
(array/push (validator :errors)
{:node ast :msg "unbound name"}))
validator)
### patterns
(var pattern nil)
(defn- lett [validator]
(def ast (validator :ast))
(def [lhs rhs] (ast :data))
# evaluate the expression first
# otherwise lhs names will appear bound
(set (validator :ast) rhs)
(validate validator)
(set (validator :ast) lhs)
(pattern validator)
validator)
(defn- splattern [validator]
(def ast (validator :ast))
(def status (validator :status))
(when (not (status :last))
(array/push (validator :errors)
{:node ast :msg "splats may only come last in collection patterns"}))
(def data (ast :data))
(when data
(set (validator :ast) data)
(pattern validator))
validator)
(defn- simple-coll-pattern [validator]
(def ast (validator :ast))
(def data (ast :data))
(when (empty? data) (break validator))
(def status (validator :status))
(for i 0 (-> data length dec)
(set (validator :ast) (get data i))
(pattern validator))
(set (status :last) true)
(set (validator :ast) (last data))
(pattern validator)
(set (status :last) nil)
validator)
(defn- word-pattern [validator]
(def ast (validator :ast))
(def name (ast :data))
(def ctx (validator :ctx))
(when (has-key? ctx name)
(def {:line line :input input} (get-in ctx [name :token]))
(array/push (validator :errors)
{:node ast :msg (string "name is already bound on line "
line " of " input)}))
(set (ctx name) ast)
# (pp ctx)
validator)
(def types [
:nil
:bool
:number
:keyword
:string
:set
:tuple
:dict
:list
:fn
:box
:pkg
])
(defn typed [validator]
(def ast (validator :ast))
(def [kw-type word] (ast :data))
(def type (kw-type :data))
(when (not (has-value? types type))
(array/push (validator :errors)
{:node kw-type :msg "unknown type"}))
(set (validator :ast) word)
(pattern validator))
(defn- str-pattern [validator]
(def ast (validator :ast))
(def data (ast :data))
(def last-term (-> data array/pop string))
(def grammar @{})
(def bindings @[])
(var current 0)
(each node data
(when (not (buffer? node))
(set (validator :ast) node)
(pattern validator))
(if (buffer? node)
(set (grammar (keyword current)) (string node))
(do
(set (grammar (keyword current))
~(<- (to ,(keyword (inc current)))))
(array/push bindings (node :data))))
(set current (inc current)))
(set (grammar (keyword current)) ~(* ,last-term -1))
(def rules (map keyword (range (length grammar))))
(set (grammar :main) ~(* ,;rules))
(set (ast :grammar) grammar)
(set (ast :compiled) (peg/compile grammar))
(set (ast :bindings) bindings))
(defn- pair [validator]
(def ast (validator :ast))
(def [_ patt] (ast :data))
(set (validator :ast) patt)
(pattern validator))
(defn- pattern* [validator]
# (print "PATTERN*")
(def ast (validator :ast))
(def type (ast :type))
# (print "validating pattern " type)
(cond
(has-value? terminals type) validator
(case type
:word (word-pattern validator)
:placeholder validator
:ignored validator
:word (word-pattern validator)
:list (simple-coll-pattern validator)
:tuple (simple-coll-pattern validator)
:dict (simple-coll-pattern validator)
:splat (splattern validator)
:typed (typed validator)
:interpolated (str-pattern validator)
:pair (pair validator)
)))
(set pattern pattern*)
# XXX: ensure guard includes only allowable names
# XXX: what to include here? (cf Elixir)
(defn- guard [validator])
(defn- match-clauses [validator clauses]
# (print "validating clauses in match-clauses")
(each clause clauses
(def parent (validator :ctx))
(def ctx @{:^parent parent})
(set (validator :ctx) ctx)
(def [lhs guard rhs] clause)
(set (validator :ast) lhs)
(pattern validator)
# (pp (validator :ctx))
# (pp (validator :ctx))
(when guard
(set (validator :ast) guard)
(validate validator))
(set (validator :ast) rhs)
(validate validator)
(set (validator :ctx) parent)))
(defn- matchh [validator]
# (print "validating in matchh")
(def ast (validator :ast))
(def [to-match clauses] (ast :data))
# (print "validating expression:")
# (pp to-match)
(set (validator :ast) to-match)
(validate validator)
# (print "validating clauses")
(match-clauses validator clauses)
validator)
(defn- declare [validator fnn]
(def status (validator :status))
(def declared (get status :declared @{}))
(set (declared fnn) true)
(set (status :declared) declared)
# (print "declared function " (fnn :name))
# (pp declared)
validator)
(defn- define [validator fnn]
(def status (validator :status))
(def declared (get status :declared @{}))
(set (declared fnn) nil)
(set (status :declared) declared)
# (print "defined function " (fnn :name))
# (pp declared)
validator)
(defn- fnn [validator]
(def ast (validator :ast))
(def name (ast :name))
# (print "function name: " name)
(def status (validator :status))
(def tail? (status :tail))
(set (status :tail) true)
(when name
(def ctx (validator :ctx))
(def resolved (ctx name))
(when (and resolved (not= :nothing (resolved :data)))
(def {:line line :input input} (get-in ctx [name :token]))
(array/push (validator :errors)
{:node ast :msg (string "name is already bound on line " line " of " input)}))
(when (and resolved (= :nothing (resolved :data)))
(define validator resolved))
(set (ctx name) ast))
(def data (ast :data))
(when (= data :nothing)
(break (declare validator ast)))
(match-clauses validator data)
(set (status :tail) tail?)
(def rest-arities @{})
(def arities @{:rest rest-arities})
(each clause data
# (print "CLAUSE:")
# (pp clause)
(def patt (first clause))
(def params (patt :data))
(def arity (length params))
# (print "checking clause with arity " arity)
(def rest-param? (and (> arity 0) (= :splat ((last params) :type))))
(if rest-param?
(set (rest-arities arity) true)
(set (arities arity) true)))
# (pp arities)
(set (ast :arities) arities)
validator)
(defn- ref [validator]
(def ast (validator :ast))
(def ctx (validator :ctx))
(def expr (ast :data))
(set (validator :ast) expr)
(validate validator)
(def name (ast :name))
(def resolved (ctx name))
(when resolved
(def {:line line :input input} (get-in ctx [name :token]))
(array/push (validator :errors)
{:node ast :msg (string "name is already bound on line " line " of " input)}))
(set (ctx name) ast)
validator)
(defn- interpolated [validator]
(def ast (validator :ast))
(def data (ast :data))
(each node data
(when (not (buffer? node))
(set (validator :ast) node)
(validate validator))))
### TODO:
# * [ ] ensure properties are on pkgs (if *only* pkgs from root)
(defn- pkg-root [validator]
# (print "validating pkg-root access")
(def ast (validator :ast))
(def ctx (validator :ctx))
(def terms (ast :data))
(def pkg-name ((first terms) :data))
(def the-pkg (resolve-name ctx pkg-name))
(when (not the-pkg)
(array/push (validator :errors)
{:node ast :msg "unbound pkg name"})
(break validator))
(def member (get terms 1))
(def accessed (case (member :type)
:keyword (get-in the-pkg [:pkg (member :data)])
:pkg-kw (get-in the-pkg [:pkg (member :data)])
:args (do
(array/push (validator :errors)
{:node member :msg "cannot call a pkg"}
(break validator)))))
(when (not accessed)
# (print "no member " (member :data) " on " pkg-name)
(array/push (validator :errors)
{:node member :msg "invalid pkg access"})
(break validator))
# TODO: validate nested pkg access
)
# (defn- tail-call [validator]
# (def ast (validator :ast))
# (when (ast :partial) (break validator))
# (def status (validator :status))
# (when (not (status :tail)) (break validator))
# (def data (ast :data))
# (def args (last data))
# (set (args :tail-call) true))
(defn- check-arity [validator]
# (print "CHECKING ARITY")
(def ast (validator :ast))
# (when (ast :partial) (break validator))
(def ctx (validator :ctx))
(def data (ast :data))
(def fn-word (first data))
# (pp fn-word)
(def the-fn (resolve-name ctx (fn-word :data)))
# (print "the called function: " the-fn)
# (pp the-fn)
(when (not the-fn) (break validator))
# (print "the function is not nil")
# (print "the function type is " (type the-fn))
(when (= :function (type the-fn)) (break validator))
(when (= :cfunction (type the-fn)) (break validator))
# (print "the function is not a janet fn")
# (print "fn type: " (the-fn :type))
(when (not= :fn (the-fn :type)) (break validator))
# (print "fn name: " (the-fn :name))
(def arities (the-fn :arities))
# when there aren't arities yet, break, since that means we're making a recursive function call
# TODO: enahnce this so that we can determine arities *before* all function bodies; this ensures arity-checking for self-recursive calls
(when (not arities) (break validator))
# (print "arities: ")
# (pp arities)
(def args (get data 1))
(def num-args (length (args :data)))
# (print "called with #args " num-args)
# (pp (get (validator :ctx) "bar"))
(when (has-key? arities num-args) (break validator))
# (print "arities: ")
# (pp arities)
(when (not arities) (break validator))
(def rest-arities (keys (arities :rest)))
(when (empty? rest-arities)
(array/push (validator :errors)
{:node ast :msg "mismatched arity"})
(break validator))
(def rest-min (min ;rest-arities))
(when (< num-args rest-min)
(array/push (validator :errors)
{:node ast :msg "mismatched arity"}))
validator)
(defn- kw-root [validator]
(def ast (validator :ast))
(def data (ast :data))
(def [_ args] data)
(when (not= :args (args :type))
(break (array/push (validator :errors)
{:node args :msg "called keyword expects an argument"})))
(when (not= 1 (length (args :data)))
(array/push (validator :errors)
{:node args :msg "called keywords take one argument"})))
(defn- synthetic [validator]
(def ast (validator :ast))
(def data (ast :data))
(def status (validator :status))
(def ftype ((first data) :type))
(def stype ((get data 1) :type))
(def ltype ((last data) :type))
(set (status :pkg-access?) nil)
(when (= ftype :pkg-name)
(set (status :pkg-access?) true))
(each node data
(set (validator :ast) node)
(validate validator))
(set (validator :ast) ast)
# (print "ftype " ftype)
# (print "stype " stype)
# (print "ltype " ltype)
(when (= ftype :pkg-name) (pkg-root validator))
(when (= ftype :keyword) (kw-root validator))
# (when (= ltype :args) (tail-call validator))
(when (and (= ftype :word) (= stype :args))
(check-arity validator))
validator)
(defn- pair [validator]
(def ast (validator :ast))
(def [k v] (ast :data))
(set (validator :ast) k)
(validate validator)
(set (validator :ast) v)
(validate validator))
(defn- splat [validator]
(def ast (validator :ast))
(when (get-in validator [:status :pkg])
(array/push (validator :errors)
{:node ast :msg "splats are not allowed in pkgs"})
(break validator))
(def data (ast :data))
(when data
(set (validator :ast) data)
(validate validator))
validator)
(defn- dict [validator]
(def ast (validator :ast))
(def data (ast :data))
(each node data
(set (validator :ast) node)
(validate validator))
validator)
(defn- whenn [validator]
(def ast (validator :ast))
(def data (ast :data))
(each node data
(def [lhs rhs] node)
(set (validator :ast) lhs)
(validate validator)
(set (validator :ast) rhs)
(validate validator))
validator)
# XXX: do this!
(defn- withh [validator])
# XXX: tail calls in last position
(defn- doo [validator]
(def ast (validator :ast))
(def data (ast :data))
(each node data
(set (validator :ast) node)
(validate validator))
validator)
(defn- usee [validator]
(def ast (validator :ast))
(def data (ast :data))
(set (validator :ast) data)
(validate validator)
(def name (data :data))
(def ctx (validator :ctx))
(def pkg (get-in ctx [name :pkg] @{}))
(loop [[k v] :pairs pkg]
(set (ctx (string k)) v))
validator)
(defn- pkg-entry [validator pkg]
(def ast (validator :ast))
(def status (validator :status))
(when (= :pkg-pair (ast :type))
(set (status :pkg-access?) true))
(def data (ast :data))
(def [key value] (ast :data))
# (print "PKG ENTRY***")
# (pp key)
# (pp value)
(set (validator :ast) key)
(validate validator)
(set (validator :ast) value)
(validate validator)
(def entry (if (= :pkg-name (value :type))
(resolve-name (validator :ctx) (string (value :data)))
value))
# (print "entry at " (key :data))
# (pp entry)
(set (status :pkg-access?) nil)
(def kw (key :data))
# (pp kw)
(set (pkg kw) entry)
# (pp pkg)
validator)
(defn- pkg [validator]
(def ast (validator :ast))
(def data (ast :data))
(def name (ast :name))
(def pkg @{})
(each node data
(set (validator :ast) node)
(pkg-entry validator pkg))
(set (ast :pkg) pkg)
# (print "THE PACKAGE")
# (pp pkg)
(def ctx (validator :ctx))
(set (ctx name) ast)
validator)
(defn- ns [validator]
(def ast (validator :ast))
(def data (ast :data))
(def name (ast :name))
(def parent (validator :ctx))
(def ctx @{:^parent parent})
(def block (data :data))
(each node block
(set (validator :ast) node)
(validate validator))
(set (ast :pkg) ctx)
(set (parent name) ast)
validator)
(defn- loopp [validator]
(def ast (validator :ast))
(def status (validator :status))
(def data (ast :data))
(def input (first data))
# (print "LOOP INPUT")
# (pp input)
(def clauses (get data 1))
(def input-arity (length (input :data)))
(set (ast :arity) input-arity)
# (print "input arity to loop " input-arity)
(set (validator :ast) input)
(validate validator)
# harmonize arities
(def rest-arities @{})
(each clause clauses
# (print "CLAUSE:")
# (pp clause)
(def patt (first clause))
(def params (patt :data))
(def clause-arity (length params))
# (print "checking clause with arity " clause-arity)
(def rest-param? (= :splat (get (last params) :type)))
(when (and
(not rest-param?) (not= clause-arity input-arity))
(array/push (validator :errors)
{:node patt :msg "arity mismatch"}))
(when rest-param?
(set (rest-arities clause-arity) patt)))
# (pp rest-arities)
(loop [[arity patt] :pairs rest-arities]
(when (< input-arity arity)
(array/push (validator :errors)
{:node patt :msg "arity mismatch"})))
(def loop? (status :loop))
(set (status :loop) input-arity)
(def tail? (status :tail))
(set (status :tail) true)
(match-clauses validator clauses)
(set (status :loop) loop?)
(set (status :tail) tail?)
validator)
(defn- recur [validator]
(def ast (validator :ast))
(def status (validator :status))
(def loop-arity (status :loop))
(when (not loop-arity)
(array/push (validator :errors)
{:node ast :msg "recur may only be used inside a loop"})
(break validator))
(def called-with (get-in ast [:data :data]))
(def recur-arity (length called-with))
# (print "loop arity " loop-arity)
# (print "recur arity" recur-arity)
(when (not= recur-arity loop-arity)
(array/push (validator :errors)
{:node ast :msg "recur must have the same number of args as its loop"}))
(when (not (status :tail))
(array/push (validator :errors)
{:node ast :msg "recur must be in tail position"}))
(set (validator :ast) (ast :data))
(validate validator))
(defn- repeatt [validator]
(def ast (validator :ast))
(def [times body] (ast :data))
(set (validator :ast) times)
(validate validator)
(set (validator :ast) body)
(validate validator))
(defn- panic [validator]
(def ast (validator :ast))
(def data (ast :data))
(set (validator :ast) data)
(validate validator))
(defn- testt [validator]
(def ast (validator :ast))
(def [_ body] (ast :data))
(set (validator :ast) body)
(validate validator))
(defn- pkg-name [validator]
(def ast (validator :ast))
(def name (ast :data))
(def ctx (validator :ctx))
(def pkg (resolve-name ctx name))
(when (not pkg)
(array/push (validator :errors)
{:node ast :msg "unbound name"}))
validator)
(defn- pkg-kw [validator]
# (print "validating pkg-kw")
(def ast (validator :ast))
(def pkg-access? (get-in validator [:status :pkg-access?]))
# (print "pkg-access? " pkg-access?)
(when (not pkg-access?)
(array/push (validator :errors)
{:node ast :msg "cannot use pkg-kw here"}))
validator)
(defn- pkg-pair [validator]
# (print "validating pkg-pair")
(def ast (validator :ast))
(def status (validator :status))
(def [_ pkg] (ast :data))
(set (status :pkg-access?) true)
(set (validator :ast) pkg)
(validate validator)
(set (status :pkg-access?) nil)
validator)
(defn- kw [validator]
(def status (validator :status))
(set (status :pkg-access?) nil)
validator)
(defn- validate* [validator]
(def ast (validator :ast))
(def type (ast :type))
# (print "validating node " type)
(cond
(has-value? terminals type) validator
(has-value? simple-colls type) (simple-coll validator)
(case type
:keyword (kw validator)
:if (iff validator)
:let (lett validator)
:script (script validator)
:block (block validator)
:word (word validator)
:fn (fnn validator)
:match (matchh validator)
:interpolated (interpolated validator)
:synthetic (synthetic validator)
:do (doo validator)
:dict (dict validator)
:test (testt validator)
:panic (panic validator)
:repeat (repeatt validator)
:when (whenn validator)
:splat (splat validator)
:pair (pair validator)
:pkg-pair (pkg-pair validator)
:ns (ns validator)
:pkg (pkg validator)
:pkg-name (pkg-name validator)
:pkg-kw (pkg-kw validator)
:use (usee validator)
:loop (loopp validator)
:recur (recur validator)
:ref (ref validator)
(error (string "unknown node type " type)))))
(set validate validate*)
(defn- cleanup [validator]
(def declared (get-in validator [:status :declared] {}))
(when (any? declared)
(each declaration declared
(array/push (validator :errors) {:node declaration :msg "declared fn, but not defined"})))
validator)
(defn valid [ast &opt ctx]
(default ctx @{})
(def validator (new-validator ast))
(def base-ctx @{:^parent ctx})
(set (validator :ctx) base-ctx)
(validate validator)
(cleanup validator))
(import ./base :as b)
# (do
(comment
(def source `
dec (12)
`)
(def scanned (s/scan source))
(def parsed (p/parse scanned))
(def validated (valid parsed b/ctx))
# (get-in validated [:status :declared])
# (validated :ctx)
)