Get parser combinator strategy working

This commit is contained in:
Scott Richmond 2023-05-16 16:06:18 -04:00
parent 919ab5ca34
commit cbd78ce7f7

View File

@ -2,16 +2,18 @@
(:require (:require
[ludus.scanner :as scan])) [ludus.scanner :as scan]))
(def msgs {
})
(defn ? [val default] (if (nil? val) default val)) (defn ? [val default] (if (nil? val) default val))
(defn ok? [{status :status}] (defn ok? [{status :status}]
(= status :ok)) (= status :ok))
(defn pass? [{status :status}] (or (= status :ok) (= status :quiet))) (def failing #{:err :none})
(def passing #{:ok :group :silent})
(defn pass? [{status :status}] (contains? passing status))
(defn fail? [{status :status}] (conatins? failing status))
(defn data [{d :data}] d) (defn data [{d :data}] d)
@ -19,33 +21,39 @@
(defn pname [parser] (? (:name parser) parser)) (defn pname [parser] (? (:name parser) parser))
(defn str-part [kw] (apply str (next (str kw))))
(defn kw+str [kw mystr] (keyword (str (str-part kw) mystr)))
(defn value [token] (defn value [token]
(if (= :none (:literal token)) (:lexeme token) (:literal token))) (if (= :none (:literal token)) (:lexeme token) (:literal token)))
(defn apply-kw-parser [kw tokens] (defn apply-kw-parser [kw tokens]
(let [token (first tokens)] (let [token (first tokens)]
(println "applying kw parser " kw " to " token) ;(println "applying kw parser " kw " to " token)
(if (= kw (:type token)) (if (= kw (:type token))
{:status :ok :type kw :data [(value token)] :token token :remaining (rest tokens)} {:status :ok
{:status :err :token token :trace [kw] :remaining (rest tokens)}))) :type kw
:data (if (value token) [(value token)] [])
:token token
:remaining (rest tokens)}
{:status :none :token token :trace [kw] :remaining (rest tokens)})))
(defn apply-fn-parser [parser tokens] (defn apply-fn-parser [parser tokens]
(println "applying fn parser" parser ", " tokens)
(let [rule (:rule parser) name (:name parser)] (let [rule (:rule parser) name (:name parser)]
(println "appying fn parser " name " to " (first tokens)) ;(println "appying fn parser " name " to " (first tokens))
(rule tokens))) (rule tokens)))
(defn apply-parser [parser tokens] (defn apply-parser [parser tokens]
(if (keyword? parser) (cond
(apply-kw-parser parser tokens) (keyword? parser) (apply-kw-parser parser tokens)
(apply-fn-parser parser tokens))) (:rule parser) (apply-fn-parser parser tokens)
:else (throw (Exception. "`apply-parser` requires a parser"))))
(defn pmap [f parser] (fn [tokens] (f (apply-parser parser tokens))))
(defn choice [name parsers] (defn choice [name parsers]
{:name name {:name name
:rule (fn [tokens] :rule (fn choice-fn [tokens]
(println "entering CHOICE" name) ;(println "entering CHOICE" name)
(loop [ps parsers] (loop [ps parsers]
(let [result (apply-parser (first ps) tokens) (let [result (apply-parser (first ps) tokens)
rem-ts (remaining result) rem-ts (remaining result)
@ -54,57 +62,71 @@
(pass? result) (pass? result)
{:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts} {:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts}
(= :err (:status result))
(update result :trace #(conj % name))
(empty? rem-ps) (empty? rem-ps)
{:status :err :token (first tokens) :trace [name] :remaining rem-ts} {:status :none :token (first tokens) :trace [name] :remaining rem-ts}
:else (recur rem-ps)))))}) :else (recur rem-ps)))))})
(defn order [name parsers] (defn order [name parsers]
{:name name {:name name
:rule (fn [tokens] :rule (fn order-fn [tokens]
(println "entering ORDER" name) ;(println "entering ORDER" name)
(let [origin (first tokens)] (let [origin (first tokens)
(loop [ps parsers first-result (apply-parser (first parsers) tokens)]
results [] (case (:status first-result)
ts tokens] (:err :none)
{:status :none
:token (first tokens)
:trace [name]
:remaining tokens}
(:ok :quiet :group)
(loop [ps (rest parsers)
results (case (:status first-result)
:ok [first-result]
:quiet []
:group (:data first-result))
ts (remaining first-result)]
(let [result (apply-parser (first ps) ts) (let [result (apply-parser (first ps) ts)
res-rem (remaining result)] res-rem (remaining result)]
(if (empty? (rest ps)) (if (empty? (rest ps))
(case (:status result) (case (:status result)
:ok {:status :group
:ok {:status :ok
:type name :type name
:data (conj results result) :data (conj results result)
:token origin :token origin
:remaining res-rem} :remaining res-rem}
:quiet {:status :ok :quiet {:status :group
:type name :type name
:data results :data results
:token origin :token origin
:remaining res-rem} :remaining res-rem}
:group {:status :ok :group {:status :group
:type name :type name
:data (vec (concat results (:data result))) :data (vec (concat results (:data result)))
:token origin :token origin
:remaining res-rem} :remaining res-rem}
:err (update result :trace #(conj % name))) (:err :none)
(assoc (update result :trace #(conj % name)) :status :err))
(case (:status result) (case (:status result)
:ok (recur (rest ps) (conj results result) res-rem) :ok (recur (rest ps) (conj results result) res-rem)
:group (recur (rest ps) :group (recur (rest ps)
;; TODO: fix this? (vec (concat results (:data result)))
;; This is supposed to undo the :quiet/:group thing
(vec (concat results
(filter #(= (:status %) :ok) (:data result))))
res-rem) res-rem)
:quiet (recur (rest ps) results res-rem) :quiet (recur (rest ps) results res-rem)
:err (update result :trace #(conj % name))))))))}) (:err :none)
(assoc (update result :trace #(conj % name)) :status :err))))))))})
(defn quiet [parser] (defn quiet [parser]
{:name (? (:name parser) parser) {:name (kw+str (? (:name parser) parser) "-quiet")
:rule (fn [tokens] :rule (fn quiet-fn [tokens]
(let [result (apply-parser parser tokens)] (let [result (apply-parser parser tokens)]
(if (pass? result) (if (pass? result)
(assoc result :status :quiet) (assoc result :status :quiet)
@ -113,45 +135,50 @@
(defn zero+ (defn zero+
([parser] (zero+ (pname parser) parser)) ([parser] (zero+ (pname parser) parser))
([name parser] ([name parser]
{:name name {:name (kw+str name "-zero+")
:rule (fn [tokens] :rule (fn zero+fn [tokens]
(println "entering ZERO+") ;(println "entering ZERO+")
(loop [results [] (loop [results []
ts tokens ts tokens]
back tokens] ;(println "looping ZERO+" (? (:name parser) parser))
(println "looping ZERO+" (:name parser))
(let [result (apply-parser parser ts)] (let [result (apply-parser parser ts)]
(if (pass? result) (case (:status result)
(recur (conj results result) (remaining result) ts) :ok (recur (conj results result) (remaining result))
{:status :group :type name :data results :token (first tokens) :remaining ts} :group (recur (vec (concat results (:data result))) (remaining result))
))))})) :quiet (recur results (remaining result))
{:status :group :type name :data results :token (first tokens) :remaining ts}))))}))
(defn one+ (defn one+
([parser] (one+ (pname parser) parser)) ([parser] (one+ (pname parser) parser))
([name parser] ([name parser]
{:name name {:name (kw+str name "-one+")
:rule (fn [tokens] :rule (fn one+fn [tokens]
(let [result (apply-parser parser tokens) (let [first-result (apply-parser parser tokens)
rest (zero+ name parser)] rest-parser (zero+ name parser)]
(case (:status result) (case (:status first-result)
(:ok :quiet) (:ok :group)
(let [rest-result (apply-parser rest (remaining result)) (let [rest-result (apply-parser rest-parser (remaining first-result))]
rest-data (data rest-result)
rest-remaining (remaining rest-result)]
(println rest-data)
{:status :group {:status :group
:type name :type name
:data (vec (concat (data result) (second rest-data)) ) :data (vec (concat [first-result] (data rest-result)))
:token (first tokens) :token (first tokens)
:remaining rest-remaining}) :remaining (remaining rest-result)})
:err result)))})) :quiet
(let [rest-result (apply-parser rest-parser (remaining first-result))]
{:status :quiet
:type name
:data []
:token (first tokens)
:remaining (remaining rest-result)})
(:err :none) first-result)))}))
(defn maybe (defn maybe
([parser] (maybe (pname parser) parser)) ([parser] (maybe (pname parser) parser))
([name parser] ([name parser]
{:name name {:name (kw+str name "-maybe")
:rule (fn [tokens] :rule (fn maybe-fn [tokens]
(let [result (apply-parser parser tokens)] (let [result (apply-parser parser tokens)]
(if (pass? result) (if (pass? result)
result result
@ -159,43 +186,56 @@
)))})) )))}))
(comment (comment
"So one thing I'm thinking about is the fact that zero+, one+, maybe all only really make sense in the context of an `order` call. So that idea is that anything that's in one of these should be added to the `order`'s data vector, rather than putting it in a subordinate structure. "
If I'm not mistaken, the Ludus grammer requires *no* lookahead, the first token in an expression tells you what kind of expression it is:
This is much the same as the `quiet` idea: there should be some kind of internal representation of the thing. Rather, there is one ambiguity: synthetic expressions can start with words or keywords.
A bare word can be assimilated to synthetic expressions. Interestingly, so can synthetic.
*** The parsing strategy is the same: consume as many things until you can't get anymore.
And now the `group` status has broken `quiet` The fact that a bare keyword is evaluated like a literal doesn't matter.
TODO: the concats put things into lists/seqs, and thus lett and iff are out of order. So:
literal -> literal
keyword -> synthetic
word -> synthetic
( -> tuple
[ -> list
#{ -> dict
@{ -> struct
ns -> ns
let -> let
do -> pipeline
etc.
") Because there's now NO lookahead, we can easily distinguish between orderings that don't match at all, and ones which match on the first token.
(defn group Because of that, we can also distinguish between no-match and errors
([parser] (pname parser) parser)
([name parser] (fn [tokens] ")
(let [result (apply-parser parser tokens)
data (map :data (:data result))]
{assoc result :data data}))))
(declare expression) (declare expression)
(def literal (choice :literal [:nil :true :false :number :string :keyword])) (def literal (choice :literal [:nil :true :false :number :string]))
(def separator (one+ (choice :separator [:comma :newline]))) (def separator (choice :separator [:comma :newline]))
(def nls? (quiet (zero+ :nls :newline))) (def nls? (quiet (zero+ :nls :newline)))
(def pattern (choice :pattern [:literal :word])) ;; stupid to start (def pattern (choice :pattern [:literal :word])) ;; stupid to start
(def iff (order :iff [ (def iff (order :iff [
(quiet :if) nls? (quiet :if)
nls?
expression expression
nls? (quiet :then) nls?
(quiet :then)
expression expression
nls? (quiet :else) nls?
(quiet :else)
expression])) expression]))
(def lett (order :let [ (def lett (order :let [
@ -205,19 +245,35 @@
nls? nls?
expression])) expression]))
(def tuple-entries (order :tuple-entries [(quiet separator) expression])) (def tuple-entry (order :tuple-entry [(quiet (one+ separator)) expression]))
(def tuple (order :tuple (def tuple (order :tuple
[(quiet :lparen) [(quiet :lparen)
(quiet (zero+ separator))
(maybe expression) (maybe expression)
(zero+ tuple-entries) (zero+ tuple-entry)
(quiet (zero+ separator))
(quiet :rparen)])) (quiet :rparen)]))
(def splat (order :splat [(quiet :splat) :word]))
(def list-term (choice :list-term [splat expression]))
(def list-entry (order :list-entry [(quiet (one+ separator)) list-term]))
(def listt (order :list
[(quiet :lbracket)
(quiet (zero+ separator))
(maybe list-term)
(zero+ list-entry)
(quiet (zero+ separator))
(quiet :rbracket)]))
(def synth-root (choice :synth-root [:keyword :word])) (def synth-root (choice :synth-root [:keyword :word]))
(def synth-term (choice :synth-term [:tuple :keyword])) (def synth-term (choice :synth-term [tuple :keyword]))
(def synthetic (order :synthetic [synth-root (one+ synth-term)])) (def synthetic (order :synthetic [synth-root (zero+ synth-term)]))
(def terminator (choice :terminator [:newline :semicolon])) (def terminator (choice :terminator [:newline :semicolon]))
@ -225,16 +281,31 @@
(def block (order :block [(quiet :lbrace) nls? expression (zero+ block-line) nls? (quiet :rbrace)])) (def block (order :block [(quiet :lbrace) nls? expression (zero+ block-line) nls? (quiet :rbrace)]))
(def expression (choice :expression [tuple literal lett iff synthetic :word block])) (def expression (choice :expression [lett iff synthetic block listt tuple literal]))
(def foo (order :foo [:number :keyword])) (def importt (order :import [(quiet :import) :string (quiet :as) :word]))
(def eg (:tokens (scan/scan "let foo = :bar"))) (def toplevel (choice :toplevel [importt expression]))
(def result (apply-parser expression eg)) (def script-line (order :script-line [(quiet terminator) toplevel]))
(def script (order :script [nls? toplevel (zero+ script-line) nls? (quiet :eof)]))
(def eg (:tokens (scan/scan
""
)))
eg
(println eg)
(def result (apply-parser script eg))
result result
(println result)
(defn clean [node] (defn clean [node]
(if (map? node) (if (map? node)
(-> node (-> node
@ -242,7 +313,7 @@ result
:status :status
:remaining :remaining
:token) :token)
(update :data #(map clean %))) (update :data #(into [] (map clean) %)))
node)) node))
(defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x) (defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x)