Get parser combinator strategy working
This commit is contained in:
parent
919ab5ca34
commit
cbd78ce7f7
|
@ -2,16 +2,18 @@
|
|||
(:require
|
||||
[ludus.scanner :as scan]))
|
||||
|
||||
(def msgs {
|
||||
|
||||
})
|
||||
|
||||
(defn ? [val default] (if (nil? val) default val))
|
||||
|
||||
(defn ok? [{status :status}]
|
||||
(= status :ok))
|
||||
|
||||
(defn pass? [{status :status}] (or (= status :ok) (= status :quiet)))
|
||||
(def failing #{:err :none})
|
||||
|
||||
(def passing #{:ok :group :silent})
|
||||
|
||||
(defn pass? [{status :status}] (contains? passing status))
|
||||
|
||||
(defn fail? [{status :status}] (conatins? failing status))
|
||||
|
||||
(defn data [{d :data}] d)
|
||||
|
||||
|
@ -19,33 +21,39 @@
|
|||
|
||||
(defn pname [parser] (? (:name parser) parser))
|
||||
|
||||
(defn str-part [kw] (apply str (next (str kw))))
|
||||
|
||||
(defn kw+str [kw mystr] (keyword (str (str-part kw) mystr)))
|
||||
|
||||
(defn value [token]
|
||||
(if (= :none (:literal token)) (:lexeme token) (:literal token)))
|
||||
|
||||
(defn apply-kw-parser [kw tokens]
|
||||
(let [token (first tokens)]
|
||||
(println "applying kw parser " kw " to " token)
|
||||
;(println "applying kw parser " kw " to " token)
|
||||
(if (= kw (:type token))
|
||||
{:status :ok :type kw :data [(value token)] :token token :remaining (rest tokens)}
|
||||
{:status :err :token token :trace [kw] :remaining (rest tokens)})))
|
||||
{:status :ok
|
||||
:type kw
|
||||
:data (if (value token) [(value token)] [])
|
||||
:token token
|
||||
:remaining (rest tokens)}
|
||||
{:status :none :token token :trace [kw] :remaining (rest tokens)})))
|
||||
|
||||
(defn apply-fn-parser [parser tokens]
|
||||
(println "applying fn parser" parser ", " tokens)
|
||||
(let [rule (:rule parser) name (:name parser)]
|
||||
(println "appying fn parser " name " to " (first tokens))
|
||||
;(println "appying fn parser " name " to " (first tokens))
|
||||
(rule tokens)))
|
||||
|
||||
(defn apply-parser [parser tokens]
|
||||
(if (keyword? parser)
|
||||
(apply-kw-parser parser tokens)
|
||||
(apply-fn-parser parser tokens)))
|
||||
|
||||
(defn pmap [f parser] (fn [tokens] (f (apply-parser parser tokens))))
|
||||
(cond
|
||||
(keyword? parser) (apply-kw-parser parser tokens)
|
||||
(:rule parser) (apply-fn-parser parser tokens)
|
||||
:else (throw (Exception. "`apply-parser` requires a parser"))))
|
||||
|
||||
(defn choice [name parsers]
|
||||
{:name name
|
||||
:rule (fn [tokens]
|
||||
(println "entering CHOICE" name)
|
||||
:rule (fn choice-fn [tokens]
|
||||
;(println "entering CHOICE" name)
|
||||
(loop [ps parsers]
|
||||
(let [result (apply-parser (first ps) tokens)
|
||||
rem-ts (remaining result)
|
||||
|
@ -54,57 +62,71 @@
|
|||
(pass? result)
|
||||
{:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts}
|
||||
|
||||
(= :err (:status result))
|
||||
(update result :trace #(conj % name))
|
||||
|
||||
(empty? rem-ps)
|
||||
{:status :err :token (first tokens) :trace [name] :remaining rem-ts}
|
||||
{:status :none :token (first tokens) :trace [name] :remaining rem-ts}
|
||||
|
||||
:else (recur rem-ps)))))})
|
||||
|
||||
(defn order [name parsers]
|
||||
{:name name
|
||||
:rule (fn [tokens]
|
||||
(println "entering ORDER" name)
|
||||
(let [origin (first tokens)]
|
||||
(loop [ps parsers
|
||||
results []
|
||||
ts tokens]
|
||||
:rule (fn order-fn [tokens]
|
||||
;(println "entering ORDER" name)
|
||||
(let [origin (first tokens)
|
||||
first-result (apply-parser (first parsers) tokens)]
|
||||
(case (:status first-result)
|
||||
(:err :none)
|
||||
{:status :none
|
||||
:token (first tokens)
|
||||
:trace [name]
|
||||
:remaining tokens}
|
||||
|
||||
(:ok :quiet :group)
|
||||
(loop [ps (rest parsers)
|
||||
results (case (:status first-result)
|
||||
:ok [first-result]
|
||||
:quiet []
|
||||
:group (:data first-result))
|
||||
ts (remaining first-result)]
|
||||
(let [result (apply-parser (first ps) ts)
|
||||
res-rem (remaining result)]
|
||||
(if (empty? (rest ps))
|
||||
(case (:status result)
|
||||
|
||||
:ok {:status :ok
|
||||
:ok {:status :group
|
||||
:type name
|
||||
:data (conj results result)
|
||||
:token origin
|
||||
:remaining res-rem}
|
||||
|
||||
:quiet {:status :ok
|
||||
:quiet {:status :group
|
||||
:type name
|
||||
:data results
|
||||
:token origin
|
||||
:remaining res-rem}
|
||||
|
||||
:group {:status :ok
|
||||
:group {:status :group
|
||||
:type name
|
||||
:data (vec (concat results (:data result)))
|
||||
:token origin
|
||||
:remaining res-rem}
|
||||
|
||||
:err (update result :trace #(conj % name)))
|
||||
(:err :none)
|
||||
(assoc (update result :trace #(conj % name)) :status :err))
|
||||
|
||||
(case (:status result)
|
||||
:ok (recur (rest ps) (conj results result) res-rem)
|
||||
:group (recur (rest ps)
|
||||
;; TODO: fix this?
|
||||
;; This is supposed to undo the :quiet/:group thing
|
||||
(vec (concat results
|
||||
(filter #(= (:status %) :ok) (:data result))))
|
||||
(vec (concat results (:data result)))
|
||||
res-rem)
|
||||
:quiet (recur (rest ps) results res-rem)
|
||||
:err (update result :trace #(conj % name))))))))})
|
||||
(:err :none)
|
||||
(assoc (update result :trace #(conj % name)) :status :err))))))))})
|
||||
|
||||
(defn quiet [parser]
|
||||
{:name (? (:name parser) parser)
|
||||
:rule (fn [tokens]
|
||||
{:name (kw+str (? (:name parser) parser) "-quiet")
|
||||
:rule (fn quiet-fn [tokens]
|
||||
(let [result (apply-parser parser tokens)]
|
||||
(if (pass? result)
|
||||
(assoc result :status :quiet)
|
||||
|
@ -113,45 +135,50 @@
|
|||
(defn zero+
|
||||
([parser] (zero+ (pname parser) parser))
|
||||
([name parser]
|
||||
{:name name
|
||||
:rule (fn [tokens]
|
||||
(println "entering ZERO+")
|
||||
{:name (kw+str name "-zero+")
|
||||
:rule (fn zero+fn [tokens]
|
||||
;(println "entering ZERO+")
|
||||
(loop [results []
|
||||
ts tokens
|
||||
back tokens]
|
||||
(println "looping ZERO+" (:name parser))
|
||||
ts tokens]
|
||||
;(println "looping ZERO+" (? (:name parser) parser))
|
||||
(let [result (apply-parser parser ts)]
|
||||
(if (pass? result)
|
||||
(recur (conj results result) (remaining result) ts)
|
||||
{:status :group :type name :data results :token (first tokens) :remaining ts}
|
||||
))))}))
|
||||
(case (:status result)
|
||||
:ok (recur (conj results result) (remaining result))
|
||||
:group (recur (vec (concat results (:data result))) (remaining result))
|
||||
:quiet (recur results (remaining result))
|
||||
{:status :group :type name :data results :token (first tokens) :remaining ts}))))}))
|
||||
|
||||
(defn one+
|
||||
([parser] (one+ (pname parser) parser))
|
||||
([name parser]
|
||||
{:name name
|
||||
:rule (fn [tokens]
|
||||
(let [result (apply-parser parser tokens)
|
||||
rest (zero+ name parser)]
|
||||
(case (:status result)
|
||||
(:ok :quiet)
|
||||
(let [rest-result (apply-parser rest (remaining result))
|
||||
rest-data (data rest-result)
|
||||
rest-remaining (remaining rest-result)]
|
||||
(println rest-data)
|
||||
{:name (kw+str name "-one+")
|
||||
:rule (fn one+fn [tokens]
|
||||
(let [first-result (apply-parser parser tokens)
|
||||
rest-parser (zero+ name parser)]
|
||||
(case (:status first-result)
|
||||
(:ok :group)
|
||||
(let [rest-result (apply-parser rest-parser (remaining first-result))]
|
||||
{:status :group
|
||||
:type name
|
||||
:data (vec (concat (data result) (second rest-data)) )
|
||||
:data (vec (concat [first-result] (data rest-result)))
|
||||
:token (first tokens)
|
||||
:remaining rest-remaining})
|
||||
:remaining (remaining rest-result)})
|
||||
|
||||
:err result)))}))
|
||||
:quiet
|
||||
(let [rest-result (apply-parser rest-parser (remaining first-result))]
|
||||
{:status :quiet
|
||||
:type name
|
||||
:data []
|
||||
:token (first tokens)
|
||||
:remaining (remaining rest-result)})
|
||||
|
||||
(:err :none) first-result)))}))
|
||||
|
||||
(defn maybe
|
||||
([parser] (maybe (pname parser) parser))
|
||||
([name parser]
|
||||
{:name name
|
||||
:rule (fn [tokens]
|
||||
{:name (kw+str name "-maybe")
|
||||
:rule (fn maybe-fn [tokens]
|
||||
(let [result (apply-parser parser tokens)]
|
||||
(if (pass? result)
|
||||
result
|
||||
|
@ -159,43 +186,56 @@
|
|||
)))}))
|
||||
|
||||
(comment
|
||||
"So one thing I'm thinking about is the fact that zero+, one+, maybe all only really make sense in the context of an `order` call. So that idea is that anything that's in one of these should be added to the `order`'s data vector, rather than putting it in a subordinate structure.
|
||||
"
|
||||
If I'm not mistaken, the Ludus grammer requires *no* lookahead, the first token in an expression tells you what kind of expression it is:
|
||||
|
||||
This is much the same as the `quiet` idea: there should be some kind of internal representation of the thing.
|
||||
Rather, there is one ambiguity: synthetic expressions can start with words or keywords.
|
||||
A bare word can be assimilated to synthetic expressions. Interestingly, so can synthetic.
|
||||
|
||||
***
|
||||
The parsing strategy is the same: consume as many things until you can't get anymore.
|
||||
|
||||
And now the `group` status has broken `quiet`
|
||||
The fact that a bare keyword is evaluated like a literal doesn't matter.
|
||||
|
||||
TODO: the concats put things into lists/seqs, and thus lett and iff are out of order.
|
||||
So:
|
||||
literal -> literal
|
||||
keyword -> synthetic
|
||||
word -> synthetic
|
||||
( -> tuple
|
||||
[ -> list
|
||||
#{ -> dict
|
||||
@{ -> struct
|
||||
ns -> ns
|
||||
let -> let
|
||||
do -> pipeline
|
||||
|
||||
etc.
|
||||
|
||||
Because there's now NO lookahead, we can easily distinguish between orderings that don't match at all, and ones which match on the first token.
|
||||
|
||||
Because of that, we can also distinguish between no-match and errors
|
||||
|
||||
")
|
||||
|
||||
(defn group
|
||||
([parser] (pname parser) parser)
|
||||
([name parser] (fn [tokens]
|
||||
(let [result (apply-parser parser tokens)
|
||||
data (map :data (:data result))]
|
||||
{assoc result :data data}))))
|
||||
|
||||
|
||||
(declare expression)
|
||||
|
||||
(def literal (choice :literal [:nil :true :false :number :string :keyword]))
|
||||
(def literal (choice :literal [:nil :true :false :number :string]))
|
||||
|
||||
(def separator (one+ (choice :separator [:comma :newline])))
|
||||
(def separator (choice :separator [:comma :newline]))
|
||||
|
||||
(def nls? (quiet (zero+ :nls :newline)))
|
||||
|
||||
(def pattern (choice :pattern [:literal :word])) ;; stupid to start
|
||||
|
||||
(def iff (order :iff [
|
||||
(quiet :if) nls?
|
||||
(quiet :if)
|
||||
nls?
|
||||
expression
|
||||
nls? (quiet :then)
|
||||
nls?
|
||||
(quiet :then)
|
||||
expression
|
||||
nls? (quiet :else)
|
||||
nls?
|
||||
(quiet :else)
|
||||
expression]))
|
||||
|
||||
(def lett (order :let [
|
||||
|
@ -205,19 +245,35 @@
|
|||
nls?
|
||||
expression]))
|
||||
|
||||
(def tuple-entries (order :tuple-entries [(quiet separator) expression]))
|
||||
(def tuple-entry (order :tuple-entry [(quiet (one+ separator)) expression]))
|
||||
|
||||
(def tuple (order :tuple
|
||||
[(quiet :lparen)
|
||||
(quiet (zero+ separator))
|
||||
(maybe expression)
|
||||
(zero+ tuple-entries)
|
||||
(zero+ tuple-entry)
|
||||
(quiet (zero+ separator))
|
||||
(quiet :rparen)]))
|
||||
|
||||
(def splat (order :splat [(quiet :splat) :word]))
|
||||
|
||||
(def list-term (choice :list-term [splat expression]))
|
||||
|
||||
(def list-entry (order :list-entry [(quiet (one+ separator)) list-term]))
|
||||
|
||||
(def listt (order :list
|
||||
[(quiet :lbracket)
|
||||
(quiet (zero+ separator))
|
||||
(maybe list-term)
|
||||
(zero+ list-entry)
|
||||
(quiet (zero+ separator))
|
||||
(quiet :rbracket)]))
|
||||
|
||||
(def synth-root (choice :synth-root [:keyword :word]))
|
||||
|
||||
(def synth-term (choice :synth-term [:tuple :keyword]))
|
||||
(def synth-term (choice :synth-term [tuple :keyword]))
|
||||
|
||||
(def synthetic (order :synthetic [synth-root (one+ synth-term)]))
|
||||
(def synthetic (order :synthetic [synth-root (zero+ synth-term)]))
|
||||
|
||||
(def terminator (choice :terminator [:newline :semicolon]))
|
||||
|
||||
|
@ -225,16 +281,31 @@
|
|||
|
||||
(def block (order :block [(quiet :lbrace) nls? expression (zero+ block-line) nls? (quiet :rbrace)]))
|
||||
|
||||
(def expression (choice :expression [tuple literal lett iff synthetic :word block]))
|
||||
(def expression (choice :expression [lett iff synthetic block listt tuple literal]))
|
||||
|
||||
(def foo (order :foo [:number :keyword]))
|
||||
(def importt (order :import [(quiet :import) :string (quiet :as) :word]))
|
||||
|
||||
(def eg (:tokens (scan/scan "let foo = :bar")))
|
||||
(def toplevel (choice :toplevel [importt expression]))
|
||||
|
||||
(def result (apply-parser expression eg))
|
||||
(def script-line (order :script-line [(quiet terminator) toplevel]))
|
||||
|
||||
(def script (order :script [nls? toplevel (zero+ script-line) nls? (quiet :eof)]))
|
||||
|
||||
|
||||
(def eg (:tokens (scan/scan
|
||||
""
|
||||
)))
|
||||
|
||||
eg
|
||||
|
||||
(println eg)
|
||||
|
||||
(def result (apply-parser script eg))
|
||||
|
||||
result
|
||||
|
||||
(println result)
|
||||
|
||||
(defn clean [node]
|
||||
(if (map? node)
|
||||
(-> node
|
||||
|
@ -242,7 +313,7 @@ result
|
|||
:status
|
||||
:remaining
|
||||
:token)
|
||||
(update :data #(map clean %)))
|
||||
(update :data #(into [] (map clean) %)))
|
||||
node))
|
||||
|
||||
(defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x)
|
||||
|
|
Loading…
Reference in New Issue
Block a user