From 5b1ff5aef39ce2486ce3b79925e99815a7652891 Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Sun, 7 May 2023 22:49:19 -0400 Subject: [PATCH] Many iterations of parser combinator strategies. Not yet working. --- src/ludus/parser-new.clj | 231 ++++++++++++++++++++++++++++++++++++--- src/ludus/scanner.clj | 16 ++- tokens | 47 ++++++++ 3 files changed, 272 insertions(+), 22 deletions(-) create mode 100644 tokens diff --git a/src/ludus/parser-new.clj b/src/ludus/parser-new.clj index d94bd41..3536db5 100644 --- a/src/ludus/parser-new.clj +++ b/src/ludus/parser-new.clj @@ -1,32 +1,227 @@ (ns ludus.parser-new - (:require - [ludus.scanner :as scan])) + (:require + [ludus.scanner :as scan])) -(defn ok? [[ok]] - (= ok :ok)) +(def msgs { -(defn kw->type [kw] (apply str (next (str kw)))) + }) -(defn match [kw token] - (if (= kw (:type token)) - [:ok token] - [:error token (str "Expected " (kw->type kw))])) +(defn ? [val default] (if (nil? val) default val)) -(defn parser - ([kw] {:type kw :fn #(match kw %)}) - ([kw err] {:type kw :fn #(assoc (match kw %) 2 err)})) +(defn ok? [{status :status}] + (= status :ok)) +(defn pass? [{status :status}] (or (= status :ok) (= status :quiet))) -(defn choice [& args]) +(defn data [{d :data}] d) -(def eg (:tokens (scan/scan "123 :foo"))) +(defn remaining [{r :remaining}] r) -(def word (parser :word "fuck")) +(defn pname [parser] (? (:name parser) parser)) -(word (first eg)) +(defn value [token] + (if (= :none (:literal token)) (:lexeme token) (:literal token))) + +(defn apply-kw-parser [kw tokens] + (let [token (first tokens)] + (println "applying kw parser " kw " to " token) + (if (= kw (:type token)) + {:status :ok :type kw :data [(value token)] :token token :remaining (rest tokens)} + {:status :err :token token :trace [kw] :remaining (rest tokens)}))) + +(defn apply-fn-parser [parser tokens] + (println "applying fn parser" parser ", " tokens) + (let [rule (:rule parser) name (:name parser)] + (println "appying fn parser " name " to " (first tokens)) + (rule tokens))) + +(defn apply-parser [parser tokens] + (if (keyword? parser) + (apply-kw-parser parser tokens) + (apply-fn-parser parser tokens))) + +(defn pmap [f parser] (fn [tokens] (f (apply-parser parser tokens)))) + +(defn choice [name parsers] + {:name name + :rule (fn [tokens] + (println "entering CHOICE" name) + (loop [ps parsers] + (let [result (apply-parser (first ps) tokens) + rem-ts (remaining result) + rem-ps (rest ps)] + (cond + (pass? result) + {:status :ok :type name :data [result] :token (first tokens) :remaining rem-ts} + + (empty? rem-ps) + {:status :err :token (first tokens) :trace [name] :remaining rem-ts} + :else (recur rem-ps)))))}) + +(defn order [name parsers] + {:name name + :rule (fn [tokens] + (println "entering ORDER" name) + (let [origin (first tokens)] + (loop [ps parsers + results [] + ts tokens] + (let [result (apply-parser (first ps) ts) + res-rem (remaining result)] + (if (empty? (rest ps)) + (case (:status result) + + :ok {:status :ok + :type name + :data (conj results result) + :token origin + :remaining res-rem} + + :quiet {:status :ok + :type name + :data results + :token origin + :remaining res-rem} + + :group {:status :ok + :type name + :data (concat results (:data result)) + :token origin + :remaining res-rem} + + :err (update result :trace #(conj % name))) + + (case (:status result) + :ok (recur (rest ps) (conj results result) res-rem) + :group (recur (rest ps) + ;; TODO: fix this? + ;; This is supposed to undo the :quiet/:group thing + (concat results + (filter #(= (:status %) :ok) (:data result))) + res-rem) + :quiet (recur (rest ps) results res-rem) + :err (update result :trace #(conj % name))))))))}) + +(defn quiet [parser] + {:name (? (:name parser) parser) + :rule (fn [tokens] + (let [result (apply-parser parser tokens)] + (if (pass? result) + (assoc result :status :quiet) + result)))}) + +(defn one+ + ([parser] (one+ (pname parser) parser)) + ([name parser] + {:name name + :rule (fn [tokens] + (let [result (apply-parser parser tokens) + rest (zero+ name parser)] + (case (:status result) + (:ok :quiet) + (let [rest-result (apply-parser rest (remaining result)) + rest-data (data rest-result) + rest-remaining (remaining rest-result)] + (println rest-data) + {:status :group + :type name + :data (concat (data result) (second rest-data)) + :token (first tokens) + :remaining rest-remaining}) + + :err result)))})) + +(defn zero+ + ([parser] (zero+ (pname parser) parser)) + ([name parser] + {:name name + :rule (fn [tokens] + (println "entering ZERO+") + (loop [results [] + ts tokens + back tokens] + (println "looping ZERO+" (:name parser)) + (let [result (apply-parser parser ts)] + (if (pass? result) + (recur (conj results result) (remaining result) ts) + {:status :group :type name :data results :token (first tokens) :remaining ts} + ))))})) + +(defn maybe + ([parser] (maybe (pname parser) parser)) + ([name parser] + {:name name + :rule (fn [tokens] + (let [result (apply-parser parser tokens)] + (if (pass? result) + result + {:status :group :type name :data [] :token (first tokens) :remaining tokens} + )))})) (comment + "So one thing I'm thinking about is the fact that zero+, one+, maybe all only really make sense in the context of an `order` call. So that idea is that anything that's in one of these should be added to the `order`'s data vector, rather than putting it in a subordinate structure. -(def string (parser :string)) + This is much the same as the `quiet` idea: there should be some kind of internal representation of the thing. -) \ No newline at end of file + *** + + And now the `group` status has broken `quiet` + + +") + +(defn group + ([parser] (pname parser) parser) + ([name parser] (fn [tokens] + (let [result (apply-parser parser tokens) + data (map :data (:data result))] + {assoc result :data data})))) + + +(declare expression) + +(def literal (choice :literal [:nil :true :false :number :string :keyword])) + +(def separator (one+ (choice :separator [:comma :newline]))) + +(def nls? (quiet (zero+ :nls :newline))) + +(def tuple-entries (order :tuple-entries [(quiet separator) expression])) + +(def tuple (order :tuple + [(quiet :lparen) + (maybe expression) + (zero+ tuple-entries) + (quiet :rparen)])) + +(def expression (choice :expression [tuple literal])) + +(def foo (order :foo [:number :keyword])) + +(def eg (:tokens (scan/scan "(1, 2, 3)"))) + +(def result (apply-parser tuple eg)) + +result + +(defn clean [node] + (if (map? node) + (-> node + (dissoc + :status + :remaining + :token) + (update :data #(map clean %))) + node)) + +(defn tap [x] (println "\n\n\n\n******NEW PARSE\n\n:::=> " x "\n\n") x) + +(def my-data (-> result clean tap)) + +my-data + +(def my-first (-> my-data first)) + +(def my-sec (map :data (-> my-data second :data))) + +(concat my-first my-sec) \ No newline at end of file diff --git a/src/ludus/scanner.clj b/src/ludus/scanner.clj index 99d7967..9187d1f 100644 --- a/src/ludus/scanner.clj +++ b/src/ludus/scanner.clj @@ -11,20 +11,20 @@ "cond" :cond ;; impl "do" :do ;; impl "else" :else ;; impl - "false" :false ;; impl + "false" :false ;; impl -> literal word "fn" :fn ;; impl "if" :if ;; impl "import" :import ;; impl "let" :let ;; impl "loop" :loop ;; impl "match" :match ;; impl - "nil" :nil ;; impl + "nil" :nil ;; impl -> literal word "ns" :ns ;; impl ;; "panic!" :panic ;; impl (should be a function) "recur" :recur ;; impl "ref" :ref ;; impl "then" :then ;; impl - "true" :true ;; impl + "true" :true ;; impl -> literal word "with" :with ;; impl ;; actor model/concurrency @@ -42,6 +42,12 @@ ;; "module" :module ;; not necessary if we don't have datatypes }) +(def literal-words { + "true" true + "false" false + "nil" nil +}) + (defn- new-scanner "Creates a new scanner." [source] @@ -195,7 +201,9 @@ word (str char)] (let [curr (current-char scanner)] (cond - (terminates? curr) (add-token scanner (get reserved-words word :word)) + (terminates? curr) (add-token scanner + (get reserved-words word :word) + (get literal-words word :none)) (word-char? curr) (recur (advance scanner) (str word curr)) :else (add-error scanner (str "Unexpected " curr " after word " word ".")))))) diff --git a/tokens b/tokens new file mode 100644 index 0000000..23d11ef --- /dev/null +++ b/tokens @@ -0,0 +1,47 @@ +TOKENS: + +:nil +:true +:false +:word +:keyword +:number +:string + +:as +:cond +:do +:else +:fn +:if +:import +:let +:loop +:ref +:then +:with + +:receive +:spawn +:repeat +:test +:when + +:lparen +:rparen +:lbrace +:rbrace +:lbracket +:rbracket +:semicolon +:comma +:newline +:backslash +:equals +:pipeline +:rarrow +:startdict +:startstruct +:startset +:splat +:eof \ No newline at end of file