From 214e94465d520c1f345d9ec0d88b25fc249726fa Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Sat, 30 Apr 2022 13:56:14 -0400 Subject: [PATCH] clj -> cljc --- src/ludus/analyzer.cljc | 22 + src/ludus/ast.cljc | 2 + src/ludus/collections.cljc | 1 + src/ludus/core.cljc | 33 ++ src/ludus/data.cljc | 1 + src/ludus/interpreter.cljc | 394 +++++++++++++++++ src/ludus/parser.cljc | 851 +++++++++++++++++++++++++++++++++++++ src/ludus/prelude.cljc | 69 +++ src/ludus/scanner.cljc | 318 ++++++++++++++ src/ludus/show.cljc | 53 +++ src/ludus/token.cljc | 11 + 11 files changed, 1755 insertions(+) create mode 100644 src/ludus/analyzer.cljc create mode 100644 src/ludus/ast.cljc create mode 100644 src/ludus/collections.cljc create mode 100644 src/ludus/core.cljc create mode 100644 src/ludus/data.cljc create mode 100644 src/ludus/interpreter.cljc create mode 100644 src/ludus/parser.cljc create mode 100644 src/ludus/prelude.cljc create mode 100644 src/ludus/scanner.cljc create mode 100644 src/ludus/show.cljc create mode 100644 src/ludus/token.cljc diff --git a/src/ludus/analyzer.cljc b/src/ludus/analyzer.cljc new file mode 100644 index 0000000..bc96e2a --- /dev/null +++ b/src/ludus/analyzer.cljc @@ -0,0 +1,22 @@ +(ns ludus.analyzer + (:require + [ludus.ast :as ast] + [ludus.token :as token])) + +(defn analyze [ast] ast) + +(comment " + Here's where we do a bunch of static analysis. + Some things we might wish for: + * No unused bindings + * No unbound names + * Compound `loop` and `gen` forms must have LHS's (tuple patterns) of the same length + * Recur must be in tail position in `loop`s + * Tail call optimization for simple recursion (rewrite it as a loop?) + * Check arities for statically known functions + * Enforce single-member tuple after called keywords + * Placeholders may only appear in tuples in synthetic expressions + * Each of these may have zero or one placeholders + * Function arities are correct + * Arity of called keywords must be 1 +") \ No newline at end of file diff --git a/src/ludus/ast.cljc b/src/ludus/ast.cljc new file mode 100644 index 0000000..0f8ca7c --- /dev/null +++ b/src/ludus/ast.cljc @@ -0,0 +1,2 @@ +(ns ludus.ast) + diff --git a/src/ludus/collections.cljc b/src/ludus/collections.cljc new file mode 100644 index 0000000..5637314 --- /dev/null +++ b/src/ludus/collections.cljc @@ -0,0 +1 @@ +(ns ludus.collections) \ No newline at end of file diff --git a/src/ludus/core.cljc b/src/ludus/core.cljc new file mode 100644 index 0000000..825611e --- /dev/null +++ b/src/ludus/core.cljc @@ -0,0 +1,33 @@ +(ns ludus.core + "A tree-walk interpreter for the Ludus language." + (:require + [ludus.scanner :as scanner] + [ludus.parser :as parser] + [ludus.interpreter :as interpreter] + [ludus.show :as show] + [clojure.pprint :as pp]) + (:gen-class)) + +(defn- run [source] + (let [scanned (scanner/scan source)] + (if (not-empty (:errors scanned)) + (do + (println "I found some scanning errors!") + (pp/pprint (:errors scanned)) + (System/exit 65)) + (let [parsed (parser/parse scanned)] + (if (not-empty (:errors parsed)) + (do + (println "I found some parsing errors!") + (pp/pprint (:errors parsed)) + (System/exit 66)) + (let [interpreted (interpreter/interpret parsed)] + (println (show/show interpreted)) + (System/exit 0))))))) + +(defn -main [& args] + (cond + (= (count args) 1) (run (slurp (first args))) + :else (do + (println "Usage: ludus [script]") + (System/exit 64)))) \ No newline at end of file diff --git a/src/ludus/data.cljc b/src/ludus/data.cljc new file mode 100644 index 0000000..fa91073 --- /dev/null +++ b/src/ludus/data.cljc @@ -0,0 +1 @@ +(ns ludus.data) \ No newline at end of file diff --git a/src/ludus/interpreter.cljc b/src/ludus/interpreter.cljc new file mode 100644 index 0000000..2b65108 --- /dev/null +++ b/src/ludus/interpreter.cljc @@ -0,0 +1,394 @@ +(ns ludus.interpreter + (:require + [ludus.parser :as parser] + [ludus.scanner :as scanner] + [ludus.ast :as ast] + [ludus.prelude :as prelude] + [ludus.data :as data] + [clojure.pprint :as pp])) + +;; right now this is not very efficient: +;; it's got runtime checking +;; we should be able to do these checks statically +;; that's for later, tho +(defn- resolve-word[word ctx-vol] + (let [ctx @ctx-vol] + (if (contains? ctx word) + (get ctx word) + (if (contains? ctx ::parent) + (recur word (::parent ctx)) + (throw (ex-info (str "Unbound name: " word) {})))))) + +(declare interpret-ast match interpret) + +(defn- match-tuple [pattern value ctx-vol] + (cond + (not (vector? value)) {:success false :reason "Could not match non-tuple value to tuple"} + + (not (= ::data/tuple (first value))) {:success false :reason "Could not match list to tuple"} + + (not (= (:length pattern) (dec (count value)))) + {:success false :reason "Cannot match tuples of different lengths"} + + (= 0 (:length pattern) (dec (count value))) {:success true :ctx {}} + + :else (let [members (:members pattern)] + (loop [i (:length pattern) + ctx {}] + (if (= 0 i) + {:success true :ctx ctx} + (let [match? (match (nth members (dec i)) (nth value i) ctx-vol)] + (if (:success match?) + (recur (dec i) (merge ctx (:ctx match?))) + {:success false :reason (str "Could not match " pattern " with " value)}))))))) + +(defn- match [pattern value ctx-vol] + (let [ctx @ctx-vol] + (case (::ast/type pattern) + ::ast/placeholder {:success true :ctx {}} + + ::ast/atom + (let [match-value (:value pattern)] + (if (= match-value value) + {:success true :ctx {}} + {:success false + :reason (str "No match: Could not match " match-value " with " value)})) + + ::ast/word + (let [word (:word pattern)] + (if (contains? ctx word) + {:success false :reason (str "Name " word " is already bound")} + {:success true :ctx {word value}})) + + ::ast/tuple (match-tuple pattern value ctx-vol) + + (throw (ex-info "Unknown pattern" {:pattern pattern}))))) + +(defn- update-ctx [ctx new-ctx] + (merge ctx new-ctx)) + +;; TODO: get "if let" pattern working +;; TODO: get typed exceptions to distinguish panics +(defn- interpret-let [ast ctx] + (let [pattern (:pattern ast) + expr (:expr ast) + value (interpret-ast expr ctx) + match (match pattern value ctx) + success (:success match)] + (if success + (vswap! ctx update-ctx (:ctx match)) + (throw (ex-info (:reason match) {}))) + value)) + +(defn- interpret-if [ast ctx] + (let [if-expr (:if ast) + then-expr (:then ast) + else-expr (:else ast) + if-value (interpret-ast if-expr ctx)] + (if if-value + (interpret-ast then-expr ctx) + (interpret-ast else-expr ctx)))) + +(defn- interpret-match [ast ctx] + (let [match-expr (:expr ast) + expr (interpret-ast match-expr ctx) + clauses (:clauses ast)] + (loop [clause (first clauses) + clauses (rest clauses)] + (if clause + (let [pattern (:pattern clause) + body (:body clause) + new-ctx (volatile! {::parent ctx}) + match? (match pattern expr new-ctx) + success (:success match?) + clause-ctx (:ctx match?)] + (if success + (do + (vswap! new-ctx #(merge % clause-ctx)) + (interpret-ast body new-ctx)) + (recur (first clauses) (rest clauses)))) + (throw (ex-info "Match Error: No match found" {})))))) + +(defn- interpret-cond [ast ctx] + (let [clauses (:clauses ast)] + (loop [clause (first clauses) + clauses (rest clauses)] + (if (not clause) + (throw (ex-info "Cond Error: No match found" {})) + (let [test-expr (:test clause) + body (:body clause) + truthy? (boolean (interpret-ast test-expr ctx))] + (if truthy? + (interpret-ast body ctx) + (recur (first clauses) (rest clauses)) + ) + ) + ) + ))) + +(defn- interpret-called-kw [kw tuple ctx] + ;; TODO: check this statically + (if (not (= 1 (:length tuple))) + (throw (ex-info "Called keywords must be unary" {})) + (let [kw (interpret-ast kw ctx) + map (second (interpret-ast tuple ctx))] + (if (::data/struct map) + (if (contains? map kw) + (kw map) + (if (= (::data/type map) ::data/ns) + (throw (ex-info (str "Namespace error: no member " kw " in ns " (::data/name map)) {})) + (throw (ex-info (str "Struct error: no member at " kw) {})))) + (get map kw)) + ))) + +(defn- call-fn [lfn tuple ctx] + (cond + (= ::data/partial (first tuple)) + {::data/type ::data/clj + :name (str (:name lfn) "{partial}") + :body (fn [arg] + (call-fn + lfn + (concat [::data/tuple] (replace {::data/placeholder arg} (rest tuple))) + ctx))} + + (= (::data/type lfn) ::data/clj) (apply (:body lfn) (next tuple)) + + (= (::data/type lfn) ::data/fn) + (let [clauses (:clauses lfn)] + (loop [clause (first clauses) + clauses (rest clauses)] + (if clause + (let [pattern (:pattern clause) + body (:body clause) + new-ctx (volatile! {::parent ctx}) + match? (match pattern tuple new-ctx) + success (:success match?) + clause-ctx (:ctx match?)] + (if success + (do + (vswap! new-ctx #(merge % clause-ctx)) + (interpret-ast body new-ctx)) + (recur (first clauses) (rest clauses)))) + + (throw (ex-info "Match Error: No match found" {:fn-name (:name lfn)}))))) + + (keyword? lfn) + (if (= 2 (count tuple)) + (let [target (second tuple) kw lfn] + (if (::data/struct target) + (if (contains? target kw) + (kw target) + (if (= (::data/type target) ::data/ns) + (throw (ex-info (str "Namespace error: no member " kw " in ns" (::data/name target)) {})) + (throw (ex-info (str "Struct error: no member at " kw) {})) + ) + ) + (kw target))) + (throw (ex-info "Called keywords take a single argument" {}))) + + :else (throw (ex-info "I don't know how to call that" {:fn lfn})))) + +(defn- interpret-synthetic-term [prev-value curr ctx] + (let [type (::ast/type curr)] + (if (= type ::ast/atom) + (if (::data/struct prev-value) + (if (contains? prev-value (:value curr)) + (get prev-value (:value curr)) + (if (= (::data/type prev-value) ::data/ns) + (throw (ex-info (str "Namespace error: no member " (:value curr) " in ns " (::data/name prev-value)) {})) + (throw (ex-info (str "Struct error: no member " (:value curr)) {})))) + (get prev-value (:value curr))) + (call-fn prev-value (interpret-ast curr ctx) ctx)))) + +(defn- interpret-synthetic [ast ctx] + (let [terms (:terms ast) + first (first terms) + second (second terms) + rest (rest (rest terms)) + first-term-type (::ast/type first) + first-val (if (= first-term-type ::ast/atom) + (interpret-called-kw first second ctx) + (interpret-synthetic-term (interpret-ast first ctx) second ctx))] + (reduce #(interpret-synthetic-term %1 %2 ctx) first-val rest))) + +(defn- interpret-fn [ast ctx] + (let [name (:name ast) + clauses (:clauses ast)] + (if (= name ::ast/anon) + {::data/type ::data/fn + :name name + :clauses clauses} + (let [fn {::data/type ::data/fn + :name name + :clauses clauses}] + (if (contains? @ctx name) + (throw (ex-info (str "Name " name " is already bound") {})) + (do + (vswap! ctx update-ctx {name fn}) + fn)))))) + +(defn- interpret-do [ast ctx] + (let [exprs (:exprs ast) + origin (interpret-ast (first exprs) ctx) + fns (rest exprs)] + (reduce #(call-fn (interpret-ast %2 ctx) [::data/tuple %1] ctx) origin fns))) + +(defn- map-values [f] + (map (fn [kv] + (let [[k v] kv] + [k (f v)])))) + +(defn- interpret-ns [ast ctx] + (let [members (:members ast) + name (:name ast)] + (if (contains? @ctx name) + (throw (ex-info (str "ns name " name " is already bound") {})) + (let [ns (into + {::data/struct true ::data/type ::data/ns ::data/name name} + (map-values #(interpret-ast % ctx)) + members)] + (do + (vswap! ctx update-ctx {name ns}) + ns))))) + +(defn- interpret-import [ast ctx] + (let [path (:path ast) + name (:name ast)] + (if (contains? @ctx name) + (throw (ex-info (str "Name " name " is alrady bound") {})) + (let [result ;; TODO: add any error handling at all + (-> path + (slurp) + (scanner/scan) + (parser/parse) + (interpret))] + (vswap! ctx update-ctx {name result}) + result ;; TODO: test this! + )))) + +(defn interpret-ast [ast ctx] + (case (::ast/type ast) + + ::ast/atom (:value ast) + + ::ast/word (resolve-word (:word ast) ctx) + + ::ast/let (interpret-let ast ctx) + + ::ast/if (interpret-if ast ctx) + + ::ast/match (interpret-match ast ctx) + + ::ast/cond (interpret-cond ast ctx) + + ::ast/synthetic (interpret-synthetic ast ctx) + + ::ast/fn (interpret-fn ast ctx) + + ::ast/pipeline (interpret-do ast ctx) + + ::ast/placeholder ::data/placeholder + + ::ast/ns (interpret-ns ast ctx) + + ::ast/import (interpret-import ast ctx) + + ::ast/block + (let [exprs (:exprs ast) + inner (pop exprs) + last (peek exprs) + ctx (volatile! {::parent ctx})] + (run! #(interpret-ast % ctx) inner) + (interpret-ast last ctx)) + + ::ast/script + (let [exprs (:exprs ast) + inner (pop exprs) + last (peek exprs) + ctx (volatile! prelude/prelude)] + (run! #(interpret-ast % ctx) inner) + (interpret-ast last ctx)) + + ;; note that, excepting tuples and structs, + ;; runtime representations are bare + ;; tuples are vectors with a special first member + ::ast/tuple + (let [members (:members ast)] + (into + [(if (:partial ast) ::data/partial ::data/tuple)] + (map #(interpret-ast % ctx)) members)) + + ::ast/list + (let [members (:members ast)] + (into [] (map #(interpret-ast % ctx)) members)) + + ::ast/set + (let [members (:members ast)] + (into #{} (map #(interpret-ast % ctx)) members)) + + ::ast/hash + (let [members (:members ast)] + (into {} (map-values #(interpret-ast % ctx)) members)) + + ::ast/struct + (let [members (:members ast)] + (into {::data/struct true} (map-values #(interpret-ast % ctx)) members)) + + (throw (ex-info "Unknown AST node type" {:node ast})))) + +(defn interpret [parsed] + (try + (interpret-ast (::parser/ast parsed) {}) + (catch clojure.lang.ExceptionInfo e + (println "Ludus panicked!") + (println (ex-message e)) + (pp/pprint (ex-data e)) + (System/exit 67)))) + +(comment + + (def source " + + let foo = 2 + + match foo with { + 1 -> :one + 2 -> :two + else -> :oops + } + + ns bar { + foo + } + + bar :foo + + ") + + (println "") + (println "****************************************") + (println "*** *** NEW INTERPRETATION *** ***") + (println "") + + (-> source + (scanner/scan) + (parser/parse) + (interpret) + (pp/pprint))) + +(comment " + + Left to do: + * if-let pattern + * improve panics + * add location info for panics + * refactor calling keywords + * refactor accessing structs vs. hashes + + ") + + + + + diff --git a/src/ludus/parser.cljc b/src/ludus/parser.cljc new file mode 100644 index 0000000..6542422 --- /dev/null +++ b/src/ludus/parser.cljc @@ -0,0 +1,851 @@ +(ns ludus.parser + (:require + [ludus.token :as token] + [ludus.scanner :as scanner] + [ludus.ast :as ast] + [clojure.pprint :as pp] + [clojure.set :as s])) + +;; a parser map and some functions to work with them +(defn- parser [tokens] + {::tokens tokens ::token 0 ::ast {} ::errors []}) + +(defn- current [parser] + (nth (::tokens parser) (::token parser) nil)) + +(defn- ppeek [parser] + (nth (::tokens parser) (inc (::token parser)) nil)) + +(defn- at-end? [parser] + (let [curr (current parser)] + (or (nil? curr) (= ::token/eof (::token/type curr))))) + +(defn- advance [parser] + (update parser ::token inc)) + +(defn- token-type [parser] + (::token/type (current parser))) + +(defn- node-type [parser] + (get-in parser [::ast ::ast/type])) + +;; some forward declarations +(declare parse-expr parse-word parse-pattern) + +;; handle some errors +(def sync-on #{::token/newline + ::token/semicolon + ::token/comma + ::token/rparen + ::token/rbracket + ::token/rbrace + ::token/eof}) + +(defn- psync [parser message origin end] + (let [poison {::ast/type ::ast/poison + :message message + :origin origin + :end end}] + (-> parser + (assoc ::ast poison) + (update ::errors conj poison)))) + +(defn- poisoned? [parser] + (= ::ast/poison (get-in parser [::ast ::ast/type]))) + +(defn- panic + ([parser message] (panic parser message sync-on)) + ([parser message sync-on] + (println (str "PANIC!!! in the parser: " message)) + (let [sync-on (conj (if (set? sync-on) sync-on #{sync-on}) ::token/eof) + origin (current parser)] + (loop [parser parser] + (let [curr (current parser) + type (::token/type curr)] + (if (or (at-end? parser) (contains? sync-on type)) + (psync parser message origin curr) + (recur (advance parser)))))))) + +;; some helper functions +(defn- expect [tokens message parser] + (let [curr (current parser) + tokens (if (set? tokens) tokens #{tokens}) + type (::token/type curr)] + (if (contains? tokens type) + (advance parser) + (-> parser + (advance) + (panic message tokens))))) + +(defn- expect* [tokens message parser] + (let [curr (current parser) + tokens (if (set? tokens) tokens #{tokens}) + type (::token/type curr)] + (if (contains? tokens type) + {:success true :parser (advance parser)} + {:success false :parser (panic (advance parser) message)}))) + +(defn- accept [tokens parser] + (let [curr (current parser) + tokens (if (set? tokens) tokens #{tokens}) + type (::token/type curr)] + (if (contains? tokens type) + (advance parser) + parser))) + +(defn- accept-many [tokens parser] + (let [tokens (if (set? tokens) tokens #{tokens})] + (loop [parser parser] + (let [curr (current parser) + type (::token/type curr)] + (if (contains? tokens type) + (recur (advance parser)) + parser))))) + +;; various parsing functions +(defn- parse-atom [parser] + (let [token (current parser)] + (-> parser + (advance) + (assoc ::ast {::ast/type ::ast/atom + :token token + :value (::token/literal token)})))) + +;; just a quick and dirty map to associate atomic words with values +(def atomic-words {::token/nil nil + ::token/true true + ::token/false false}) + +(defn parse-atomic-word [parser] + (let [token (current parser)] + (-> parser + (advance) + (assoc ::ast {::ast/type ::ast/atom + :token token + :value (get atomic-words (::token/type token))})))) + +(defn- add-member [members member] + (if (nil? member) + members + (conj members member))) + +(defn- contains-placeholder? [members] + (< 0 (count (filter #(= ::ast/placeholder (::ast/type %1)) members)))) + +(defn- parse-fn-tuple [origin] + (loop [parser (accept-many #{::token/newline ::token/comma} (advance origin)) + members [] + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rparen (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/tuple + :length (count ms) + :members ms + :partial (contains-placeholder? ms)})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbrace ::token/rbracket) + (panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr))) + + ::token/placeholder + (if (contains-placeholder? members) + (recur + (advance parser) + members + (panic parser "Partially applied functions must be unary. (Only one placeholder allowed in partial application.)" curr)) + (recur + (advance parser) members {::ast/type ::ast/placeholder})) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated tuple" ::token/eof) + + (let [parsed (parse-expr parser #{::token/comma ::token/newline ::token/rparen})] + (recur parsed members (::ast parsed))))))) + +(defn- parse-tuple [origin] + (loop [parser (accept-many #{::token/newline ::token/comma} (advance origin)) + members [] + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rparen (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/tuple + :length (count ms) + :members ms})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbrace ::token/rbracket) + (panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr))) + + ::token/placeholder + (recur + (advance parser) + members + (panic parser "Placeholders in tuples may only be in function calls." curr)) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated tuple" ::token/eof) + + (let [parsed (parse-expr parser #{::token/comma ::token/newline ::token/rparen})] + (recur parsed members (::ast parsed))))))) + +(defn- parse-list [origin] + (loop [parser (accept-many #{::token/newline ::token/comma} (advance origin)) + members [] + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rbracket (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/list + :members ms})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbrace ::token/rparen) + (panic parser (str "Mismatched enclosure in list: " (::token/lexeme curr))) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated list" ::token/eof) + + (let [parsed (parse-expr parser #{::token/comma ::token/newline ::token/rbracket})] + (recur parsed members (::ast parsed))))))) + +(defn- parse-set [origin] + (loop [parser (accept-many #{::token/newline ::token/comma} (advance origin)) + members [] + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rbrace (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/set + :members ms})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbracket ::token/rparen) + (panic parser (str "Mismatched enclosure in set: " (::token/lexeme curr))) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated set" ::token/eof) + + (let [parsed (parse-expr parser #{::token/comma ::token/newline ::token/rbrace})] + (recur parsed members (::ast parsed))))))) + +(defn- parse-hash [origin] + (loop [parser (accept-many #{::token/newline ::token/comma} (advance origin)) + members {} + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rbrace (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/hash + :members ms})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbracket ::token/rparen) + (panic parser (str "Mismatched enclosure in hashmap: " (::token/lexeme curr))) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated hashmap" ::token/eof) + + ::token/word + (if (not current_member) (let [parsed (parse-word parser) word (get-in parsed [::ast :word])] + (recur parsed members {(keyword word) (::ast parsed)})) + (panic parser "Hashmap entries must be single words or keyword+expression pairs." #{::token/rbrace})) + + ::token/keyword + (if (not current_member) (let [kw (parse-atom parser) expr (parse-expr kw #{::token/comma ::token/newline ::token/rbrace})] + (recur expr members {(:value (::ast kw)) (::ast expr)})) + (panic parser "Hashmap entries must be single words or keyword+expression pairs." #{::token/rbrace})) + + (panic parser "Hashmap entries must be single words or keyword+expression pairs" #{::token/rbrace}))))) + +(defn- parse-struct [origin] + (loop [parser (accept-many #{::token/newline ::token/comma} (advance origin)) + members {} + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rbrace (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/struct + :members ms})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbracket ::token/rparen) + (panic parser (str "Mismatched enclosure in struct: " (::token/lexeme curr))) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated struct" ::token/eof) + + ::token/word + (if (not current_member) (let [parsed (parse-word parser) word (get-in parsed [::ast :word])] + (recur parsed members {(keyword word) (::ast parsed)})) + (panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace})) + + ::token/keyword + (if (not current_member) (let [kw (parse-atom parser) expr (parse-expr kw #{::token/comma ::token/newline ::token/rbrace})] + (recur expr members {(:value (::ast kw)) (::ast expr)})) + (panic parser "Struct entries must be single words or keyword+expression pairs." #{::token/rbrace})) + + (panic parser "Struct entries must be single words or keyword+expression pairs" #{::token/rbrace}))))) + +(defn- parse-ns [ns-root] + (let [name (expect* #{::token/word} "Expected ns name" (advance ns-root)) + origin (expect* #{::token/lbrace} "Expected { after ns name" (:parser name))] + (cond + (not (:success name)) (panic parser "Expected ns name" #{::token/newline}) + + (not (:success origin)) (panic (:parser name) "Expected { after ns name") + + :else + (loop [parser (accept-many #{::token/newline ::token/comma} (:parser origin)) + members {} + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rbrace (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/ns + :name (get-in (parse-word (advance ns-root)) [::ast :word]) + :members ms})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbracket ::token/rparen) + (panic parser (str "Mismatched enclosure in ns: " (::token/lexeme curr))) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated ns" ::token/eof) + + ::token/word + (if (not current_member) (let [parsed (parse-word parser) word (get-in parsed [::ast :word])] + (recur parsed members {(keyword word) (::ast parsed)})) + (panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace})) + + ::token/keyword + (if (not current_member) (let [kw (parse-atom parser) expr (parse-expr kw #{::token/comma ::token/newline ::token/rbrace})] + (recur expr members {(:value (::ast kw)) (::ast expr)})) + (panic parser "ns entries must be single words or keyword+expression pairs." #{::token/rbrace})) + + (panic parser "ns entries must be single words or keyword+expression pairs" #{::token/rbrace}))))))) + +(defn- parse-block [origin] + (loop [parser (accept-many #{::token/newline ::token/semicolon} (advance origin)) + exprs [] + current_expr nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rbrace + (let [es (add-member exprs current_expr)] + (if (empty? es) + (advance (panic parser "Blocks must have at least one expression")) + (assoc (advance parser) ::ast {::ast/type ::ast/block + :exprs es}))) + + (::token/semicolon ::token/newline) + (recur + (accept-many #{::token/newline ::token/semicolon} parser) + (add-member exprs current_expr) nil) + + (::token/rbracket ::token/rparen) + (panic parser (str "Mismatched enclosure in block: " (::token/lexeme curr))) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated block" ::token/eof) + + (let [parsed + (if current_expr + (panic parser "Expected end of expression" #{::token/semicolon ::token/newline}) + (parse-expr parser))] + (recur parsed exprs (::ast parsed))))))) + +(defn parse-script [parser] + (loop [parser (accept-many #{::token/newline ::token/semicolon} parser) + exprs [] + current_expr nil] + (case (token-type parser) + ::token/eof + (let [es (add-member exprs current_expr)] + (if (empty? es) + (panic parser "Scripts must have at least one expression") + (assoc parser ::ast {::ast/type ::ast/script :exprs es}))) + + (::token/semicolon ::token/newline) + (recur + (accept-many #{::token/semicolon ::token/newline} parser) + (add-member exprs current_expr) + nil) + + (let [parsed + (if current_expr + (panic parser "Expected end of expression" #{::token/semicolon ::token/newline}) + (parse-expr parser))] + + (recur parsed exprs (::ast parsed)))))) + +(defn- parse-synthetic [parser] + (loop [parser parser + terms []] + (let [curr (current parser) + type (::token/type curr)] + (case type + ::token/keyword + (recur (advance parser) (conj terms (::ast (parse-atom parser)))) + + ::token/word + (recur (advance parser) (conj terms (::ast (parse-word parser)))) + + ::token/lparen + (let [parsed (parse-fn-tuple parser)] + (recur parsed (conj terms (::ast parsed)))) + + (assoc parser ::ast {::ast/type ::ast/synthetic :terms terms}))))) + +(defn- parse-word [parser] + (let [curr (current parser)] + (-> parser + (advance) + (assoc ::ast {::ast/type ::ast/word :word (::token/lexeme curr)})))) + +(def sync-pattern (s/union sync-on #{::token/equals ::token/rarrow})) + +(defn- parse-tuple-pattern [origin] + (loop [parser (accept-many #{::token/newline ::token/comma} (advance origin)) + members [] + current_member nil] + (let [curr (current parser)] + (case (token-type parser) + ::token/rparen (let [ms (add-member members current_member)] + (assoc (advance parser) ::ast + {::ast/type ::ast/tuple + :length (count ms) + :members ms})) + + (::token/comma ::token/newline) + (recur + (accept-many #{::token/comma ::token/newline} parser) + (add-member members current_member) nil) + + (::token/rbrace ::token/rbracket) + (panic parser (str "Mismatched enclosure in tuple: " (::token/lexeme curr))) + + ::token/eof + (panic (assoc origin ::errors (::errors parser)) "Unterminated tuple" ::token/eof) + + (let [parsed (parse-pattern parser)] + (recur parsed members (::ast parsed))))))) + +(defn- parse-pattern [parser] + (let [curr (current parser) + type (::token/type curr)] + (case type + (::token/placeholder ::token/ignored) (-> parser + (advance) + (assoc ::ast {::ast/type ::ast/placeholder})) + + ::token/word (parse-word parser) + + (::token/number ::token/string ::token/keyword) (parse-atom parser) + + ::token/lparen (parse-tuple-pattern parser) + + ::token/error + (panic parser (:message (current parser)) sync-pattern) + + (panic parser "Expected pattern" sync-pattern)))) + +(defn- parse-let-expr [parser pattern] + (let [expr (parse-expr parser)] + (assoc expr ::ast {::ast/type ::ast/let + :pattern (::ast pattern) :expr (::ast expr)}))) + +(defn- parse-assignment [parser] + (let [assignment (expect* ::token/equals "Expected assignment" parser) + success (:success assignment)] + (if success + (parse-let-expr (:parser assignment) parser) + (panic parser "Expected assignment")))) + +(defn- parse-let [parser] + (let [pattern (parse-pattern (advance parser))] + (parse-assignment pattern))) + +(defn- parse-else [parser] + (let [ast (::ast parser) + else-kw (expect* ::token/else "Expected else clause after then" parser) + success (:success else-kw) + else-kw-parser (:parser else-kw)] + (if success + (let [expr (parse-expr else-kw-parser) + else-expr (::ast expr)] + (assoc expr ::ast (assoc ast :else else-expr))) + else-kw-parser))) + +(defn- parse-then [parser] + (let [ast (::ast parser) + then-kw (expect* ::token/then "Expected then clause after if" parser) + success (:success then-kw) + then-kw-parser (:parser then-kw)] + (if success + (let [expr (parse-expr then-kw-parser (conj sync-on ::token/else)) + then-expr (::ast expr)] + (parse-else (accept ::token/newline (assoc expr ::ast (assoc ast :then then-expr))))) + then-kw-parser))) + +(defn- parse-if [parser] + (let [if-expr (parse-expr (advance parser) #{::token/newline ::token/then}) + ast (assoc if-expr ::ast {::ast/type ::ast/if :if (::ast if-expr)})] + (parse-then (accept ::token/newline ast)))) + +(defn- parse-match-clause [parser] + (let [pattern (if (= ::token/else (token-type parser)) + (-> parser (advance) (assoc ::ast {::ast/type ::ast/placeholder})) + (parse-pattern parser)) + rarrow (expect* #{::token/rarrow} "Expected arrow after pattern" pattern)] + (if (:success rarrow) + (let [body (parse-expr (:parser rarrow))] + (assoc body ::ast {::ast/type ::ast/clause + :pattern (::ast pattern) :body (::ast body)})) + (panic pattern "Expected -> in match clause. Clauses must be in the form pattern -> expression" #{::token/newline ::token/rbrace})))) + +(defn- parse-match-clauses [parser] + (loop [parser (accept-many #{::token/newline} (advance parser)) + clauses []] + (let [curr (current parser)] + (case (::token/type curr) + ::token/rbrace + (if (< 0 (count clauses)) + (assoc (advance parser) ::ast {::ast/type ::ast/clauses :clauses clauses}) + (panic parser "Expected one or more clauses" #{::rbrace})) + + ::token/newline + (recur (accept-many #{::token/newline} parser) clauses) + + (let [clause (parse-match-clause parser)] + (recur (accept-many #{::token/newline} clause) (conj clauses (::ast clause)))))))) + +(defn- parse-match [parser] + (let [match-expr (parse-expr (advance parser) #{::token/with}) + match-header (expect* #{::token/with} "Expected with" match-expr)] + (if (:success match-header) + (let [clauses (:parser match-header)] + (if (= (token-type clauses) ::token/lbrace) + ;; match expression with one or many clauses in braces + (let [clauses (parse-match-clauses clauses)] + (assoc clauses ::ast {::ast/type ::ast/match + :expr (::ast match-expr) + :clauses (get-in clauses [::ast :clauses])})) + ;; match expression with single match clause + (let [clause (parse-match-clause clauses)] + (assoc clause ::ast {::ast/type ::ast/match + :expr (::ast match-expr) + :clauses [(::ast clause)]})))) + + (panic parser "Expected with after match expression")))) + +(defn- parse-cond-clause [parser] + (let [expr (if + (contains? #{::token/else ::token/placeholder} (token-type parser)) + (-> parser + (advance) + (assoc ::ast {::ast/type ::ast/atom + :token (current parser) + :value true})) + (parse-expr parser)) + rarrow (expect* #{::token/rarrow} "Expected arrow after expression in cond clause" expr)] + (if (:success rarrow) + (let [body (parse-expr (:parser rarrow))] + (assoc body ::ast {::ast/type ::ast/clause + :test (::ast expr) :body (::ast body)})) + (panic expr "Expected -> in cond clause. Clauses must be in the form test_expression -> result_expression" #{::token/newline ::token/rbrace})))) + +(defn- parse-cond-clauses [parser] + (loop [parser (accept-many #{::token/newline} parser) + clauses []] + (let [curr (current parser)] + (case (::token/type curr) + ::token/rbrace + (if (< 0 (count clauses)) + (assoc (advance parser) ::ast {::ast/type ::ast/clauses :clauses clauses}) + (panic parser "Expected one or more clauses" #{::rbrace})) + + + ::token/newline + (recur (accept-many #{::token/newline} parser) clauses) + + (let [clause (parse-cond-clause parser)] + (recur (accept-many #{::token/newline} clause) (conj clauses (::ast clause)))))))) + +(defn- parse-cond [parser] + (let [header + (expect* #{::token/lbrace} "Expected { after cond" (advance parser))] + (if (:success header) + (let [clauses (parse-cond-clauses (:parser header))] + (assoc clauses ::ast {::ast/type ::ast/cond + :clauses (get-in clauses [::ast :clauses])}) + ) + (panic parser "Expected { after cond") + ) + ) + ) + +(defn- parse-fn-clause [parser] + (if (not (= ::token/lparen (token-type parser))) + (panic parser "Function clauses must begin with tuple patterns") + (let [pattern (parse-tuple-pattern parser) + arrow (expect* #{::token/rarrow} "Expected arrow" pattern) + body (parse-expr (:parser arrow))] + (if (:success arrow) + (assoc body ::ast {::ast/type ::ast/clause + :pattern (::ast pattern) :body (::ast body)}) + (panic pattern "Expected -> in function clause. Clauses must be in the form of (pattern) -> expression"))))) + +(defn- parse-fn-clauses [parser] + (loop [parser (accept-many #{::token/newline} (advance parser)) + clauses []] + (let [curr (current parser)] + (case (::token/type curr) + ::token/rbrace + (if (< 0 (count clauses)) + (assoc (advance parser) ::ast {::ast/type ::ast/clauses :clauses clauses}) + (panic parser "Expected one or more function clauses" #{::token/rbrace})) + + ::token/newline + (recur (accept-many #{::token/newline} parser) clauses) + + (let [clause (parse-fn-clause parser)] + (recur (accept-many #{::token/newline} clause) (conj clauses (::ast clause)))))))) + +(defn- parse-named-fn [parser] + (let [name (parse-word parser)] + (case (token-type name) + ::token/lparen + (let [clause (parse-fn-clause name)] + (assoc clause ::ast {::ast/type ::ast/fn + :name (get-in name [::ast :word]) + :clauses [(::ast clause)]})) + + ::token/lbrace + (let [clauses (parse-fn-clauses name)] + (assoc clauses ::ast {::ast/type ::ast/fn + :name (get-in name [::ast :word]) + :clauses (get-in clauses [::ast :clauses])})) + + (panic name "Expected one or more function clauses")))) + +(defn- parse-fn [parser] + (let [first (advance parser)] + (case (::token/type (current first)) + ::token/lparen + (let [clause (parse-fn-clause first)] + (assoc clause ::ast {::ast/type ::ast/fn + :name ::ast/anon + :clauses [(::ast clause)]})) + + ::token/word (parse-named-fn first) + + (panic parser "Expected name or clause after fn")))) + +(defn- parse-do [parser] + (let [first (advance parser)] + (loop [parser first + exprs []] + (let [expr (parse-expr parser) + expr+newline (accept ::token/newline expr) + next (token-type expr+newline)] + (if (= ::token/pipeline next) + (recur (advance expr+newline) (conj exprs (::ast expr))) + (assoc expr ::ast {::ast/type ::ast/pipeline + :exprs (conj exprs (::ast expr))}) + ))))) + +(defn- parse-import [parser] + (let [path (parse-atom (advance parser)) + as (expect* #{::token/as} "Expected as after path" path) + named? (if (:success as) + (expect* #{::token/word} "Expected name binding after as" (:parser as)) + nil) + name (if (:success named?) + (parse-word (:parser as)) + nil + )] + (cond + (not= ::token/string (token-type (advance parser))) + (panic parser "Expected path after import" #{::token/newline}) + + (not (:success as)) + (panic parser "Expected as after path" #{::token/newline}) + + (not (:success named?)) + (panic parser "Expected name binding after as") + + :else + (assoc name ::ast {::ast/type ::ast/import + :path (get-in path [::ast :value]) + :name (get-in name [::ast :word])})))) + +(defn- parse-expr + ([parser] (parse-expr parser sync-on)) + ([parser sync-on] + (let [token (current parser)] + (case (::token/type token) + + (::token/number ::token/string) + (parse-atom parser) + + ::token/keyword + (let [next (ppeek parser) + type (::token/type next)] + (if (= type ::token/lparen) + (parse-synthetic parser) + (parse-atom parser))) + + ::token/word + (let [next (ppeek parser) + type (::token/type next)] + (case type + (::token/lparen ::token/keyword) (parse-synthetic parser) + (parse-word parser))) + + (::token/nil ::token/true ::token/false) + (parse-atomic-word parser) + + ::token/lparen (parse-tuple parser) + + ::token/lbracket (parse-list parser) + + ::token/startset (parse-set parser) + + ::token/starthash (parse-hash parser) + + ::token/startstruct (parse-struct parser) + + ::token/lbrace (parse-block parser) + + ::token/let (parse-let parser) + + ::token/if (parse-if parser) + + ::token/match (parse-match parser) + + ::token/fn (parse-fn parser) + + ::token/do (parse-do parser) + + ::token/cond (parse-cond parser) + + ::token/ns (parse-ns parser) + + ::token/import (parse-import parser) + + ;; TODO: improve handling of comments? + ;; Scanner now just skips comments + ;; ::token/comment (advance parser) + + ::token/error (panic parser (:message token) sync-on) + + (::token/rparen ::token/rbrace ::token/rbracket) + (panic parser (str "Unbalanced enclosure: " (::token/lexeme token))) + + (::token/semicolon ::token/comma) + (panic parser (str "Unexpected delimiter: " (::token/lexeme token))) + + (panic parser "Expected expression" sync-on))))) + +(defn parse [lexed] + (-> lexed + (:tokens) + (parser) + (parse-script))) + +(comment + (def pp pp/pprint) + (def source "cond { _ -> :foo } + + ") + (def lexed (scanner/scan source)) + (def tokens (:tokens lexed)) + (def p (parser tokens)) + + (println "") + (println "") + (println "******************************************************") + (println "") + (println "*** *** NEW PARSE *** ***") + + (-> p + (parse-script) + (::ast) + (pp))) + +(comment " + Further thoughts/still to do: + * Functions docstrings + * Cond expressions + * Loops + * Structs + * Namespaces + * Types (:|) + * Modules + * Add `as` clauses to patterns + * Add `when` clauses to patterns + * var/mut + * ref/swap + * Splats in lists, hashmaps, sets + * AST nodes should include tokens/locations + - at current, only atoms do this + * Improve error handling in hashmap parsing + * Consider error handling in match expressions + * Add treatment of ignored variables + * Placeholders + * How much in parser, how much in analysis? + + Some architectural changes: + * UGH, this code is just kind of a mess and hard to reason about + * Especially sequential forms + * Parsers are hard + * One idea: + * Refactor everything so that it returns a success or failure + * Because this is all stateless, in sequential forms, you can just do all the things + * This lets you do one let (with everything building up) and then a cond with bespoke errors/panics + * This also still lets you encapsulate parsererrors with poisoned nodes + + ") + + + + + + diff --git a/src/ludus/prelude.cljc b/src/ludus/prelude.cljc new file mode 100644 index 0000000..85ccd80 --- /dev/null +++ b/src/ludus/prelude.cljc @@ -0,0 +1,69 @@ +(ns ludus.prelude + (:require + [ludus.data :as data] + [ludus.show])) + +(def eq {:name "eq" + ::data/type ::data/clj + :body =}) + +(def add {:name "add" + ::data/type ::data/clj + :body +}) + +(def sub {:name "sub" + ::data/type ::data/clj + :body -}) + +(def mult {:name "mult" + ::data/type ::data/clj + :body *}) + +(def div {:name "div" + ::data/type ::data/clj + :body /}) + +(def inc- {:name "inc" + ::data/type ::data/clj + :body inc}) + +(def dec- {:name "dec" + ::data/type ::data/clj + :body dec}) + +(def ld-not {:name "not" + ::data/type ::data/clj + :body not}) + +(def panic! {:name "panic!" + ::data/type ::data/clj + :body (fn [& args] (throw (ex-info (apply str (interpose " " args)) {})))}) + +(def print- {:name "print" + ::data/type ::data/clj + :body (fn [& args] + (println (apply str args)) + :ok)}) + +(declare show) + +(defn- show-vector [v] + (if (= (first v) ::data/tuple) + (str "(" (apply str (into [] (comp (map (:body show)) (interpose ", ")) (next v))) ")") + (str "[" (apply str (into [] (comp (map (:body show)) (interpose ", ")) v)) "]"))) + +(def show {:name "show" + ::data/type ::data/clj + :body ludus.show/show}) + +(def prelude {"eq" eq + "add" add + "panic!" panic! + "print" print- + "sub" sub + "mult" mult + "div" div + "inc" inc- + "dec" dec- + "not" not + "show" show}) \ No newline at end of file diff --git a/src/ludus/scanner.cljc b/src/ludus/scanner.cljc new file mode 100644 index 0000000..ee5c378 --- /dev/null +++ b/src/ludus/scanner.cljc @@ -0,0 +1,318 @@ +(ns ludus.scanner + (:require + [ludus.token :as token] + [clojure.pprint :as pp] + [clojure.edn :as edn] + [clojure.string :as s])) + +(def reserved-words + "List of Ludus reserved words." + ;; see ludus-spec repo for more info + {"as" ::token/as + "cond" ::token/cond ;; impl + "data" ::token/data + "do" ::token/do ;; impl + "else" ::token/else ;; impl + "false" ::token/false ;; impl + "fn" ::token/fn ;; impl + "if" ::token/if ;; impl + "import" ::token/import + "let" ::token/let ;; impl + "match" ::token/match ;; impl + "mut" ::token/mut + "nil" ::token/nil ;; impl + "ref" ::token/ref + "then" ::token/then ;; impl + "true" ::token/true ;; impl + "var" ::token/var + "with" ::token/with ;; impl + ;; below here, probable + "defer" ::token/defer + "gen" ::token/gen + "loop" ::token/loop + "ns" ::token/ns + "recur" ::token/recur + "repeat" ::token/repeat + "test" ::token/test + "wait" ::token/wait + "yield" ::token/yield + ;; below here, possible + "when" ::token/when}) + +(defn- new-scanner + "Creates a new scanner." + [source] + {::source source + ::length (count source) + ::errors [] + ::start 0 + ::current 0 + ::line 1 + ::tokens []}) + +(defn- at-end? + "Tests if a scanner is at end of input." + [scanner] + (>= (::current scanner) (::length scanner))) + +(defn- current-char + "Gets the current character of the scanner." + [scanner] + (nth (::source scanner) (::current scanner) nil)) + +(defn- advance + "Advances the scanner by a single character." + [scanner] + (update scanner ::current inc)) + +(defn- next-char + "Gets the next character from the scanner." + [scanner] + (current-char (advance scanner))) + +(defn- current-lexeme + [scanner] + (subs (::source scanner) (::start scanner) (::current scanner))) + +(defn- char-in-range? [start end char] + (and char + (>= (int char) (int start)) + (<= (int char) (int end)))) + +(defn- digit? [c] + (char-in-range? \0 \9 c)) + +(defn- nonzero-digit? [c] + (char-in-range? \1 \9 c)) + +;; for now, use very basic ASCII charset in words +;; TODO: research the implications of using the whole +;; (defn- alpha? [c] (boolean (re-find #"\p{L}" (str c)))) +(defn- alpha? [c] + (or (char-in-range? \a \z c) (char-in-range? \A \Z c))) + +;; legal characters in words +(def word-chars #{\_ \? \! \* \/}) + +(defn- word-char? [c] + (or (alpha? c) (digit? c) (contains? word-chars c))) + +(defn- whitespace? [c] + (or (= c \space) (= c \tab))) + +(def terminators #{\: \; \newline \{ \} \( \) \[ \] \$ \# \- \< \& \, \| nil \\}) + +(defn- terminates? [c] + (or (whitespace? c) (contains? terminators c))) + +(defn- add-token + ([scanner token-type] + (add-token scanner token-type nil)) + ([scanner token-type literal] + (update scanner ::tokens conj + (token/token + token-type + (current-lexeme scanner) + literal + (::line scanner) + (::start scanner))))) + +;; TODO: errors should also be in the vector of tokens +;; The goal is to be able to be able to hand this to an LSP? +;; Do we need a different structure +(defn- add-error [scanner msg] + (let [token (token/token + ::token/error + (current-lexeme scanner) + nil + (::line scanner) + (::start scanner)) + err-token (assoc token :message msg)] + (-> scanner + (update ::errors conj err-token) + (update ::tokens conj err-token)))) + +(defn- add-keyword + [scanner] + (loop [scanner scanner + key ""] + (let [char (current-char scanner)] + (cond + (terminates? char) (add-token scanner ::token/keyword (keyword key)) + (word-char? char) (recur (advance scanner) (str key char)) + :else (add-error scanner (str "Unexpected " char "after keyword :" key)))))) + +;; TODO: improve number parsing? +;; Currently this uses Clojure's number formatting rules (since we use the EDN reader) +;; These rules are here: https://cljs.github.io/api/syntax/number +(defn- add-number [char scanner] + (loop [scanner scanner + num (str char) + float? false] + (let [curr (current-char scanner)] + (cond + (= curr \_) (recur (advance scanner) num float?) ;; consume underscores unharmed + (= curr \.) (if float? + (add-error scanner (str "Unexpected second decimal point after " num ".")) + (recur (advance scanner) (str num curr) true)) + (terminates? curr) (add-token scanner ::token/number (edn/read-string num)) + (digit? curr) (recur (advance scanner) (str num curr) float?) + :else (add-error scanner (str "Unexpected " curr " after number " num ".")))))) + +;; TODO: add string interpolation +;; This still has to be devised +(defn- add-string + [scanner] + (loop [scanner scanner + string ""] + (let [char (current-char scanner)] + (case char + \newline (add-error scanner "Unterminated string.") + \" (add-token (advance scanner) ::token/string string) + \\ (let [next (next-char scanner) + scanner (if (= next \newline) + (update scanner ::line inc) + scanner)] + (recur (advance (advance scanner)) (str string next))) + (if (at-end? scanner) + (add-error scanner "Unterminated string.") + (recur (advance scanner) (str string char))))))) + +(defn- add-word + [char scanner] + (loop [scanner scanner + word (str char)] + (let [curr (current-char scanner)] + (cond + (terminates? curr) (add-token scanner (get reserved-words word ::token/word)) + (word-char? curr) (recur (advance scanner) (str word curr)) + :else (add-error scanner (str "Unexpected " curr " after word " word ".")))))) + +(defn- add-ignored + [scanner] + (loop [scanner scanner + ignored "_"] + (let [char (current-char scanner)] + (cond + (terminates? char) (add-token scanner ::token/ignored) + (word-char? char) (recur (advance scanner) (str ignored char)) + :else (add-error scanner (str "Unexpected " char " after word " ignored ".")))))) + +(defn- add-comment [char scanner] + (loop [scanner scanner + comm (str char)] + (let [char (current-char scanner)] + (if (= \newline char) + (update scanner ::line inc) + ;;(if (s/starts-with? comm "&&&") + ;;(add-token (update scanner ::line inc) ::token/docstring) + ;;(add-token (update scanner ::line inc) ::token/comment)) + (recur (advance scanner) (str comm char)))))) + +(defn- scan-token [scanner] + (let [char (current-char scanner) + scanner (advance scanner) + next (current-char scanner)] + (case char + ;; one-character tokens + \( (add-token scanner ::token/lparen) + \) (add-token scanner ::token/rparen) + \{ (add-token scanner ::token/lbrace) + \} (add-token scanner ::token/rbrace) + \[ (add-token scanner ::token/lbracket) + \] (add-token scanner ::token/rbracket) + \; (add-token scanner ::token/semicolon) + \, (add-token scanner ::token/comma) + \newline (add-token (update scanner ::line inc) ::token/newline) + \\ (add-token scanner ::token/backslash) + \= (add-token scanner ::token/equals) + \> (add-token scanner ::token/pipeline) + + ;; two-character tokens + ;; -> + \- (cond + (= next \>) (add-token (advance scanner) ::token/rarrow) + (digit? next) (add-number char scanner) + :else (add-error scanner (str "Expected -> or negative number. Got " char next))) + + ;; at current we're not using this + ;; <- + \< (if (= next \-) + (add-token (advance scanner) ::token/larrow) + (add-error scanner (str "Expected <-. Got " char next))) + + ;; |> + ;; Consider => , with =>> for bind + ; \| (if (= next \>) + ; (add-token (advance scanner) ::token/pipeline) + ; (add-error scanner (str "Expected |>. Got " char next))) + + ;; possible additional operator: bind/result + ;; possible additional operator: bind/some + ;; oh god, monads + ;; additional arrow possibilities: >> ||> ~> => !> + + ;; hashmap #{ + \# (if (= next \{) + (add-token (advance scanner) ::token/starthash) + (add-error scanner (str "Expected beginning of hash: #{. Got " char next))) + + ;; set ${ + \$ (if (= next \{) + (add-token (advance scanner) ::token/startset) + (add-error scanner (str "Expected beginning of set: ${. Got " char next))) + + ;; struct @{ + \@ (if (= next \{) + (add-token (advance scanner) ::token/startstruct) + (add-error scanner (str "Expected beginning of struct: @{. Got " char next))) + + ;; placeholders + ;; there's a flat _, and then ignored words + \_ (cond + (terminates? next) (add-token scanner ::token/placeholder) + (alpha? next) (add-ignored scanner) + :else (add-error scanner (str "Expected placeholder: _. Got " char next))) + + ;; comments + ;; & starts an inline comment + ;; TODO: include comments in scanned file + ;; TODO, maybe: add doc comments: &&& (or perhaps a docstring in an fn?) + \& (add-comment char scanner) + + ;; keywords + \: (cond + (alpha? next) (add-keyword scanner) + :else (add-error scanner (str "Expected keyword. Got " char next))) + + ;; splats + \. (let [after_next (current-char (advance scanner))] + (if (= ".." (str next after_next)) + (add-token (advance (advance scanner)) ::token/splat) + (add-error scanner (str "Expected splat: ... . Got " (str "." next after_next))))) + + ;; strings + \" (add-string scanner) + + ;; word matches + (cond + (whitespace? char) scanner ;; for now just skip whitespace characters + (digit? char) (add-number char scanner) + (alpha? char) (add-word char scanner) + :else (add-error scanner (str "Unexpected character: " char)))))) + +(defn- next-token [scanner] + (assoc scanner ::start (::current scanner))) + +(defn scan [source] + (loop [scanner (new-scanner source)] + (if (at-end? scanner) + (let [scanner (add-token scanner ::token/eof)] + {:tokens (::tokens scanner) + :errors (::errors scanner)}) + (recur (-> scanner (scan-token) (next-token)))))) + +(comment + (def source "@{") + + (pp/pprint (scan source))) diff --git a/src/ludus/show.cljc b/src/ludus/show.cljc new file mode 100644 index 0000000..05b776a --- /dev/null +++ b/src/ludus/show.cljc @@ -0,0 +1,53 @@ +(ns ludus.show + (:require + [ludus.data :as data] + [clojure.pprint :as pp])) + +(declare show show-linear show-keyed) + +(defn- show-vector [v] + (if (= (first v) ::data/tuple) + (str "(" (apply str (into [] show-linear (next v))) ")") + (str "[" (apply str (into [] show-linear v)) "]"))) + +(defn- show-map [v] + (cond + (or (= (::data/type v) ::data/fn) + (= (::data/type v) ::data/clj)) + (str "fn " (:name v)) + + (= (::data/type v) ::data/ns) + (str "ns " (::data/name v) " {" + (apply str (into [] show-keyed (dissoc v ::data/struct ::data/type ::data/name))) + "}") + + (::data/struct v) + (str "@{" (apply str (into [] show-keyed (dissoc v ::data/struct))) "}") + + :else + (str "#{" (apply str (into [] show-keyed v)) "}") + + )) + +(defn- show-set [v] + (str "${" (apply str (into [] show-linear v)) "}")) + +(defn show [v] + (cond + (string? v) (str "\"" v "\"") + (number? v) (str v) + (keyword? v) (str v) + (boolean? v) (str v) + (nil? v) "nil" + (vector? v) (show-vector v) + (set? v) (show-set v) + (map? v) (show-map v) + :else (with-out-str (pp/pprint v)))) + +(def show-linear (comp (map show) (interpose ", "))) + +(def show-keyed (comp + (map #(str (show (first %)) " " (show (second %)))) + (interpose ", "))) + +(show {::data/type ::data/fn :name "foo"}) diff --git a/src/ludus/token.cljc b/src/ludus/token.cljc new file mode 100644 index 0000000..e3d8d58 --- /dev/null +++ b/src/ludus/token.cljc @@ -0,0 +1,11 @@ +(ns ludus.token) + +(defn token + ([type text] + (token type text nil 1)) + ([type text literal line start] + {::type type + ::lexeme text + ::literal literal + ::line line + ::start start}))