Repl things

First pass at janet scanner
Take some notes clj->janet
2024-01-07 20:10:45 -05:00 · 2024-01-07 20:10:16 -05:00 · 2024-01-07 20:10:02 -05:00 · 2024-01-07 20:09:01 -05:00 · 2023-12-31 18:37:19 -05:00 · 2023-12-31 18:37:09 -05:00
7 changed files with 397 additions and 10 deletions
--- a/.gitignore
+++ b/.gitignore
@ -29,3 +29,4 @@ node_modules/
 .cljs_node_repl/
 .helix/
 target/repl-port
+.repl-buffer
--- a/clj_to_janet.md
+++ b/clj_to_janet.md
@ -0,0 +1,42 @@
+# From Clojure to Janet
+## How to convert files
+
+### Comments
+`% s ; r # <esc>`
+
+
+### called keyword property access
+Use a macro:
+- Select `\ \( : ret <esc>`
+- Record the macro: `Q n <esc> v e c get <esc> e e a <space> <esc> p <esc> Q`
+- Then just `q` until you've changed everything
+
+### Chars don't exist
+- \char -> "char", e.g.:
+- \newline -> "\n", etc.
+- \a -> "a"
+
+### Use mutable arrays and tables
+Where data structures are mutable, add `@`s.
+
+### Sets & tables
+Sets don't exist in Janet. Use tables with values set to `true`.
+
+### Strings -> number literals
+- Clj uses `edn/read-string`; Janet uses `parse-all`, which returns an array of parsed values, so take the `first` one
+
+### `loop` -> `defn recur`
+- Clj's `loop` is very different from Janet's `loop`
+- As a quick and dirty workaround, change it to an interior recursive function
+- Janet has tail calls, so this is nearly as efficient (paying the overhead for creating the function)
+- An optimization is to pull out these functions and declare them at the toplevel
+
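+### current-char
+- Indexing a Janet string with `get` yields a byte (an integer), not a character; wrap it in `string/from-bytes` to get a one-character string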
+
+### Straight replacements:
+- nth -> get
+- assoc -> put
+- conj -> array/push
+- str -> string
+- subs -> slice
--- a/17
+++ b/17
@ -1,6 +1,17 @@
 # start a repl
-repl:
-	clj -X:repl
-
 build:
 	shadow-cljs release module
+
+repl:
+	kitten @ launch --type=os-window --allow-remote-control --cwd=current --title=hx_repl:ludus janet -s
+
+repeater:
+	kitten @ launch --type=os-window --allow-remote-control --cwd=current --title=hx_repl:ludus bat
+
+eval:
+	sd "$" "\n" | sd "(\n)+" "\n" | kitten @ send-text -m "title:hx_repl:ludus" --stdin
+
+buffer:
+	sd "$" "\n" > .repl-buffer
+	kitten @ send-text -m "title:hx_repl:ludus" --from-file .repl-buffer
+
--- a/src/ludus/base.cljc
+++ b/src/ludus/base.cljc
@ -85,18 +85,23 @@
                     (println (stringify-args args))
                     :ok)})

+(def refs (atom {})) ;; atom not volatile!, maybe we'll be multithreaded someday
+
 (def deref- {:name "deref"
             ::data/type ::data/clj
             :body (fn [ref]
                     (if (::data/ref ref)
-                       (deref (::data/value ref))
+                       (get @refs (::data/name ref))
                       (throw (ex-info "Cannot deref something that is not a ref" {}))))})

 (def set!- {:name "set!"
            ::data/type ::data/clj
            :body (fn [ref value]
                    (if (::data/ref ref)
-                      (reset! (::data/value ref) value)
+                      (do 
+                        (swap! refs assoc (::data/name ref) value)
+                        (reset! (::data/value ref) value)
+                        value)
                      (throw (ex-info "Cannot set! something that is not a ref" {}))))})

 (def show {:name "show"
--- a/src/ludus/interpreter.cljc
+++ b/src/ludus/interpreter.cljc
@ -673,6 +673,7 @@
    (let [value (interpret-ast expr ctx)
          box (atom value)
          ref {::data/ref true ::data/value box ::data/name name}]
+      (swap! base/refs assoc name value)
      (vswap! ctx update-ctx {name ref})
      ref)))

--- a/src/ludus/node.cljc
+++ b/src/ludus/node.cljc
@ -39,11 +39,12 @@
         user_result (i/interpret-safe source user_parsed {} testing?)
         result_str (show/show user_result)
         test_results @i/test-results
+         state @base/refs
         post_scanned (s/scan pre/postlude "postlude")
         post_tokens (:tokens post_scanned)
         post_parsed (p/apply-parser g/script post_tokens)
         post_result (i/interpret-safe source post_parsed {} false)
-         ludus_result (assoc post_result :result result_str :test test_results)
+         ludus_result (assoc post_result :result result_str :test test_results :state state)
         clj_result (ld->clj ludus_result)
         ]
     (cond
@ -64,14 +65,15 @@

 (defn test-run [source] (run source true))

-(comment
+(do

  (def source "

-    ")
+add (1, 2)

-  (-> source run :test println)
+")
+  (-> source run :result)

  )

-
+(+ 1 2)
--- a/src/ludus/scanner.janet
+++ b/src/ludus/scanner.janet
@ -0,0 +1,325 @@
+(def reserved-words
+  "List of Ludus reserved words."
+  ## maps each reserved word's source text to its token type; see ludus-spec repo for more info
+  {"as" :as ## impl for `import`; not yet for patterns
+   "do" :do ## impl
+   "else" :else ## impl
+   "false" :false ## impl -> literal word
+   "fn" :fn ## impl
+   "if" :if ## impl
+   "import" :import ## impl
+   "let" :let ## impl
+   "loop" :loop ## impl
+   "match" :match ## impl
+   "nil" :nil ## impl -> literal word
+   "ns" :ns ## impl
+   "panic!" :panic ## impl (should _not_ be a function)
+   "recur" :recur ## impl
+   "ref" :ref ## impl
+   "then" :then ## impl
+   "true" :true ## impl -> literal word
+   "use" :use ## wip
+   "with" :with ## impl
+   "when" :when ## impl, replaces cond
+   "repeat" :repeat ## syntax sugar over "loop": still unclear what this syntax could be
+   "test" :test
+   })
+
+(def literal-words {"true" true ## literal value attached to the token of each literal word
+                    "false" false
+                    "nil" nil ## NOTE(review): Janet structs drop nil values, so this key is presumably absent at runtime - confirm
+                    })
+
+(defn- new-scanner
+  "Builds the mutable state table for scanning `source`; `input` is stored as given."
+  [source input]
+  (table
+    :source source
+    :input input
+    :length (length source)
+    ## cursors: :start marks the beginning of the lexeme being scanned
+    :start 0
+    :current 0
+    :line 1
+    ## output accumulators
+    :tokens @[]
+    :errors @[]))
+
+(defn- at-end?
+  "True once the scanner's :current index has reached or passed :length."
+  [scanner]
+  (let [{:current position :length limit} scanner]
+    (>= position limit)))
+
+(defn- current-char
+  "Returns the character at :current as a one-character string, or nil when :current is past the end of :source."
+  [scanner]
+  (let [{:source text :current index} scanner]
+    (when (< index (length text))
+      ## indexing a string yields a byte, so convert it back to a string
+      (string/from-bytes (get text index)))))
+
+(defn- advance
+  "Moves the scanner's :current index forward by one and returns the scanner."
+  [scanner]
+  (put scanner :current (inc (get scanner :current))))
+
+(defn- next-char
+  "Returns the character one past :current as a one-character string, or nil if there is none."
+  [scanner]
+  (let [source (get scanner :source)
+        next (inc (get scanner :current))]
+    (if (>= next (length source))
+      nil
+      ## read at `next`, not at :current: this is a one-character lookahead
+      (string/from-bytes (get source next)))))
+
+(defn- current-lexeme
+  "Returns the part of :source from :start (inclusive) to :current (exclusive)."
+  [scanner]
+  (let [{:source text :start from :current to} scanner]
+    (slice text from to)))
+
+(defn- char-code
+  "Returns the first byte of a string as an integer."
+  [char]
+  (get char 0))
+
+(defn- char-in-range?
+  "Tests whether char's first byte lies between the first bytes of start and end, inclusive; nil when char is nil."
+  [start end char]
+  (and char
+    (<= (char-code start) (char-code char) (char-code end))))
+
+(defn- digit?
+  "Tests whether c is one of 0-9."
+  [c]
+  (char-in-range? "0" "9" c))
+
+(defn- nonzero-digit?
+  "Tests whether c is one of 1-9."
+  [c]
+  (char-in-range? "1" "9" c))
+
+## for now, words use only the basic ASCII letters
+## TODO: research the implications of using the whole Unicode letter class (\p{L}), as in:
+## (defn- alpha? [c] (boolean (re-find #"\p{L}" (string c))))
+(defn- lower?
+  "Tests whether c is a lowercase ASCII letter."
+  [c]
+  (char-in-range? "a" "z" c))
+
+(defn- upper?
+  "Tests whether c is an uppercase ASCII letter."
+  [c]
+  (char-in-range? "A" "Z" c))
+
+(defn- alpha?
+  "Tests whether c is an ASCII letter of either case."
+  [c]
+  (or (lower? c) (upper? c)))
+
+## non-alphanumeric characters that are legal inside words
+(def word-chars {"_" true "?" true "!" true "*" true "/" true})
+
+(defn- word-char?
+  "Tests whether c may appear in a word: a letter, a digit, or a key of word-chars."
+  [c]
+  (cond
+    (alpha? c) true
+    (digit? c) true
+    (get word-chars c)))
+
+(defn- whitespace?
+  "Tests whether c is a space or a tab; newlines are not whitespace here."
+  [c]
+  (case c
+    " " true
+    "\t" true
+    false))
+
+## characters that end a word, keyword, or number
+(def terminators {
+":" true
+## the semicolon entry had been turned into a second "#" by the `;` -> `#` comment conversion
+";" true
+"\n" true
+"{" true
+"}" true
+"(" true
+")" true
+"[" true
+"]" true
+"$" true
+"#" true
+"-" true
+"=" true
+"&" true
+"," true
+">" true
+# nil was in here; I don't want to be passing in literal nil to this function anyway, and "nil" is also not it...
+"\"" true})
+
+(defn- terminates?
+  "Tests whether c ends the current word-like token: nil (end of input), whitespace, or a key of terminators."
+  [c]
+  (cond
+    (nil? c) true
+    (whitespace? c) true
+    (get terminators c)))
+
+(defn- add-token
+  "Appends a token of token-type, with an optional literal, for the current lexeme to :tokens and returns the scanner."
+  [scanner token-type &opt literal]
+  (update scanner :tokens array/push
+    {:type token-type
+     :lexeme (current-lexeme scanner)
+     :literal literal
+     :line (get scanner :line)
+     :start (get scanner :start)
+     :source (get scanner :source)
+     ## this entry was written as a second :start key, so tokens had no :input;
+     ## it now matches the token shape built by add-error
+     :input (get scanner :input)}))
+
+## TODO: errors should also be in the vector of tokens
+## The goal is to be able to hand this to an LSP?
+## Do we need a different structure?
+(defn- add-error
+  "Records an :error token carrying msg in both :errors and :tokens, then returns the scanner."
+  [scanner msg]
+  (def error-token
+    {:type :error
+     :lexeme (current-lexeme scanner)
+     :literal nil
+     :line (get scanner :line)
+     :start (get scanner :start)
+     :source (get scanner :source)
+     :input (get scanner :input)
+     :message msg})
+  ## both arrays are mutable, so pushing in place is enough
+  (array/push (get scanner :errors) error-token)
+  (array/push (get scanner :tokens) error-token)
+  scanner)
+
+(defn- add-keyword
+  "Scans a keyword, starting just after its leading colon, and adds a :keyword token or an error."
+  [scanner]
+  (defn recur [scanner key]
+    (let [char (current-char scanner)]
+      (cond
+        (terminates? char) (add-token scanner :keyword (keyword key))
+        (word-char? char) (recur (advance scanner) (string key char))
+        ## the message previously ran the offending character straight into "after"
+        :else (add-error scanner (string "Unexpected " char " after keyword :" key)))))
+  (recur scanner ""))
+
+(defn- read-literal [lit] (-> lit parse-all first)) ## parse the text and keep only the first parsed value
+
+## TODO: improve number parsing?
+## NOTE(review): the Clojure version used the EDN reader and so followed Clojure's number formatting rules; here the text goes through `parse-all`, so Janet's number syntax presumably applies - confirm
+## Clojure's rules, for comparison: https://cljs.github.io/api/syntax/number
+(defn- add-number [char scanner] ## `char` is the already-consumed first character (a digit or `-`)
+  (defn recur [scanner num float?] ## num: buffer of characters kept so far; float?: a decimal point has been seen
+    (let [curr (current-char scanner)]
+      (cond
+        (= curr "_") (recur (advance scanner) num float?) ## consume underscores unharmed
+        (= curr ".") (if float?
+                      (add-error scanner (string "Unexpected second decimal point after " num "."))
+                      (recur (advance scanner) (buffer/push num curr) true))
+        (terminates? curr) (add-token scanner :number (read-literal num)) ## end of the number: emit the token with its parsed value
+        (digit? curr) (recur (advance scanner) (buffer/push num curr) float?)
+        :else (add-error scanner (string "Unexpected " curr " after number " num ".")))))
+  (recur scanner (buffer char) false))
+
+## TODO: activate string interpolation
+(defn- add-string
+  "Scans a string literal, starting just after its opening quote, and adds a :string token or an unterminated-string error."
+  [scanner]
+  (defn recur [scanner buff interpolate?]
+    (let [char (current-char scanner)]
+      (case char
+        ## a brace marks the string as interpolated (not acted on yet)
+        "{" (recur (advance scanner) (buffer/push buff char) true)
+        ## allow multiline strings
+        "\n" (recur (update (advance scanner) :line inc) (buffer/push buff char) interpolate?)
+        "\"" (if interpolate?
+             ## (add-token (advance scanner) :interpolated string)
+             ## no interpolation yet
+             (add-token (advance scanner) :string (string buff))
+             (add-token (advance scanner) :string (string buff)))
+        "\\" (let [next (next-char scanner)]
+             (if (nil? next)
+               ## a backslash as the very last character of the input: there is nothing
+               ## to escape, and pushing nil onto the buffer would raise an error
+               (add-error (advance scanner) "Unterminated string.")
+               (let [scanner (if (= next "\n")
+                               (update scanner :line inc)
+                               scanner)]
+                 ## skip both the backslash and the escaped character
+                 (recur (advance (advance scanner)) (buffer/push buff next) interpolate?))))
+        (if (at-end? scanner)
+          (add-error scanner "Unterminated string.")
+          (recur (advance scanner) (buffer/push buff char) interpolate?)))))
+  (recur scanner @"" false))
+
+(defn- add-word
+  "Scans a word whose first character `char` is already consumed and adds a reserved-word or :word token, or an error."
+  [char scanner]
+  (defn recur [scanner word]
+    (let [curr (current-char scanner)]
+      (cond
+        (terminates? curr) (let [lexeme (string word)
+                                 ## `get` with a default cannot return nil, so the `nil`
+                                 ## word is handled explicitly; other non-literal words get :none
+                                 literal (if (= lexeme "nil")
+                                           nil
+                                           (get literal-words lexeme :none))]
+                             (add-token scanner (get reserved-words lexeme :word) literal))
+        (word-char? curr) (recur (advance scanner) (buffer/push word curr))
+        :else (add-error scanner (string "Unexpected " curr " after word " word ".")))))
+  (recur scanner (buffer char)))
+
+(defn- add-ignored
+  "Scans an ignored word, starting just after its leading underscore, and adds an :ignored token or an error."
+  [scanner]
+  (defn recur [state name]
+    (def ch (current-char state))
+    (cond
+      (terminates? ch) (add-token state :ignored)
+      (word-char? ch) (recur (advance state) (buffer/push name ch))
+      (add-error state (string "Unexpected " ch " after word " name "."))))
+  (recur scanner @"_"))
+
+(defn- add-comment
+  "Skips a comment up to, but not including, the next newline or the end of input; returns the scanner without adding a token."
+  [char scanner]
+  (defn recur [scanner comm]
+    (let [char (current-char scanner)]
+      ## stop at end of input too: a comment on the last line with no trailing
+      ## newline would otherwise push nil onto the buffer
+      (if (or (nil? char) (= "\n" char))
+        scanner ## for now, we don't do anything with comments; can be added later
+        (recur (advance scanner) (buffer/push comm char)))))
+  (recur scanner (buffer char)))
+
+(defn- scan-token
+  "Consumes one token (or one whitespace character or comment) at :current and returns the scanner with any token or error added."
+  [scanner]
+  (let [char (current-char scanner)
+        scanner (advance scanner)
+        next (current-char scanner)]
+    (case char
+      ## one-character tokens
+      ## :break is a special zero-char token before closing braces
+      ## it makes parsing much simpler
+      "(" (add-token scanner :lparen)
+      ")" (add-token (add-token scanner :break) :rparen)
+      "{" (add-token scanner :lbrace)
+      "}" (add-token (add-token scanner :break) :rbrace)
+      "[" (add-token scanner :lbracket)
+      "]" (add-token (add-token scanner :break) :rbracket)
+      ## this key had become "#" in the `;` -> `#` comment conversion, which also
+      ## made the dict branch below unreachable
+      ";" (add-token scanner :semicolon)
+      "," (add-token scanner :comma)
+      "\n" (add-token (update scanner :line inc) :newline)
+      "\\" (add-token scanner :backslash)
+      "=" (add-token scanner :equals)
+      ">" (add-token scanner :pipeline)
+
+      ## two-character tokens
+      ## ->
+      "-" (cond
+           (= next ">") (add-token (advance scanner) :rarrow)
+           (digit? next) (add-number char scanner)
+           :else (add-error scanner (string "Expected -> or negative number after `-`. Got `" char next "`")))
+
+      ## dict #{
+      "#" (if (= next "{")
+           (add-token (advance scanner) :startdict)
+           (add-error scanner (string "Expected beginning of dict: #{. Got " char next)))
+
+      ## set ${
+      "$" (if (= next "{")
+           (add-token (advance scanner) :startset)
+           (add-error scanner (string "Expected beginning of set: ${. Got " char next)))
+
+      ## placeholders
+      ## there's a flat _, and then ignored words
+      "_" (cond
+           (terminates? next) (add-token scanner :placeholder)
+           (alpha? next) (add-ignored scanner)
+           :else (add-error scanner (string "Expected placeholder: _. Got " char next)))
+
+      ## comments
+      ## & starts an inline comment
+      "&" (add-comment char scanner)
+
+      ## keywords
+      ":" (cond
+           (alpha? next) (add-keyword scanner)
+           :else (add-error scanner (string "Expected keyword. Got " char next)))
+
+      ## splats
+      "." (let [after_next (current-char (advance scanner))]
+           (if (= ".." (string next after_next))
+             (add-token (advance (advance scanner)) :splat)
+             (add-error scanner (string "Expected splat: ... . Got " (string "." next after_next)))))
+
+      ## strings
+      "\"" (add-string scanner)
+
+      ## word matches
+      (cond
+        (whitespace? char) scanner ## for now just skip whitespace characters
+        (digit? char) (add-number char scanner)
+        (upper? char) (add-word char scanner) ## no datatypes for now
+        (lower? char) (add-word char scanner)
+        :else (add-error scanner (string "Unexpected character: " char))))))
+
+(defn- next-token
+  "Starts the next lexeme by moving :start up to :current; returns the scanner."
+  [scanner]
+  (let [position (get scanner :current)]
+    (put scanner :start position)))
+
+(defn scan
+  "Scans source (input is stored on each token) and returns a struct with the :tokens and :errors arrays."
+  [source input]
+  (var scanner (new-scanner source input))
+  ## one token per iteration, then reset :start for the next lexeme
+  (while (not (at-end? scanner))
+    (set scanner (next-token (scan-token scanner))))
+  ## close the stream with a zero-width :break followed by :eof
+  (def finished (add-token (add-token scanner :break) :eof))
+  {:tokens (get finished :tokens)
+   :errors (get finished :errors)})
Author	SHA1	Message	Date
Scott Richmond	ff40d395f8	Repl things	2024-01-07 20:10:45 -05:00
Scott Richmond	307bebfa53	First pass at janet scanner	2024-01-07 20:10:16 -05:00
Scott Richmond	153c5a358e	Take some notes clj->janet	2024-01-07 20:10:02 -05:00
Scott Richmond	2cbc39029c	Janet repl things	2024-01-07 20:09:01 -05:00
Scott Richmond	1520b1d8e7	Repl cruft	2023-12-31 18:37:19 -05:00
Scott Richmond	83bfc01275	Try with kitten	2023-12-31 18:37:09 -05:00
Scott Richmond	14780bf6b8	First pass at new state model: all refs are included in results.	2023-12-27 12:24:12 -05:00