From 2027490614f311f7751e2a832d78f7ac6740e361 Mon Sep 17 00:00:00 2001 From: Scott Richmond Date: Tue, 16 Jul 2024 20:12:21 -0400 Subject: [PATCH] improve some things --- src/parser.janet | 84 +++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/src/parser.janet b/src/parser.janet index d04e675..4088874 100644 --- a/src/parser.janet +++ b/src/parser.janet @@ -21,6 +21,26 @@ (if-not (dyn name) (error "recursive functions must be declared before they are defined")) ~(set ,name (defn- ,name ,;forms))) +### Some more human-readable formatting +(defn- pp-tok [token] + (if (not token) (break "nil")) + (def {:line line :lexeme lex :type type :start start} token) + (string "<" line "[" start "]" ": " type ": " lex ">")) + +(defn- pp-ast [ast &opt indent] + (default indent 0) + (def {:token token :data data :type type} ast) + (def pretty-tok (pp-tok token)) + (def data-rep (if (= :array (janet-type data)) + (string "[\n" + (string/join (map (fn [x] (pp-ast x (inc indent))) data) + (string (string/repeat " " indent) "\n")) + "\n" (string/repeat " " indent) "]") + data + )) + (string (string/repeat " " indent) type ": " pretty-tok " " data-rep) +) + ### Next: a data structure for a parser (defn- new-parser "Creates a new parser data structure to pass around" @@ -78,7 +98,9 @@ (has-value? terminators ttype)) # breakers are what terminate panics -(def breaking [:break :newline :semicolon :comma :eof :then :else :arrow]) +(def breaking [:break :newline :semicolon :comma :eof +# :then :else :arrow +]) (defn- breaks? "Returns true if the current token in the parser should break a panic" @@ -92,16 +114,12 @@ [parser message] # (print "Panic in the parser: " message) (def origin (current parser)) - # (advance parser) - (def skipped @[origin]) + (def skipped @[]) (while (not (breaks? parser)) (array/push skipped (current parser)) (advance parser)) - # but we actually don't want to advance past all breaking tokens - # this is the source of the off-by-one errors - # only newlines and semicolons should be skipped here; everything else needs to be parsed normaly - # wait, except for we don't actually advance past the breaking token (array/push skipped (current parser)) + # (advance parser) (def err {:type :error :data skipped :token origin :msg message}) (update parser :errors array/push err) (error err)) @@ -343,7 +361,6 @@ (def origin (current parser)) (advance parser) # consume the :lparen (def ast {:type :tuple :data @[] :token origin}) - # (while (separates? parser) (advance parser)) # consume any separators (while (not (check parser :rparen)) (accept-many parser ;separates) (when (check parser :eof) @@ -360,8 +377,8 @@ (def origin (current parser)) (advance parser) (def ast {:type :list :data @[] :token origin}) - (while (separates? parser) (advance parser)) (while (not (check parser :rbracket)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed bracket"}) (array/push (parser :errors) err) @@ -375,8 +392,7 @@ ) (capture nonbinding parser))) (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -384,8 +400,8 @@ (def origin (current parser)) (advance parser) (def ast {:type :set :data @[] :token origin}) - (while (separates? parser) (advance parser)) (while (not (check parser :rbrace)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed brace"}) (array/push (parser :errors) err) @@ -399,8 +415,7 @@ ) (capture nonbinding parser))) (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -408,8 +423,8 @@ (def origin (current parser)) (advance parser) (def ast {:type :dict :data @[] :token origin}) - (while (separates? parser) (advance parser)) (while (not (check parser :rbrace)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed brace"}) (array/push (parser :errors) err) @@ -429,7 +444,7 @@ (try (panic parser (string "expected dict term, got " (type origin))) ([e] e)) )) (array/push (ast :data) term) - (try (separators parser) ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -480,8 +495,8 @@ (def origin (current parser)) (advance parser) (def ast {:type :list :data @[] :token origin}) - (while (separates? parser) (advance parser)) (while (not (check parser :rbracket)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed bracket"}) (array/push (parser :errors) err) @@ -494,8 +509,7 @@ {:type :splat :data splatted :token origin}) (capture pattern parser))) (array/push (ast :data) term) - (try (separators parser) - ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -503,8 +517,8 @@ (def origin (current parser)) (advance parser) (def ast {:type :dict :data @[] :token origin}) - (while (separates? parser) (advance parser)) (while (not (check parser :rbrace)) + (accept-many parser ;separates) (when (check parser :eof) (def err {:type :error :token origin :msg "unclosed brace"}) (array/push (parser :errors) err) @@ -524,7 +538,7 @@ (try (panic parser (string "expected dict term, got " (type origin))) ([e] e)) )) (array/push (ast :data) term) - (try (separators parser) ([e] (array/push (ast :data) e)))) + (capture separators parser)) (advance parser) ast) @@ -565,17 +579,17 @@ (defn- iff [parser] (def ast {:type :if :data @[] :token (current parser)}) (advance parser) #consume the if - (array/push (ast :data) (capture simple parser)) + (array/push (ast :data) (simple parser)) (accept-many parser :newline) (if-let [err (expect-ret parser :then)] (array/push (ast :data) err) (advance parser)) - (array/push (ast :data) (capture nonbinding parser)) + (array/push (ast :data) (nonbinding parser)) (accept-many parser :newline) (if-let [err (expect-ret parser :else)] (array/push (ast :data) err) (advance parser)) - (array/push (ast :data) (capture nonbinding parser)) + (array/push (ast :data) (nonbinding parser)) ast) (defn- literal-terminator? [token] @@ -1111,6 +1125,7 @@ (def origin (current parser)) (def lines @[]) (while (not (check parser :eof)) + (print "starting script loop with " (pp-tok origin)) (accept-many parser ;terminators) (array/push lines (capture toplevel parser)) (capture terminator parser)) @@ -1122,29 +1137,10 @@ (set (parser :ast) ast) parser) -(defn- pp-tok [token] - (if (not token) (break "nil")) - (def {:line line :lexeme lex :type type :start start} token) - (string "<" line "[" start "]" ": " type ": " lex ">")) - -(defn- pp-ast [ast &opt indent] - (default indent 0) - (def {:token token :data data :type type} ast) - (def pretty-tok (pp-tok token)) - (def data-rep (if (= :array (janet-type data)) - (string "[\n" - (string/join (map (fn [x] (pp-ast x (inc indent))) data) - (string (string/repeat " " indent) "\n")) - "\n" (string/repeat " " indent) "]") - data - )) - (string (string/repeat " " indent) type ": " pretty-tok " " data-rep) -) - (do # (comment (def source ` -(,,,,,,1, 2, foo, :three) +(,,,,,1, 2 bar, foo) `) (def scanned (s/scan source))