From da1f1f731d809e0b66d06961ed953aa7846b4129 Mon Sep 17 00:00:00 2001
From: Matt Nish-Lapidus
Date: Sun, 6 Jul 2025 19:24:40 -0400
Subject: [PATCH] dissociated press examples

---
 dissociated-press-ngrammatch.ld | 56 ++++++++++++++++++++++++++++++++++++++++++
 dissociated-press.ld            | 26 ++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 dissociated-press-ngrammatch.ld
 create mode 100644 dissociated-press.ld

diff --git a/dissociated-press-ngrammatch.ld b/dissociated-press-ngrammatch.ld
new file mode 100644
index 0000000..10b7b04
--- /dev/null
+++ b/dissociated-press-ngrammatch.ld
@@ -0,0 +1,56 @@
+& Dissociated press, n-gram variant: each step jumps to a random place in the
+& corpus where the tail of the current pattern occurs and continues from there.
+let (:ok, corpus) = fetch("https://alea.ludus.dev/twc/corpus/raw/branch/main/jarg2912.txt")
+let corpus_clean = do corpus > downcase > words
+
+& The starting pattern is a random window of `pattern_len` words.
+let pattern_len = 5
+let corpus_len = count(corpus_clean)
+let start_position = random(0, sub(corpus_len, pattern_len))
+let starting_ngram = slice_n(corpus_clean, start_position, pattern_len)
+
+& Keeps the entries of `indices` at which the word sequence `pat` begins in
+& `text`. Call it with three arguments; the five-argument clause is the loop,
+& walking `indices` from the last entry down to the first. The loop stops only
+& after `i` drops below 0, so entry 0 is tested too and an empty `indices`
+& returns `[]` straight away.
+fn indices_of_ngram {
+  (text, pat, indices) -> indices_of_ngram(text, pat, indices, [], dec(count(indices)))
+  (text, pat, indices, res, i) -> {
+    if lt?(i, 0)
+      then res
+      else {
+        let candidate = at(indices, i)
+        let test_ngram = slice_n(text, candidate, count(pat))
+        if eq?(test_ngram, pat)
+          then indices_of_ngram(text, pat, indices, append(res, candidate), dec(i))
+          else indices_of_ngram(text, pat, indices, res, dec(i))
+      }
+  }
+}
+
+& Builds the output text. `i` is the number of jumps still to make, `n` sets
+& how far from the end of `pat` the matched tail starts, and `res` accumulates
+& the output. When no continuation is found the text built so far is returned.
+& NOTE(review): if `slice` excludes its end index, `sub_pat` holds `n` words
+& rather than `n + 1` -- confirm against the prelude.
+fn press! {
+  (text, pat, 0, n, res) -> res
+  (text, pat, i, n) -> press!(text, pat, i, n, "")
+  (text, pat, i, n, res) -> {
+    let sub_pat_len = sub(count(pat), inc(n))
+    let sub_pat = slice(pat, sub_pat_len, dec(count(pat)))
+    let inds = indices_of(text, at(sub_pat, 0))
+    let ind = random(indices_of_ngram(text, sub_pat, inds))
+    if ind
+      then {
+        let new_pat = slice_n(text, add(ind, inc(n)), count(pat))
+        let new_res = concat(trim(res), " ", sentence(new_pat))
+
+        press!(text, new_pat, dec(i), n, new_res)
+      }
+      else press!(text, pat, 0, n, res)
+  }
+}
+
+press!(corpus_clean, starting_ngram, 5, 1)
diff --git a/dissociated-press.ld b/dissociated-press.ld
new file mode 100644
index 0000000..afffb2c
--- /dev/null
+++ b/dissociated-press.ld
@@ -0,0 +1,26 @@
+& Dissociated press, single-word variant: each step jumps to a random
+& occurrence of the current pattern's last word and continues after it.
+let (:ok, corpus) = fetch("https://alea.ludus.dev/twc/corpus/raw/branch/main/jarg2912.txt")
+let corpus_clean = do corpus > downcase > words
+
+& The starting pattern is a random window of `pattern_len` words.
+let pattern_len = 5
+let corpus_len = count(corpus_clean)
+let start_position = random(0, sub(corpus_len, pattern_len))
+let starting_ngram = slice_n(corpus_clean, start_position, pattern_len)
+
+& Builds the output text. `i` is the number of jumps still to make and `res`
+& accumulates the output. Every lookup goes through the `text` argument, so the
+& function works on whichever corpus the caller passes in.
+fn press! {
+  (text, pat, 0, res) -> res
+  (text, pat, i) -> press!(text, pat, i, "")
+  (text, pat, i, res) -> {
+    let ind = random(indices_of(text, last(pat)))
+    let new_pat = slice_n(text, inc(ind), count(pat))
+    let new_res = concat(trim(res), " ", sentence(new_pat))
+    press!(text, new_pat, dec(i), new_res)
+  }
+}
+
+press!(corpus_clean, starting_ngram, 4)