& Generates a short run of pseudo-random text from a fetched corpus by
& repeatedly jumping to another place in the corpus where the tail of the
& current word window occurs, and continuing from there.

& Fetch the raw corpus text; the match requires the fetch to succeed.
let (:ok, corpus) = fetch("https://alea.ludus.dev/twc/corpus/raw/branch/main/jarg2912.txt")

& Lower-case the corpus and split it into a list of words.
let corpus_clean = do corpus > downcase > words

& Number of words in the sliding window ("pattern").
let pattern_len = 5
let corpus_len = count(corpus_clean)

& Pick a start far enough from the end to leave room for a full window.
& NOTE(review): if `random` with two numbers can return a non-integer, this
& should be an integer-valued random instead -- confirm against the prelude.
let start_position = random(0, sub(corpus_len, pattern_len))
let starting_ngram = slice_n(corpus_clean, start_position, pattern_len)

& indices_of_ngram(text, pat, indices)
&   text:    list of words to search in
&   pat:     list of words to look for
&   indices: candidate start positions in `text` to test
& Returns the candidates at which `text` contains `pat`, in the order they
& were tested (last candidate first).
&
& The five-argument clauses are the internal accumulator loop: `res` collects
& matches and `remaining` counts the candidates still to be tested.
fn indices_of_ngram {
  & Nothing left to test: hand back what was collected.
  (text, pat, indices, res, 0) -> res
  & Entry point: start with every candidate still to be tested. Starting at
  & count(indices) (rather than the last index) means index 0 is tested too,
  & and an empty candidate list terminates immediately.
  (text, pat, indices) -> indices_of_ngram(text, pat, indices, [], count(indices))
  (text, pat, indices, res, remaining) -> {
    & The next candidate is the last one not yet tested.
    let candidate = at(indices, dec(remaining))
    let test_ngram = slice_n(text, candidate, count(pat))
    let next_res = if eq?(test_ngram, pat) then append(res, candidate) else res
    indices_of_ngram(text, pat, indices, next_res, dec(remaining))
  }
}

& press!(text, pat, i, n)
&   text: list of words (the corpus)
&   pat:  current window of words
&   i:    number of windows still to emit
&   n:    controls the overlap: the tail of `pat` used for matching is taken
&         from position count(pat) - (n + 1) onward
& Returns the accumulated string. Stops early when no continuation is found.
fn press! {
  & Step budget exhausted: return the accumulated text.
  (text, pat, 0, n, res) -> res
  & Entry point: start with an empty result string.
  (text, pat, i, n) -> press!(text, pat, i, n, "")
  (text, pat, i, n, res) -> {
    & Take the tail of the current window to search for.
    & NOTE(review): whether the end bound of `slice` is inclusive determines
    & how many words this tail holds -- confirm against the prelude.
    let tail_start = sub(count(pat), inc(n))
    let sub_pat = slice(pat, tail_start, dec(count(pat)))
    & Candidate positions: everywhere the tail's first word occurs.
    let inds = indices_of(text, at(sub_pat, 0))
    & Choose one matching position; presumably falsy when there are none,
    & which the check below relies on.
    let ind = random(indices_of_ngram(text, sub_pat, inds))
    if ind then {
      & Continue with the window that starts n + 1 words after the match.
      let new_pat = slice_n(text, add(ind, inc(n)), count(pat))
      let new_res = concat(trim(res), " ", sentence(new_pat))
      press!(text, new_pat, dec(i), n, new_res)
    } else res
  }
}

press!(corpus_clean, starting_ngram, 5, 1)