ludus-scripts/dissociated-press-ngrammatch.ld

40 lines
1.4 KiB
Plaintext

& Download the source text. The destructuring only matches an `(:ok, ...)`
& result, so any other result from `fetch` will not bind `corpus`.
let (:ok, corpus) = fetch("https://alea.ludus.dev/twc/corpus/raw/branch/main/jarg2912.txt")

& Normalise the text: `downcase` first, then `words` (presumably lower-casing
& and splitting into a list of words -- confirm against the prelude).
let corpus_clean = words(downcase(corpus))

& Number of words in each n-gram pattern.
let pattern_len = 5
let corpus_len = count(corpus_clean)

& Pick a start between 0 and (corpus length - pattern length) so that a full
& pattern can be taken from the chosen position.
& NOTE(review): if `random(0, x)` can return a non-integer, confirm that
& `slice_n` accepts it or switch to an integer-valued random function.
let start_position = random(0, sub(corpus_len, pattern_len))
let starting_ngram = slice_n(corpus_clean, start_position, pattern_len)
& indices_of_ngram: filter candidate positions down to real n-gram matches.
&
& (text, pat, indices)
&   text:    list to search in
&   pat:     the n-gram (list) to look for
&   indices: candidate start positions in `text`
&   Returns the entries of `indices` at which the `count(pat)`-long slice of
&   `text` equals `pat`. Candidates are visited from the last entry of
&   `indices` down to the first, so the result is in that (reversed) order.
&
& (text, pat, indices, res, i)
&   Worker clause: `res` is the accumulator and `i` is the position in
&   `indices` that is tested next.
&
& The walk stops once `i` drops below zero. Stopping at `i == 0` instead
& would skip the first candidate, and an empty `indices` (which starts the
& walk at -1) would never terminate.
fn indices_of_ngram {
  (text, pat, indices) -> indices_of_ngram(text, pat, indices, [], dec(count(indices)))
  (text, pat, indices, res, i) -> {
    if lt?(i, 0)
      then res
      else {
        let candidate = at(indices, i)
        let test_ngram = slice_n(text, candidate, count(pat))
        if eq?(test_ngram, pat)
          then indices_of_ngram(text, pat, indices, append(res, candidate), dec(i))
          else indices_of_ngram(text, pat, indices, res, dec(i))
      }
  }
}
& press!: build a "dissociated press" text by repeatedly jumping to another
& place in `text` where the tail of the current pattern occurs.
&
& (text, pat, i, n)
&   text: list of words to draw from
&   pat:  current n-gram (list of words)
&   i:    number of rounds still allowed
&   n:    controls how much of the end of `pat` is matched, and how far past
&         a match the next pattern starts (`inc(n)` words)
&   Starts with an empty result string.
&
& (text, pat, i, n, res)
&   Worker clause; `res` is the string accumulated so far. Returns `res` when
&   `i` reaches 0 or when no match position is obtained.
&
& NOTE(review): the matched tail is `slice(pat, count(pat) - inc(n),
& dec(count(pat)))`. Whether the last word of `pat` is part of that tail
& depends on whether `slice` treats its end bound as inclusive -- confirm
& this agrees with the `inc(n)` offset used for the next pattern.
fn press! {
  (text, pat, 0, n, res) -> res
  (text, pat, i, n) -> press!(text, pat, i, n, "")
  (text, pat, i, n, res) -> {
    let pat_len = count(pat)
    let skip = inc(n)
    let tail_start = sub(pat_len, skip)
    let tail = slice(pat, tail_start, dec(pat_len))
    & Candidate positions: everywhere the tail's first word occurs, narrowed
    & to the positions where the whole tail matches.
    let candidates = indices_of(text, at(tail, 0))
    let matches = indices_of_ngram(text, tail, candidates)
    let hit = random(matches)
    if hit
      then {
        let next_pat = slice_n(text, add(hit, skip), pat_len)
        let grown = concat(trim(res), " ", sentence(next_pat))
        press!(text, next_pat, dec(i), n, grown)
      }
      & Nothing to continue from: finish with what has been built so far.
      else res
  }
}
& Run the generator from the randomly chosen starting n-gram: at most 5
& rounds (the round counter is decremented on each step), with n = 2.
press!(corpus_clean, starting_ngram, 5, 2)