From 012bd5b9b25b193143f7ca63473f03ac59f5751b Mon Sep 17 00:00:00 2001 From: Dragory <2606411+Dragory@users.noreply.github.com> Date: Wed, 21 Oct 2020 21:36:50 +0300 Subject: [PATCH] Fix faulty chunking for received user messages The chunks now properly handle code blocks and prefer to split at a newline rather than arbitrarily within the text. --- src/data/Thread.js | 11 ++++--- src/utils.js | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/src/data/Thread.js b/src/data/Thread.js index 386a33f..1aeee44 100644 --- a/src/data/Thread.js +++ b/src/data/Thread.js @@ -89,13 +89,12 @@ class Thread { if (typeof content === "string") { // Content is a string, chunk it and send it as individual messages. // Files (attachments) are only sent with the last message. - const chunks = utils.chunk(content, 2000); + const chunks = utils.chunkMessageLines(content); for (const [i, chunk] of chunks.entries()) { - let msg; - if (i === chunks.length - 1) { - // Only send embeds, files, etc. with the last message - msg = await bot.createMessage(this.channel_id, chunk, file); - } + // Only send embeds, files, etc. with the last message + const msg = (i === chunks.length - 1) + ? await bot.createMessage(this.channel_id, chunk, file) + : await bot.createMessage(this.channel_id, chunk); firstMessage = firstMessage || msg; } diff --git a/src/utils.js b/src/utils.js index 83118fe..54a0d0c 100644 --- a/src/utils.js +++ b/src/utils.js @@ -386,6 +386,77 @@ function messageContentIsWithinMaxLength(content) { return true; } +/** + * Splits a string into chunks, preferring to split at a newline + * @param {string} str + * @param {number} [maxChunkLength=2000] + * @returns {string[]} + */ +function chunkByLines(str, maxChunkLength = 2000) { + if (str.length < maxChunkLength) { + return [str]; + } + + const chunks = []; + + while (str.length) { + if (str.length <= maxChunkLength) { + chunks.push(str); + break; + } + + const slice = str.slice(0, maxChunkLength); + + const lastLineBreakIndex = slice.lastIndexOf("\n"); + if (lastLineBreakIndex === -1) { + chunks.push(str.slice(0, maxChunkLength)); + str = str.slice(maxChunkLength); + } else { + chunks.push(str.slice(0, lastLineBreakIndex)); + str = str.slice(lastLineBreakIndex + 1); + } + } + + return chunks; +} + +/** + * Chunks a long message to multiple smaller messages, retaining leading and trailing line breaks, open code blocks, etc. + * + * Default maxChunkLength is 1990, a bit under the message length limit of 2000, so we have space to add code block + * shenanigans to the start/end when needed. Take this into account when choosing a custom maxChunkLength as well. + */ +function chunkMessageLines(str, maxChunkLength = 1990) { + const chunks = chunkByLines(str, maxChunkLength); + let openCodeBlock = false; + + return chunks.map(_chunk => { + // If the chunk starts with a newline, add an invisible unicode char so Discord doesn't strip it away + if (_chunk[0] === "\n") _chunk = "\u200b" + _chunk; + // If the chunk ends with a newline, add an invisible unicode char so Discord doesn't strip it away + if (_chunk[_chunk.length - 1] === "\n") _chunk = _chunk + "\u200b"; + // If the previous chunk had an open code block, open it here again + if (openCodeBlock) { + openCodeBlock = false; + if (_chunk.startsWith("```")) { + // Edge case: chunk starts with a code block delimiter, e.g. the previous chunk and this one were split right before the end of a code block + // Fix: just strip the code block delimiter away from here, we don't need it anymore + _chunk = _chunk.slice(3); + } else { + _chunk = "```" + _chunk; + } + } + // If the chunk has an open code block, close it and open it again in the next chunk + const codeBlockDelimiters = _chunk.match(/```/g); + if (codeBlockDelimiters && codeBlockDelimiters.length % 2 !== 0) { + _chunk += "```"; + openCodeBlock = true; + } + + return _chunk; + }); +} + module.exports = { BotError, @@ -428,6 +499,7 @@ module.exports = { readMultilineConfigValue, messageContentIsWithinMaxLength, + chunkMessageLines, noop, };