Fix faulty chunking for received user messages

The chunks now properly handle code blocks and prefer to split
at a newline rather than arbitrarily within the text.
cshd
Dragory 2020-10-21 21:36:50 +03:00
parent 0a98b5cb49
commit 012bd5b9b2
No known key found for this signature in database
GPG Key ID: 5F387BA66DF8AAC1
2 changed files with 77 additions and 6 deletions

View File

@ -89,13 +89,12 @@ class Thread {
if (typeof content === "string") {
// Content is a string, chunk it and send it as individual messages.
// Files (attachments) are only sent with the last message.
const chunks = utils.chunk(content, 2000);
const chunks = utils.chunkMessageLines(content);
for (const [i, chunk] of chunks.entries()) {
let msg;
if (i === chunks.length - 1) {
// Only send embeds, files, etc. with the last message
msg = await bot.createMessage(this.channel_id, chunk, file);
}
const msg = (i === chunks.length - 1)
? await bot.createMessage(this.channel_id, chunk, file)
: await bot.createMessage(this.channel_id, chunk);
firstMessage = firstMessage || msg;
}

View File

@ -386,6 +386,77 @@ function messageContentIsWithinMaxLength(content) {
return true;
}
/**
* Splits a string into chunks, preferring to split at a newline
* @param {string} str
* @param {number} [maxChunkLength=2000]
* @returns {string[]}
*/
function chunkByLines(str, maxChunkLength = 2000) {
if (str.length < maxChunkLength) {
return [str];
}
const chunks = [];
while (str.length) {
if (str.length <= maxChunkLength) {
chunks.push(str);
break;
}
const slice = str.slice(0, maxChunkLength);
const lastLineBreakIndex = slice.lastIndexOf("\n");
if (lastLineBreakIndex === -1) {
chunks.push(str.slice(0, maxChunkLength));
str = str.slice(maxChunkLength);
} else {
chunks.push(str.slice(0, lastLineBreakIndex));
str = str.slice(lastLineBreakIndex + 1);
}
}
return chunks;
}
/**
* Chunks a long message to multiple smaller messages, retaining leading and trailing line breaks, open code blocks, etc.
*
* Default maxChunkLength is 1990, a bit under the message length limit of 2000, so we have space to add code block
* shenanigans to the start/end when needed. Take this into account when choosing a custom maxChunkLength as well.
*/
function chunkMessageLines(str, maxChunkLength = 1990) {
const chunks = chunkByLines(str, maxChunkLength);
let openCodeBlock = false;
return chunks.map(_chunk => {
// If the chunk starts with a newline, add an invisible unicode char so Discord doesn't strip it away
if (_chunk[0] === "\n") _chunk = "\u200b" + _chunk;
// If the chunk ends with a newline, add an invisible unicode char so Discord doesn't strip it away
if (_chunk[_chunk.length - 1] === "\n") _chunk = _chunk + "\u200b";
// If the previous chunk had an open code block, open it here again
if (openCodeBlock) {
openCodeBlock = false;
if (_chunk.startsWith("```")) {
// Edge case: chunk starts with a code block delimiter, e.g. the previous chunk and this one were split right before the end of a code block
// Fix: just strip the code block delimiter away from here, we don't need it anymore
_chunk = _chunk.slice(3);
} else {
_chunk = "```" + _chunk;
}
}
// If the chunk has an open code block, close it and open it again in the next chunk
const codeBlockDelimiters = _chunk.match(/```/g);
if (codeBlockDelimiters && codeBlockDelimiters.length % 2 !== 0) {
_chunk += "```";
openCodeBlock = true;
}
return _chunk;
});
}
module.exports = {
BotError,
@ -428,6 +499,7 @@ module.exports = {
readMultilineConfigValue,
messageContentIsWithinMaxLength,
chunkMessageLines,
noop,
};