// Elements whose text content is a reasonable candidate for a title
const titleishTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title', 'p', 'span'];
// Matches `<tag>content</tag>`. The `\1` back-reference ties the closing tag to the
// opening one; without that anchor the lazy `(.+?)` would capture a single character.
const titleRegex = new RegExp(`<(${titleishTags.join('|')})>(.+?)</\\1>`, 'm');

/**
 * Given an html string, generates a more appropriate (short, plain-text) title if possible.
 *
 * Steps, in order:
 *   1. use the content of a title-ish element if the html starts with one,
 *      otherwise use the first non-empty line;
 *   2. discard everything from the first line break element onwards;
 *   3. strip remaining markup via `utils.stripHTMLTags`;
 *   4. keep only the first sentence, if the text contains more than one;
 *   5. truncate to `meta.config.maximumTitleLength` characters (ellipsis included).
 *
 * @param {string} html - html (or plain text) to derive the title from
 * @returns {string} the generated title; an empty string for empty or non-string input
 */
Helpers.generateTitle = (html) => {
	// Guard against empty/non-string input, there is nothing to derive a title from
	if (typeof html !== 'string' || !html.length) {
		return '';
	}

	let title;

	// Try the first paragraph-like element, but only if the html begins with it
	const match = html.match(titleRegex);
	if (match && match.index === 0) {
		title = match[2];
	}

	// Fall back to newline splitting (i.e. if no paragraph elements)
	title = title || html.split('\n').filter(Boolean).shift() || '';

	// Discard everything after a line break element (<br>, <br/>, <br />, ...)
	title = title.replace(/<br\b[^>]*>.*/i, '');

	// Strip html
	title = utils.stripHTMLTags(title);

	// Use only the first sentence: the shortest leading run of text that ends in
	// terminal punctuation followed by whitespace. If no such boundary exists the
	// text is a single sentence and is kept whole.
	const firstSentence = title.match(/^([\s\S]*?[.?!])\s/);
	if (firstSentence) {
		title = firstSentence[1];
	}

	// Truncate down if too long (the ellipsis counts toward the limit)
	if (title.length > meta.config.maximumTitleLength) {
		title = `${title.slice(0, meta.config.maximumTitleLength - 3)}...`;
	}

	return title;
};
- let sbdInput = content || sourceContent; - sbdInput = sbdInput.replace('

', '

\n

'); - - const sentences = tokenizer.sentences(sbdInput, { sanitize: true, newline_boundaries: true }); + const sentences = tokenizer.sentences(content || sourceContent, { sanitize: true }); title = sentences.shift(); }