diff --git a/src/activitypub/helpers.js b/src/activitypub/helpers.js
index 01cfd86d10..e6eb2e1c08 100644
--- a/src/activitypub/helpers.js
+++ b/src/activitypub/helpers.js
@@ -339,6 +339,52 @@ Helpers.resolveObjects = async (ids) => {
return objects.length === 1 ? objects[0] : objects;
};
+const titleishTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title', 'p', 'span'];
+const titleRegex = new RegExp(`<(${titleishTags.join('|')})>(.+?)</\\1>`, 'm');
+/**
+ * Derives a short, plain-text title from an html string.
+ * @param {string} html - markup to derive the title from
+ * @returns {string} tag-free first sentence, capped at `meta.config.maximumTitleLength`
+ */
+Helpers.generateTitle = (html) => {
+	const source = typeof html === 'string' ? html : '';
+
+	// Try the first paragraph-like element, but only if it opens the markup
+	const match = source.match(titleRegex);
+	let title = match && match.index === 0 ? match[2] : '';
+
+	// Fall back to the first non-empty line (i.e. if no paragraph elements)
+	title = title || source.split('\n').filter(Boolean).shift() || '';
+
+	// Discard everything after a line break element
+	title = title.replace(/<br\s*\/?>.*/gi, '');
+
+	// Strip html
+	title = utils.stripHTMLTags(title);
+
+	// Split sentences and use only the first; odd indices hold the captured terminator
+	const sentences = title
+		.split(/(\.|\?|!)\s/)
+		.reduce((memo, cur, idx, parts) => {
+			if (idx % 2) {
+				memo.push(`${parts[idx - 1]}${cur}`);
+			} else if (idx === parts.length - 1) {
+				memo.push(cur);
+			}
+			return memo;
+		}, []);
+	if (sentences.length > 1) {
+		title = sentences.shift();
+	}
+
+	// Truncate down if too long
+	if (title.length > meta.config.maximumTitleLength) {
+		title = `${title.slice(0, meta.config.maximumTitleLength - 3)}...`;
+	}
+
+	return title;
+};
+
Helpers.remoteAnchorToLocalProfile = async (content, isMarkdown = false) => {
let anchorRegex;
if (isMarkdown) {
diff --git a/src/activitypub/notes.js b/src/activitypub/notes.js
index 6ccb2ca209..ef4abe9add 100644
--- a/src/activitypub/notes.js
+++ b/src/activitypub/notes.js
@@ -165,11 +165,7 @@ Notes.assert = async (uid, input, options = { skipChecks: false }) => {
// mainPid ok to leave as-is
if (!title) {
- // Naive pre-processing prior to sbd tokenization
- let sbdInput = content || sourceContent;
- sbdInput = sbdInput.replace('<br />', '<br />\n');
-
- const sentences = tokenizer.sentences(sbdInput, { sanitize: true, newline_boundaries: true });
+ const sentences = tokenizer.sentences(content || sourceContent, { sanitize: true });
title = sentences.shift();
}