diff --git a/install/package.json b/install/package.json index 5f1285ebc1..fb6826807c 100644 --- a/install/package.json +++ b/install/package.json @@ -51,7 +51,6 @@ "bootswatch": "5.3.3", "chalk": "4.1.2", "chart.js": "4.4.5", - "cheerio": "^1.0.0-rc.12", "cli-graph": "3.2.2", "clipboard": "2.0.11", "colors": "1.4.0", diff --git a/src/activitypub/helpers.js b/src/activitypub/helpers.js index d2539cfee4..e0a76486b8 100644 --- a/src/activitypub/helpers.js +++ b/src/activitypub/helpers.js @@ -5,7 +5,7 @@ const process = require('process'); const nconf = require('nconf'); const winston = require('winston'); const validator = require('validator'); -const cheerio = require('cheerio'); +// const cheerio = require('cheerio'); const crypto = require('crypto'); const meta = require('../meta'); @@ -292,13 +292,17 @@ Helpers.resolveObjects = async (ids) => { return objects.length === 1 ? objects[0] : objects; }; +const titleishTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'title', 'p', 'span']; +const titleRegex = new RegExp(`<(?:${titleishTags.join('|')})>(.+?)`); Helpers.generateTitle = (html) => { // Given an html string, generates a more appropriate title if possible - const $ = cheerio.load(html); let title; - // Try the first paragraph element - title = $('h1, h2, h3, h4, h5, h6, title, p, span').first().text(); + // Try the first paragraph-like element + const match = html.match(titleRegex); + if (match) { + title = match[1]; + } // Fall back to newline splitting (i.e. if no paragraph elements) title = title || html.split('\n').filter(Boolean).shift(); diff --git a/test/activitypub.js b/test/activitypub.js index efc8d02b91..5ab60526f2 100644 --- a/test/activitypub.js +++ b/test/activitypub.js @@ -108,13 +108,23 @@ describe('ActivityPub integration', () => { }); }); - describe('.generateTitle', () => { + describe.only('.generateTitle', () => { it('should take the first paragraph element\'s text', () => { const source = '

Lorem ipsum dolor sit amet

consectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.

'; const title = activitypub.helpers.generateTitle(source); assert.strictEqual(title, 'Lorem ipsum dolor sit amet'); }); + it('should also accept a couple other tags like h1 or span', () => { + let source = '

Lorem ipsum dolor sit amet

consectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.

'; + let title = activitypub.helpers.generateTitle(source); + assert.strictEqual(title, 'Lorem ipsum dolor sit amet'); + + source = 'Lorem ipsum dolor sit amet

consectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.

'; + title = activitypub.helpers.generateTitle(source); + assert.strictEqual(title, 'Lorem ipsum dolor sit amet'); + }); + it('should take the first line\'s text if no matched elements', () => { const source = 'Lorem ipsum dolor sit amet\n\nconsectetur adipiscing elit. Integer tincidunt metus scelerisque, dignissim risus a, fermentum leo. Pellentesque eleifend ullamcorper risus tempus vestibulum. Proin mollis ipsum et magna lobortis, at pretium enim pharetra. Ut vel ex metus. Mauris faucibus lectus et nulla iaculis, et pellentesque elit pellentesque. Aliquam rhoncus nec nulla eu lacinia. Maecenas cursus iaculis ligula, eu pharetra ex suscipit sit amet.'; const title = activitypub.helpers.generateTitle(source);