diff --git a/install/data/defaults.json b/install/data/defaults.json index f5e1e0d94e..902eac1976 100644 --- a/install/data/defaults.json +++ b/install/data/defaults.json @@ -191,5 +191,7 @@ "reconnectionDelay": 1500, "disableCustomUserSkins": 0, "activitypubEnabled": 1, - "activitypubAllowLoopback": 0 + "activitypubAllowLoopback": 0, + "activitypubContentPruneDays": 30, + "activitypubUserPruneDays": 7 } diff --git a/public/language/en-GB/admin/settings/activitypub.json b/public/language/en-GB/admin/settings/activitypub.json index 73661cd778..6e22bce742 100644 --- a/public/language/en-GB/admin/settings/activitypub.json +++ b/public/language/en-GB/admin/settings/activitypub.json @@ -2,6 +2,11 @@ "intro-lead": "What is Federation?", "intro-body": "NodeBB is able to communicate with other NodeBB instances that support it. This is achieved through a protocol called ActivityPub. If enabled, NodeBB will also be able to communicate with other apps and websites that use ActivityPub (e.g. Mastodon, Peertube, etc.)", "general": "General", + "pruning": "Content Pruning", + "content-pruning": "Days to keep remote content", + "content-pruning-help": "Note that remote content that has received engagement (a reply or an upvote/downvote) will be preserved. (0 for disabled)", + "user-pruning": "Days to cache remote user accounts", + "user-pruning-help": "Remote user accounts will only be pruned if they have no posts. Otherwise they will be re-retrieved. 
(0 for disabled)",
 	"enabled": "Enable Federation",
 	"enabled-help": "If enabled, will allow this NodeBB will be able to communicate with all Activitypub-enabled clients on the wider fediverse.",
 	"allowLoopback": "Allow loopback processing",
diff --git a/src/activitypub/actors.js b/src/activitypub/actors.js
index 158ee9b893..a49e514293 100644
--- a/src/activitypub/actors.js
+++ b/src/activitypub/actors.js
@@ -1,8 +1,11 @@
 'use strict';
 
 const nconf = require('nconf');
+const winston = require('winston');
 
 const db = require('../database');
+const meta = require('../meta');
+const batch = require('../batch');
 const user = require('../user');
 const utils = require('../utils');
 const TTLCache = require('../cache/ttl');
@@ -211,6 +214,11 @@ Actors.getLocalFollowersCount = async (id) => {
 };
 
 Actors.remove = async (id) => {
+	/**
+	 * Remove ActivityPub related metadata pertaining to a remote id
+	 *
+	 * Note: don't call this directly! It is called as part of user.deleteAccount
+	 */
 	const exists = await db.isSortedSetMember('usersRemote:lastCrawled', id);
 	if (!exists) {
 		return false;
@@ -235,3 +243,65 @@ Actors.remove = async (id) => {
 		db.sortedSetRemove('usersRemote:lastCrawled', id),
 	]);
 };
+
+Actors.prune = async () => {
+	/**
+	 * Clear out remote user accounts that do not have content on the forum anywhere
+	 * Re-crawl those that have not been updated recently
+	 *
+	 * The cutoff is derived from meta.config.activitypubUserPruneDays. A value of 0 disables
+	 * pruning (this is what the admin help text promises); negative or non-numeric values are
+	 * treated as disabled too, because they would not produce a usable cutoff timestamp.
+	 */
+	const days = parseInt(meta.config.activitypubUserPruneDays, 10);
+	if (!(days > 0)) {
+		winston.verbose('[actors/prune] Pruning of remote user accounts is disabled, skipping.');
+		return;
+	}
+
+	winston.verbose('[actors/prune] Started scheduled pruning of remote user accounts');
+
+	// Entries last crawled before this moment are considered stale
+	const timestamp = Date.now() - (1000 * 60 * 60 * 24 * days);
+	const uids = await db.getSortedSetRangeByScore('usersRemote:lastCrawled', 0, -1, 0, timestamp);
+	if (!uids.length) {
+		winston.verbose('[actors/prune] No remote users to prune, all done.');
+		return;
+	}
+
+	winston.verbose(`[actors/prune] Found ${uids.length} remote users last crawled more than ${days} days ago`);
+	let deletionCount = 0;
+	const reassertionSet = new Set();
+
+	await batch.processArray(uids, async (batchUids) => {
+		const exists = await db.exists(batchUids.map(uid => `userRemote:${uid}`));
+		const counts = await db.sortedSetsCard(batchUids.map(uid => `uid:${uid}:posts`));
+		await Promise.all(batchUids.map(async (uid, idx) => {
+			if (!exists[idx]) {
+				// id in zset but not asserted, handle and return early
+				await db.sortedSetRemove('usersRemote:lastCrawled', uid);
+				return;
+			}
+
+			if (counts[idx] < 1) {
+				try {
+					await user.deleteAccount(uid);
+					deletionCount += 1;
+				} catch (err) {
+					// A single failed deletion must not abort the rest of the run
+					winston.error(`[actors/prune] Unable to delete remote user ${uid}: ${err.stack}`);
+				}
+			} else {
+				// Has posts here: keep the account and refresh it instead
+				reassertionSet.add(uid);
+			}
+		}));
+	}, {
+		batch: 50,
+		interval: 1000,
+	});
+
+	winston.verbose(`[actors/prune] ${deletionCount} remote users pruned, re-asserting ${reassertionSet.size} remote users.`);
+
+	await Actors.assert(Array.from(reassertionSet), { update: true });
+};
diff --git a/src/activitypub/index.js b/src/activitypub/index.js
index bdc3d1a687..055be9d895 100644
--- a/src/activitypub/index.js
+++ b/src/activitypub/index.js
@@ -45,6 +45,7 @@ ActivityPub.actors = require('./actors');
 ActivityPub.startJobs = () => {
 	// winston.verbose('[activitypub/jobs] Registering jobs.');
 	new CronJob('0 0 * * *', ActivityPub.notes.prune, null, true, null, null, false); // change last argument to true for debugging
+	new CronJob('0 1 * * *', ActivityPub.actors.prune, null, true, null, null, false); // change last argument to true for debugging
 };
 
 ActivityPub.resolveId = async (uid, id) => {
diff --git a/src/views/admin/settings/activitypub.tpl b/src/views/admin/settings/activitypub.tpl
index 5ca5a6d874..4e117896cb 100644
--- a/src/views/admin/settings/activitypub.tpl
+++ b/src/views/admin/settings/activitypub.tpl
@@ -24,6 +24,28 @@
+
+
[[admin/settings/activitypub:pruning]]
+
+
+
+ + +
+ [[admin/settings/activitypub:content-pruning-help]] +
+
+
+ + +
+ [[admin/settings/activitypub:user-pruning-help]] +
+
+
+
+
+
[[admin/settings/activitypub:server-filtering]]