From 76b7306daa2d18044b84b09ea0ba1998f0b8077d Mon Sep 17 00:00:00 2001 From: Giteabot Date: Fri, 13 Feb 2026 12:30:42 +0800 Subject: [PATCH] Fix bug when do LFS GC (#36500) (#36608) Backport #36500 by @lunny Fix #36448 Removed unnecessary parameters from the LFS GC process and switched to an ORDER BY id ASC strategy with a last-ID cursor to avoid missing or duplicating meta object IDs. Signed-off-by: Lunny Xiao Co-authored-by: Lunny Xiao Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- models/git/lfs.go | 27 +++++---------- models/git/lfs_test.go | 61 +++++++++++++++++++++++++++++++++ services/repository/lfs.go | 6 ++-- services/repository/lfs_test.go | 30 +++++++++++++++- 4 files changed, 101 insertions(+), 23 deletions(-) create mode 100644 models/git/lfs_test.go diff --git a/models/git/lfs.go b/models/git/lfs.go index 8bba060ff9..e8fa3151ef 100644 --- a/models/git/lfs.go +++ b/models/git/lfs.go @@ -343,15 +343,12 @@ func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx cont // IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo type IterateLFSMetaObjectsForRepoOptions struct { - OlderThan timeutil.TimeStamp - UpdatedLessRecentlyThan timeutil.TimeStamp - OrderByUpdated bool - LoopFunctionAlwaysUpdates bool + OlderThan timeutil.TimeStamp + UpdatedLessRecentlyThan timeutil.TimeStamp } // IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error { - var start int batchSize := setting.Database.IterateBufferSize engine := db.GetEngine(ctx) type CountLFSMetaObject struct { @@ -359,7 +356,7 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont LFSMetaObject `xorm:"extends"` } - id := int64(0) + lastID := int64(0) for { beans := make([]*CountLFSMetaObject, 0, batchSize) @@ -372,29 +369,23 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont if !opts.UpdatedLessRecentlyThan.IsZero() { sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan) } - sess.GroupBy("`lfs_meta_object`.id") - if opts.OrderByUpdated { - sess.OrderBy("`lfs_meta_object`.updated_unix ASC") - } else { - sess.And("`lfs_meta_object`.id > ?", id) - sess.OrderBy("`lfs_meta_object`.id ASC") - } - if err := sess.Limit(batchSize, start).Find(&beans); err != nil { + sess.GroupBy("`lfs_meta_object`.id"). + And("`lfs_meta_object`.id > ?", lastID). + OrderBy("`lfs_meta_object`.id ASC") + + if err := sess.Limit(batchSize).Find(&beans); err != nil { return err } if len(beans) == 0 { return nil } - if !opts.LoopFunctionAlwaysUpdates { - start += len(beans) - } for _, bean := range beans { if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil { return err } } - id = beans[len(beans)-1].ID + lastID = beans[len(beans)-1].ID } } diff --git a/models/git/lfs_test.go b/models/git/lfs_test.go new file mode 100644 index 0000000000..4c0242f439 --- /dev/null +++ b/models/git/lfs_test.go @@ -0,0 +1,61 @@ +// Copyright 2026 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package git_test + +import ( + "bytes" + "context" + "strconv" + "testing" + "time" + + "code.gitea.io/gitea/models/db" + git_model "code.gitea.io/gitea/models/git" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/lfs" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" + "code.gitea.io/gitea/modules/timeutil" + + "github.com/stretchr/testify/assert" +) + +func TestIterateLFSMetaObjectsForRepoUpdatesDoNotSkip(t *testing.T) { + assert.NoError(t, unittest.PrepareTestDatabase()) + + ctx := t.Context() + repo, err := repo_model.GetRepositoryByOwnerAndName(ctx, "user2", "repo1") + assert.NoError(t, err) + + defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)() + + created := make([]*git_model.LFSMetaObject, 0, 3) + for i := range 3 { + content := []byte("gitea-lfs-" + strconv.Itoa(i)) + pointer, err := lfs.GeneratePointer(bytes.NewReader(content)) + assert.NoError(t, err) + + meta, err := git_model.NewLFSMetaObject(ctx, repo.ID, pointer) + assert.NoError(t, err) + created = append(created, meta) + } + + iterated := make([]int64, 0, len(created)) + cutoff := time.Now().Add(24 * time.Hour) + iterErr := git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, meta *git_model.LFSMetaObject, count int64) error { + iterated = append(iterated, meta.ID) + _, err := db.GetEngine(ctx).ID(meta.ID).Cols("updated_unix").Update(&git_model.LFSMetaObject{ + UpdatedUnix: timeutil.TimeStamp(time.Now().Unix()), + }) + return err + }, &git_model.IterateLFSMetaObjectsForRepoOptions{ + OlderThan: timeutil.TimeStamp(cutoff.Unix()), + UpdatedLessRecentlyThan: timeutil.TimeStamp(cutoff.Unix()), + }) + assert.NoError(t, iterErr) + + expected := []int64{created[0].ID, created[1].ID, created[2].ID} + assert.Equal(t, expected, iterated) +} diff --git a/services/repository/lfs.go b/services/repository/lfs.go index 4d48881b87..5ef2dbdac4 100644 --- a/services/repository/lfs.go +++ b/services/repository/lfs.go @@ -123,10 +123,8 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R // // It is likely that a week is potentially excessive but it should definitely be enough that any // unassociated LFS object is genuinely unassociated. - OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()), - UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()), - OrderByUpdated: true, - LoopFunctionAlwaysUpdates: true, + OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()), + UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()), }) if err == errStop { diff --git a/services/repository/lfs_test.go b/services/repository/lfs_test.go index 7fb202f42d..1335d48cb1 100644 --- a/services/repository/lfs_test.go +++ b/services/repository/lfs_test.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/storage" + "code.gitea.io/gitea/modules/test" repo_service "code.gitea.io/gitea/services/repository" "github.com/stretchr/testify/assert" @@ -22,7 +23,8 @@ import ( func TestGarbageCollectLFSMetaObjects(t *testing.T) { unittest.PrepareTestEnv(t) - setting.LFS.StartServer = true + defer test.MockVariableValue(&setting.LFS.StartServer, true)() + err := storage.Init() assert.NoError(t, err) @@ -46,6 +48,32 @@ func TestGarbageCollectLFSMetaObjects(t *testing.T) { assert.ErrorIs(t, err, git_model.ErrLFSObjectNotExist) } +func TestGarbageCollectLFSMetaObjectsForRepoAutoFix(t *testing.T) { + unittest.PrepareTestEnv(t) + + defer test.MockVariableValue(&setting.LFS.StartServer, true)() + + err := storage.Init() + assert.NoError(t, err) + + repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1}) + + // add lfs object + lfsContent := []byte("gitea2") + lfsOid := storeObjectInRepo(t, repo.ID, &lfsContent) + + err = repo_service.GarbageCollectLFSMetaObjectsForRepo(t.Context(), repo, repo_service.GarbageCollectLFSMetaObjectsOptions{ + LogDetail: func(string, ...any) {}, + AutoFix: true, + OlderThan: time.Now().Add(24 * time.Hour * 7), + UpdatedLessRecentlyThan: time.Now().Add(24 * time.Hour * 3), + }) + assert.NoError(t, err) + + _, err = git_model.GetLFSMetaObjectByOid(t.Context(), repo.ID, lfsOid) + assert.ErrorIs(t, err, git_model.ErrLFSObjectNotExist) +} + func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string { pointer, err := lfs.GeneratePointer(bytes.NewReader(*content)) assert.NoError(t, err)