Fix bug when do LFS GC (#36500) (#36608)

Backport #36500 by @lunny

Fix #36448

Removed unnecessary parameters from the LFS GC process and switched to
an ORDER BY id ASC strategy with a last-ID cursor to avoid missing or
duplicating meta object IDs.

Signed-off-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Giteabot
2026-02-13 12:30:42 +08:00
committed by GitHub
parent 8e412ababf
commit 76b7306daa
4 changed files with 101 additions and 23 deletions

View File

@@ -343,15 +343,12 @@ func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx cont
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
type IterateLFSMetaObjectsForRepoOptions struct {
OlderThan timeutil.TimeStamp
UpdatedLessRecentlyThan timeutil.TimeStamp
OrderByUpdated bool
LoopFunctionAlwaysUpdates bool
OlderThan timeutil.TimeStamp
UpdatedLessRecentlyThan timeutil.TimeStamp
}
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
var start int
batchSize := setting.Database.IterateBufferSize
engine := db.GetEngine(ctx)
type CountLFSMetaObject struct {
@@ -359,7 +356,7 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
LFSMetaObject `xorm:"extends"`
}
id := int64(0)
lastID := int64(0)
for {
beans := make([]*CountLFSMetaObject, 0, batchSize)
@@ -372,29 +369,23 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
if !opts.UpdatedLessRecentlyThan.IsZero() {
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
}
sess.GroupBy("`lfs_meta_object`.id")
if opts.OrderByUpdated {
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
} else {
sess.And("`lfs_meta_object`.id > ?", id)
sess.OrderBy("`lfs_meta_object`.id ASC")
}
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
sess.GroupBy("`lfs_meta_object`.id").
And("`lfs_meta_object`.id > ?", lastID).
OrderBy("`lfs_meta_object`.id ASC")
if err := sess.Limit(batchSize).Find(&beans); err != nil {
return err
}
if len(beans) == 0 {
return nil
}
if !opts.LoopFunctionAlwaysUpdates {
start += len(beans)
}
for _, bean := range beans {
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
return err
}
}
id = beans[len(beans)-1].ID
lastID = beans[len(beans)-1].ID
}
}

61
models/git/lfs_test.go Normal file
View File

@@ -0,0 +1,61 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git_test
import (
"bytes"
"context"
"strconv"
"testing"
"time"
"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/modules/timeutil"
"github.com/stretchr/testify/assert"
)
func TestIterateLFSMetaObjectsForRepoUpdatesDoNotSkip(t *testing.T) {
assert.NoError(t, unittest.PrepareTestDatabase())
ctx := t.Context()
repo, err := repo_model.GetRepositoryByOwnerAndName(ctx, "user2", "repo1")
assert.NoError(t, err)
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
created := make([]*git_model.LFSMetaObject, 0, 3)
for i := range 3 {
content := []byte("gitea-lfs-" + strconv.Itoa(i))
pointer, err := lfs.GeneratePointer(bytes.NewReader(content))
assert.NoError(t, err)
meta, err := git_model.NewLFSMetaObject(ctx, repo.ID, pointer)
assert.NoError(t, err)
created = append(created, meta)
}
iterated := make([]int64, 0, len(created))
cutoff := time.Now().Add(24 * time.Hour)
iterErr := git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, meta *git_model.LFSMetaObject, count int64) error {
iterated = append(iterated, meta.ID)
_, err := db.GetEngine(ctx).ID(meta.ID).Cols("updated_unix").Update(&git_model.LFSMetaObject{
UpdatedUnix: timeutil.TimeStamp(time.Now().Unix()),
})
return err
}, &git_model.IterateLFSMetaObjectsForRepoOptions{
OlderThan: timeutil.TimeStamp(cutoff.Unix()),
UpdatedLessRecentlyThan: timeutil.TimeStamp(cutoff.Unix()),
})
assert.NoError(t, iterErr)
expected := []int64{created[0].ID, created[1].ID, created[2].ID}
assert.Equal(t, expected, iterated)
}

View File

@@ -123,10 +123,8 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
//
// It is likely that a week is potentially excessive but it should definitely be enough that any
// unassociated LFS object is genuinely unassociated.
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
OrderByUpdated: true,
LoopFunctionAlwaysUpdates: true,
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
})
if err == errStop {

View File

@@ -14,6 +14,7 @@ import (
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/test"
repo_service "code.gitea.io/gitea/services/repository"
"github.com/stretchr/testify/assert"
@@ -22,7 +23,8 @@ import (
func TestGarbageCollectLFSMetaObjects(t *testing.T) {
unittest.PrepareTestEnv(t)
setting.LFS.StartServer = true
defer test.MockVariableValue(&setting.LFS.StartServer, true)()
err := storage.Init()
assert.NoError(t, err)
@@ -46,6 +48,32 @@ func TestGarbageCollectLFSMetaObjects(t *testing.T) {
assert.ErrorIs(t, err, git_model.ErrLFSObjectNotExist)
}
func TestGarbageCollectLFSMetaObjectsForRepoAutoFix(t *testing.T) {
unittest.PrepareTestEnv(t)
defer test.MockVariableValue(&setting.LFS.StartServer, true)()
err := storage.Init()
assert.NoError(t, err)
repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1})
// add lfs object
lfsContent := []byte("gitea2")
lfsOid := storeObjectInRepo(t, repo.ID, &lfsContent)
err = repo_service.GarbageCollectLFSMetaObjectsForRepo(t.Context(), repo, repo_service.GarbageCollectLFSMetaObjectsOptions{
LogDetail: func(string, ...any) {},
AutoFix: true,
OlderThan: time.Now().Add(24 * time.Hour * 7),
UpdatedLessRecentlyThan: time.Now().Add(24 * time.Hour * 3),
})
assert.NoError(t, err)
_, err = git_model.GetLFSMetaObjectByOid(t.Context(), repo.ID, lfsOid)
assert.ErrorIs(t, err, git_model.ErrLFSObjectNotExist)
}
func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string {
pointer, err := lfs.GeneratePointer(bytes.NewReader(*content))
assert.NoError(t, err)