mirror of
https://github.com/go-gitea/gitea.git
synced 2026-02-14 18:47:20 +01:00
Backport #36500 by @lunny Fix #36448 Removed unnecessary parameters from the LFS GC process and switched to an ORDER BY id ASC strategy with a last-ID cursor to avoid missing or duplicating meta object IDs. Signed-off-by: Lunny Xiao <xiaolunwen@gmail.com> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -343,15 +343,12 @@ func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx cont
|
||||
|
||||
// IterateLFSMetaObjectsForRepoOptions provides options for IterateLFSMetaObjectsForRepo
|
||||
type IterateLFSMetaObjectsForRepoOptions struct {
|
||||
OlderThan timeutil.TimeStamp
|
||||
UpdatedLessRecentlyThan timeutil.TimeStamp
|
||||
OrderByUpdated bool
|
||||
LoopFunctionAlwaysUpdates bool
|
||||
OlderThan timeutil.TimeStamp
|
||||
UpdatedLessRecentlyThan timeutil.TimeStamp
|
||||
}
|
||||
|
||||
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
|
||||
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
|
||||
var start int
|
||||
batchSize := setting.Database.IterateBufferSize
|
||||
engine := db.GetEngine(ctx)
|
||||
type CountLFSMetaObject struct {
|
||||
@@ -359,7 +356,7 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
|
||||
LFSMetaObject `xorm:"extends"`
|
||||
}
|
||||
|
||||
id := int64(0)
|
||||
lastID := int64(0)
|
||||
|
||||
for {
|
||||
beans := make([]*CountLFSMetaObject, 0, batchSize)
|
||||
@@ -372,29 +369,23 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
|
||||
if !opts.UpdatedLessRecentlyThan.IsZero() {
|
||||
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
|
||||
}
|
||||
sess.GroupBy("`lfs_meta_object`.id")
|
||||
if opts.OrderByUpdated {
|
||||
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
|
||||
} else {
|
||||
sess.And("`lfs_meta_object`.id > ?", id)
|
||||
sess.OrderBy("`lfs_meta_object`.id ASC")
|
||||
}
|
||||
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
|
||||
sess.GroupBy("`lfs_meta_object`.id").
|
||||
And("`lfs_meta_object`.id > ?", lastID).
|
||||
OrderBy("`lfs_meta_object`.id ASC")
|
||||
|
||||
if err := sess.Limit(batchSize).Find(&beans); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(beans) == 0 {
|
||||
return nil
|
||||
}
|
||||
if !opts.LoopFunctionAlwaysUpdates {
|
||||
start += len(beans)
|
||||
}
|
||||
|
||||
for _, bean := range beans {
|
||||
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
id = beans[len(beans)-1].ID
|
||||
lastID = beans[len(beans)-1].ID
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
61
models/git/lfs_test.go
Normal file
61
models/git/lfs_test.go
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright 2026 The Gitea Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package git_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
git_model "code.gitea.io/gitea/models/git"
|
||||
repo_model "code.gitea.io/gitea/models/repo"
|
||||
"code.gitea.io/gitea/models/unittest"
|
||||
"code.gitea.io/gitea/modules/lfs"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
"code.gitea.io/gitea/modules/timeutil"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestIterateLFSMetaObjectsForRepoUpdatesDoNotSkip(t *testing.T) {
|
||||
assert.NoError(t, unittest.PrepareTestDatabase())
|
||||
|
||||
ctx := t.Context()
|
||||
repo, err := repo_model.GetRepositoryByOwnerAndName(ctx, "user2", "repo1")
|
||||
assert.NoError(t, err)
|
||||
|
||||
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
|
||||
|
||||
created := make([]*git_model.LFSMetaObject, 0, 3)
|
||||
for i := range 3 {
|
||||
content := []byte("gitea-lfs-" + strconv.Itoa(i))
|
||||
pointer, err := lfs.GeneratePointer(bytes.NewReader(content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
meta, err := git_model.NewLFSMetaObject(ctx, repo.ID, pointer)
|
||||
assert.NoError(t, err)
|
||||
created = append(created, meta)
|
||||
}
|
||||
|
||||
iterated := make([]int64, 0, len(created))
|
||||
cutoff := time.Now().Add(24 * time.Hour)
|
||||
iterErr := git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, meta *git_model.LFSMetaObject, count int64) error {
|
||||
iterated = append(iterated, meta.ID)
|
||||
_, err := db.GetEngine(ctx).ID(meta.ID).Cols("updated_unix").Update(&git_model.LFSMetaObject{
|
||||
UpdatedUnix: timeutil.TimeStamp(time.Now().Unix()),
|
||||
})
|
||||
return err
|
||||
}, &git_model.IterateLFSMetaObjectsForRepoOptions{
|
||||
OlderThan: timeutil.TimeStamp(cutoff.Unix()),
|
||||
UpdatedLessRecentlyThan: timeutil.TimeStamp(cutoff.Unix()),
|
||||
})
|
||||
assert.NoError(t, iterErr)
|
||||
|
||||
expected := []int64{created[0].ID, created[1].ID, created[2].ID}
|
||||
assert.Equal(t, expected, iterated)
|
||||
}
|
||||
@@ -123,10 +123,8 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
|
||||
//
|
||||
// It is likely that a week is potentially excessive but it should definitely be enough that any
|
||||
// unassociated LFS object is genuinely unassociated.
|
||||
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
|
||||
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
|
||||
OrderByUpdated: true,
|
||||
LoopFunctionAlwaysUpdates: true,
|
||||
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
|
||||
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
|
||||
})
|
||||
|
||||
if err == errStop {
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"code.gitea.io/gitea/modules/lfs"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/storage"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
repo_service "code.gitea.io/gitea/services/repository"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
@@ -22,7 +23,8 @@ import (
|
||||
func TestGarbageCollectLFSMetaObjects(t *testing.T) {
|
||||
unittest.PrepareTestEnv(t)
|
||||
|
||||
setting.LFS.StartServer = true
|
||||
defer test.MockVariableValue(&setting.LFS.StartServer, true)()
|
||||
|
||||
err := storage.Init()
|
||||
assert.NoError(t, err)
|
||||
|
||||
@@ -46,6 +48,32 @@ func TestGarbageCollectLFSMetaObjects(t *testing.T) {
|
||||
assert.ErrorIs(t, err, git_model.ErrLFSObjectNotExist)
|
||||
}
|
||||
|
||||
func TestGarbageCollectLFSMetaObjectsForRepoAutoFix(t *testing.T) {
|
||||
unittest.PrepareTestEnv(t)
|
||||
|
||||
defer test.MockVariableValue(&setting.LFS.StartServer, true)()
|
||||
|
||||
err := storage.Init()
|
||||
assert.NoError(t, err)
|
||||
|
||||
repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1})
|
||||
|
||||
// add lfs object
|
||||
lfsContent := []byte("gitea2")
|
||||
lfsOid := storeObjectInRepo(t, repo.ID, &lfsContent)
|
||||
|
||||
err = repo_service.GarbageCollectLFSMetaObjectsForRepo(t.Context(), repo, repo_service.GarbageCollectLFSMetaObjectsOptions{
|
||||
LogDetail: func(string, ...any) {},
|
||||
AutoFix: true,
|
||||
OlderThan: time.Now().Add(24 * time.Hour * 7),
|
||||
UpdatedLessRecentlyThan: time.Now().Add(24 * time.Hour * 3),
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
_, err = git_model.GetLFSMetaObjectByOid(t.Context(), repo.ID, lfsOid)
|
||||
assert.ErrorIs(t, err, git_model.ErrLFSObjectNotExist)
|
||||
}
|
||||
|
||||
func storeObjectInRepo(t *testing.T, repositoryID int64, content *[]byte) string {
|
||||
pointer, err := lfs.GeneratePointer(bytes.NewReader(*content))
|
||||
assert.NoError(t, err)
|
||||
|
||||
Reference in New Issue
Block a user