diff --git a/gradle/changelog/lfs_performance.yaml b/gradle/changelog/lfs_performance.yaml new file mode 100644 index 0000000000..88556161b6 --- /dev/null +++ b/gradle/changelog/lfs_performance.yaml @@ -0,0 +1,2 @@ +- type: changed + description: Improved performance of LFS imports for imported repositories and mirrors diff --git a/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/GitPullCommand.java b/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/GitPullCommand.java index 290ad629c6..0e9b4f6fab 100644 --- a/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/GitPullCommand.java +++ b/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/GitPullCommand.java @@ -51,6 +51,8 @@ import sonia.scm.repository.api.PullResponse; import java.io.File; import java.io.IOException; +import java.util.HashSet; +import java.util.Set; public class GitPullCommand extends AbstractGitPushOrPullCommand @@ -225,6 +227,7 @@ public class GitPullCommand extends AbstractGitPushOrPullCommand } private void fetchLfs(PullCommandRequest request, Git git, LfsLoader.LfsLoaderLogger lfsLoaderLogger) throws IOException { + Set alreadyVisited = new HashSet<>(1000); open().getRefDatabase().getRefs().forEach( ref -> lfsLoader.inspectTree( ref.getObjectId(), @@ -233,7 +236,8 @@ public class GitPullCommand extends AbstractGitPushOrPullCommand new MirrorCommandResult.LfsUpdateResult(), repository, pullHttpConnectionProvider.createHttpConnectionFactory(request), - request.getRemoteUrl().toString() + request.getRemoteUrl().toString(), + alreadyVisited ) ); } diff --git a/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/LfsLoader.java b/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/LfsLoader.java index b7a077aacc..f41696593f 100644 --- a/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/LfsLoader.java +++ b/scm-plugins/scm-git-plugin/src/main/java/sonia/scm/repository/spi/LfsLoader.java @@ -24,6 +24,7 @@ package sonia.scm.repository.spi; +import com.google.common.annotations.VisibleForTesting; import jakarta.inject.Inject; import org.eclipse.jgit.errors.MissingObjectException; import org.eclipse.jgit.lfs.Lfs; @@ -51,7 +52,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Collection; +import java.util.HashSet; +import java.util.Set; class LfsLoader { @@ -71,8 +73,24 @@ class LfsLoader { sonia.scm.repository.Repository repository, HttpConnectionFactory httpConnectionFactory, String url) { - EntryHandler entryHandler = new EntryHandler(repository, gitRepository, mirrorLog, lfsUpdateResult, httpConnectionFactory); - inspectTree(newObjectId, entryHandler, gitRepository, mirrorLog, lfsUpdateResult, url); + inspectTree(newObjectId, gitRepository, mirrorLog, lfsUpdateResult, repository, httpConnectionFactory, url, new HashSet<>(1000)); + } + + void inspectTree(ObjectId newObjectId, + Repository gitRepository, + LfsLoaderLogger mirrorLog, + LfsUpdateResult lfsUpdateResult, + sonia.scm.repository.Repository repository, + HttpConnectionFactory httpConnectionFactory, + String url, + Set alreadyVisited) { + EntryHandler entryHandler = createEntryHandler(gitRepository, mirrorLog, lfsUpdateResult, repository, httpConnectionFactory); + inspectTree(newObjectId, entryHandler, gitRepository, mirrorLog, lfsUpdateResult, url, alreadyVisited); + } + + @VisibleForTesting + EntryHandler createEntryHandler(Repository gitRepository, LfsLoaderLogger mirrorLog, LfsUpdateResult lfsUpdateResult, sonia.scm.repository.Repository repository, HttpConnectionFactory httpConnectionFactory) { + return new EntryHandler(repository, gitRepository, mirrorLog, lfsUpdateResult, httpConnectionFactory); } private void inspectTree(ObjectId newObjectId, @@ -80,20 +98,25 @@ class LfsLoader { Repository gitRepository, LfsLoaderLogger mirrorLog, LfsUpdateResult lfsUpdateResult, - String sourceUrl) { + String sourceUrl, + Set alreadyVisited) { try { gitRepository .getConfig() .setString(ConfigConstants.CONFIG_SECTION_LFS, null, ConfigConstants.CONFIG_KEY_URL, computeLfsUrl(sourceUrl)); TreeWalk treeWalk = new TreeWalk(gitRepository); - treeWalk.setFilter(new ScmLfsPointerFilter()); + treeWalk.setFilter(new FilteringScmLfsPointerFilter(alreadyVisited)); treeWalk.setRecursive(true); RevWalk revWalk = new RevWalk(gitRepository); revWalk.markStart(revWalk.parseCommit(newObjectId)); for (RevCommit commit : revWalk) { + if (!alreadyVisited.add(commit.toObjectId())) { + LOG.trace("skipping commit {}", commit); + break; + } treeWalk.reset(); treeWalk.addTree(commit.getTree()); while (treeWalk.next()) { @@ -115,7 +138,31 @@ class LfsLoader { } } - private class EntryHandler { + @VisibleForTesting + Path downloadLfsResource(Lfs lfs, Repository gitRepository, HttpConnectionFactory connectionFactory, LfsPointer lfsPointer) throws IOException { + return SmudgeFilter.downloadLfsResource( + lfs, + gitRepository, + connectionFactory, + lfsPointer + ) + .iterator() + .next(); + } + + @VisibleForTesting + void storeLfsBlob(AnyLongObjectId oid, Path tempFilePath, BlobStore lfsBlobStore) throws IOException { + LOG.trace("temporary lfs file: {}", tempFilePath); + Files.copy( + tempFilePath, + lfsBlobStore + .create(oid.name()) + .getOutputStream() + ); + } + + @VisibleForTesting + class EntryHandler { private final BlobStore lfsBlobStore; private final Repository gitRepository; @@ -137,15 +184,19 @@ class LfsLoader { this.httpConnectionFactory = httpConnectionFactory; } - private void handleTreeEntry(TreeWalk treeWalk) { + @VisibleForTesting + void handleTreeEntry(TreeWalk treeWalk) { try (InputStream is = gitRepository.open(treeWalk.getObjectId(0), Constants.OBJ_BLOB).openStream()) { LfsPointer lfsPointer = LfsPointer.parseLfsPointer(is); AnyLongObjectId oid = lfsPointer.getOid(); if (lfsBlobStore.get(oid.name()) == null) { Path tempFilePath = loadLfsFile(lfsPointer); - storeLfsBlob(oid, tempFilePath); - Files.delete(tempFilePath); + try { + storeLfsBlob(oid, tempFilePath, lfsBlobStore); + } finally { + Files.delete(tempFilePath); + } } } catch (Exception e) { LOG.warn("failed to load lfs file", e); @@ -161,23 +212,12 @@ class LfsLoader { Lfs lfs = new Lfs(gitRepository); lfs.getMediaFile(lfsPointer.getOid()); - Collection paths = SmudgeFilter.downloadLfsResource( + return downloadLfsResource( lfs, gitRepository, httpConnectionFactory, lfsPointer ); - return paths.iterator().next(); - } - - private void storeLfsBlob(AnyLongObjectId oid, Path tempFilePath) throws IOException { - LOG.trace("temporary lfs file: {}", tempFilePath); - Files.copy( - tempFilePath, - lfsBlobStore - .create(oid.name()) - .getOutputStream() - ); } } @@ -207,4 +247,22 @@ class LfsLoader { return super.include(walk); } } + + private static class FilteringScmLfsPointerFilter extends ScmLfsPointerFilter { + + private final Set alreadyVisited; + + private FilteringScmLfsPointerFilter(Set alreadyVisited) { + this.alreadyVisited = alreadyVisited; + } + + @Override + public boolean include(TreeWalk walk) throws IOException { + if (!alreadyVisited.add(walk.getObjectId(0))) { + LOG.trace("skipping object {}", walk.getObjectId(0)); + return false; + } + return super.include(walk); + } + } } diff --git a/scm-plugins/scm-git-plugin/src/test/java/sonia/scm/repository/spi/LfsLoaderTest.java b/scm-plugins/scm-git-plugin/src/test/java/sonia/scm/repository/spi/LfsLoaderTest.java new file mode 100644 index 0000000000..61784491d5 --- /dev/null +++ b/scm-plugins/scm-git-plugin/src/test/java/sonia/scm/repository/spi/LfsLoaderTest.java @@ -0,0 +1,186 @@ +/* + * MIT License + * + * Copyright (c) 2020-present Cloudogu GmbH and Contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package sonia.scm.repository.spi; + +import org.eclipse.jgit.lfs.Lfs; +import org.eclipse.jgit.lfs.LfsPointer; +import org.eclipse.jgit.lfs.lib.AnyLongObjectId; +import org.eclipse.jgit.lfs.lib.LongObjectId; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.transport.http.HttpConnectionFactory; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnit; +import org.mockito.junit.MockitoRule; +import sonia.scm.repository.GitUtil; +import sonia.scm.repository.api.MirrorCommandResult; +import sonia.scm.store.BlobStore; +import sonia.scm.web.lfs.LfsBlobStoreFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import static java.util.Arrays.asList; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/* + * This test uses a repository with the following layout: + * 7100a0f (branch/b) Add fourth png + * d89ee93 (branch/a) Add third png + | * f3134d6 (master) Add second png + |/ + * 62c8598 Add first png + * cccd744 init lfs + * + * Each commit with the text "Add ..." adds exactly one lfs file to the repository. + */ +public class LfsLoaderTest extends ZippedRepositoryTestBase { + + @Rule + public MockitoRule rule = MockitoJUnit.rule(); + + @Mock + private LfsBlobStoreFactory lfsBlobStoreFactory; + @Mock + private BlobStore lfsBlobStore; + @Mock + private LfsLoader.LfsLoaderLogger lfsLoaderLogger; + @Mock + private MirrorCommandResult.LfsUpdateResult lfsUpdateResult; + @Mock + private HttpConnectionFactory httpConnectionFactory; + + private LfsLoader lfsLoader; + + private LfsLoader.EntryHandler entryHandler; + private final Map storedBlobs = new HashMap<>(); + private Path lfsTemp; + + @Before + public void initLfsBlobStore() { + when(lfsBlobStoreFactory.getLfsBlobStore(repository)).thenReturn(lfsBlobStore); + } + + @Before + public void initLfsLoader() { + lfsLoader = new LfsLoader(lfsBlobStoreFactory) { + @Override + Path downloadLfsResource(Lfs lfs, Repository gitRepository, HttpConnectionFactory connectionFactory, LfsPointer lfsPointer) throws IOException { + Path file = lfsTemp.resolve(lfsPointer.getOid().getName()); + Files.createFile(file); + return file; + } + + @Override + void storeLfsBlob(AnyLongObjectId oid, Path tempFilePath, BlobStore lfsBlobStore) { + storedBlobs.put(oid, tempFilePath); + } + + @Override + EntryHandler createEntryHandler(Repository gitRepository, LfsLoaderLogger mirrorLog, MirrorCommandResult.LfsUpdateResult lfsUpdateResult, sonia.scm.repository.Repository repository, HttpConnectionFactory httpConnectionFactory) { + entryHandler = spy(super.createEntryHandler(gitRepository, mirrorLog, lfsUpdateResult, repository, httpConnectionFactory)); + return entryHandler; + } + }; + } + + @Before + public void initLfsTemp() throws IOException { + lfsTemp = tempFolder.newFolder("lfs").toPath(); + } + + @Test + public void shouldLoadAllLfsFiles() throws IOException { + lfsLoader.inspectTree( + ObjectId.fromString("f3134d622484981329034ef63c6c1c9b0e5c5232"), + GitUtil.open(repositoryDirectory), + lfsLoaderLogger, + lfsUpdateResult, + repository, + httpConnectionFactory, + "http://localhost:8081/scm/repo.git" + ); + + assertThat(storedBlobs) + .hasSize(2) + .containsAllEntriesOf( + Map.of( + LongObjectId.fromString("53c234e5e8472b6ac51c1ae1cab3fe06fad053beb8ebfd8977b010655bfdd3c3"), lfsTemp.resolve("53c234e5e8472b6ac51c1ae1cab3fe06fad053beb8ebfd8977b010655bfdd3c3"), + LongObjectId.fromString("4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865"), lfsTemp.resolve("4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865") + )); + assertThat(lfsTemp).isEmptyDirectory(); + } + + @Test + public void shouldCheckLfsFilesOnlyOnce() throws IOException { + Set alreadyVisited = new HashSet<>(); + + try (Repository repository = GitUtil.open(repositoryDirectory)) { + asList( + "d89ee931a676bec618177fb4ae6e056f8907a82b", // branch/a + "7100a0f377b142a9a746ee0b18cb4124309c0900" // branch/b + ) + .forEach( + objId -> lfsLoader.inspectTree( + ObjectId.fromString(objId), + repository, + lfsLoaderLogger, + lfsUpdateResult, + this.repository, + httpConnectionFactory, + "http://localhost:8081/scm/repo.git", + alreadyVisited + ) + ); + } + + assertThat(storedBlobs).hasSize(3); + // The second entry handler created for the last revision (branch/b) should be used only once, because all other + // object ids for this revision have been handled by the other revision (branch/a) before: + verify(entryHandler).handleTreeEntry(any()); + } + + @Override + protected String getType() { + return "git"; + } + + @Override + protected String getZippedRepositoryResource() { + return "sonia/scm/repository/spi/scm-git-spi-lfs-loader-test.zip"; + } +} diff --git a/scm-plugins/scm-git-plugin/src/test/resources/sonia/scm/repository/spi/scm-git-spi-lfs-loader-test.zip b/scm-plugins/scm-git-plugin/src/test/resources/sonia/scm/repository/spi/scm-git-spi-lfs-loader-test.zip new file mode 100644 index 0000000000..b2c4ed57e4 Binary files /dev/null and b/scm-plugins/scm-git-plugin/src/test/resources/sonia/scm/repository/spi/scm-git-spi-lfs-loader-test.zip differ