mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-03 20:36:07 +01:00 
			
		
		
		
	improve performance of diffs (#32393)
This has two major changes that significantly reduce the amount of work done for large diffs: * Kill a running git process when reaching the maximum number of files in a diff, preventing it from processing the entire diff. * When loading a diff with the URL param `file-only=true`, skip loading stats. This speeds up loading both hidden files of a diff and sections of a diff when clicking the "Show More" button. A couple of minor things from profiling are also included: * Reuse existing repo in `PrepareViewPullInfo` if head and base are the same. The performance impact is going to depend heavily on the individual diff and the hardware it runs on, but when testing locally on a diff changing 100k+ lines over hundreds of files, I'm seeing a roughly 75% reduction in time to load the result of "Show More" --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
		@@ -328,6 +328,7 @@ func Diff(ctx *context.Context) {
 | 
			
		||||
		MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters,
 | 
			
		||||
		MaxFiles:           maxFiles,
 | 
			
		||||
		WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
 | 
			
		||||
		FileOnly:           fileOnly,
 | 
			
		||||
	}, files...)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		ctx.NotFound("GetDiff", err)
 | 
			
		||||
 
 | 
			
		||||
@@ -611,6 +611,8 @@ func PrepareCompareDiff(
 | 
			
		||||
		maxLines, maxFiles = -1, -1
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fileOnly := ctx.FormBool("file-only")
 | 
			
		||||
 | 
			
		||||
	diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo,
 | 
			
		||||
		&gitdiff.DiffOptions{
 | 
			
		||||
			BeforeCommitID:     beforeCommitID,
 | 
			
		||||
@@ -621,6 +623,7 @@ func PrepareCompareDiff(
 | 
			
		||||
			MaxFiles:           maxFiles,
 | 
			
		||||
			WhitespaceBehavior: whitespaceBehavior,
 | 
			
		||||
			DirectComparison:   ci.DirectComparison,
 | 
			
		||||
			FileOnly:           fileOnly,
 | 
			
		||||
		}, ctx.FormStrings("files")...)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err)
 | 
			
		||||
 
 | 
			
		||||
@@ -395,12 +395,12 @@ func PrepareViewPullInfo(ctx *context.Context, issue *issues_model.Issue) *git.C
 | 
			
		||||
	var headBranchSha string
 | 
			
		||||
	// HeadRepo may be missing
 | 
			
		||||
	if pull.HeadRepo != nil {
 | 
			
		||||
		headGitRepo, err := gitrepo.OpenRepository(ctx, pull.HeadRepo)
 | 
			
		||||
		headGitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, pull.HeadRepo)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			ctx.ServerError("OpenRepository", err)
 | 
			
		||||
			ctx.ServerError("RepositoryFromContextOrOpen", err)
 | 
			
		||||
			return nil
 | 
			
		||||
		}
 | 
			
		||||
		defer headGitRepo.Close()
 | 
			
		||||
		defer closer.Close()
 | 
			
		||||
 | 
			
		||||
		if pull.Flow == issues_model.PullRequestFlowGithub {
 | 
			
		||||
			headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch)
 | 
			
		||||
@@ -747,6 +747,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi
 | 
			
		||||
		MaxLineCharacters:  setting.Git.MaxGitDiffLineCharacters,
 | 
			
		||||
		MaxFiles:           maxFiles,
 | 
			
		||||
		WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
 | 
			
		||||
		FileOnly:           fileOnly,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if !willShowSpecifiedCommit {
 | 
			
		||||
 
 | 
			
		||||
@@ -380,18 +380,11 @@ func (diffFile *DiffFile) GetType() int {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
 | 
			
		||||
func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
 | 
			
		||||
func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommit, rightCommit *git.Commit) *DiffSection {
 | 
			
		||||
	if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
	leftCommit, err := gitRepo.GetCommit(leftCommitID)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
	rightCommit, err := gitRepo.GetCommit(rightCommitID)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	lastSection := diffFile.Sections[len(diffFile.Sections)-1]
 | 
			
		||||
	lastLine := lastSection.Lines[len(lastSection.Lines)-1]
 | 
			
		||||
	leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
 | 
			
		||||
@@ -536,11 +529,6 @@ parsingLoop:
 | 
			
		||||
			lastFile := createDiffFile(diff, line)
 | 
			
		||||
			diff.End = lastFile.Name
 | 
			
		||||
			diff.IsIncomplete = true
 | 
			
		||||
			_, err := io.Copy(io.Discard, reader)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				// By the definition of io.Copy this never returns io.EOF
 | 
			
		||||
				return diff, fmt.Errorf("error during io.Copy: %w", err)
 | 
			
		||||
			}
 | 
			
		||||
			break parsingLoop
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
@@ -1101,6 +1089,7 @@ type DiffOptions struct {
 | 
			
		||||
	MaxFiles           int
 | 
			
		||||
	WhitespaceBehavior git.TrustedCmdArgs
 | 
			
		||||
	DirectComparison   bool
 | 
			
		||||
	FileOnly           bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// GetDiff builds a Diff between two commits of a repository.
 | 
			
		||||
@@ -1109,12 +1098,16 @@ type DiffOptions struct {
 | 
			
		||||
func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
 | 
			
		||||
	repoPath := gitRepo.Path
 | 
			
		||||
 | 
			
		||||
	var beforeCommit *git.Commit
 | 
			
		||||
	commit, err := gitRepo.GetCommit(opts.AfterCommitID)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	cmdDiff := git.NewCommand(gitRepo.Ctx)
 | 
			
		||||
	cmdCtx, cmdCancel := context.WithCancel(ctx)
 | 
			
		||||
	defer cmdCancel()
 | 
			
		||||
 | 
			
		||||
	cmdDiff := git.NewCommand(cmdCtx)
 | 
			
		||||
	objectFormat, err := gitRepo.GetObjectFormat()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
@@ -1136,6 +1129,12 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
 | 
			
		||||
			AddArguments(opts.WhitespaceBehavior...).
 | 
			
		||||
			AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
 | 
			
		||||
		opts.BeforeCommitID = actualBeforeCommitID
 | 
			
		||||
 | 
			
		||||
		var err error
 | 
			
		||||
		beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
 | 
			
		||||
@@ -1163,14 +1162,16 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
 | 
			
		||||
			Dir:     repoPath,
 | 
			
		||||
			Stdout:  writer,
 | 
			
		||||
			Stderr:  stderr,
 | 
			
		||||
		}); err != nil {
 | 
			
		||||
		}); err != nil && err.Error() != "signal: killed" {
 | 
			
		||||
			log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String())
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		_ = writer.Close()
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
	diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
 | 
			
		||||
	diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
 | 
			
		||||
	// Ensure the git process is killed if it didn't exit already
 | 
			
		||||
	cmdCancel()
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("unable to ParsePatch: %w", err)
 | 
			
		||||
	}
 | 
			
		||||
@@ -1205,37 +1206,28 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
 | 
			
		||||
		}
 | 
			
		||||
		diffFile.IsGenerated = isGenerated.Value()
 | 
			
		||||
 | 
			
		||||
		tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
 | 
			
		||||
		tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit)
 | 
			
		||||
		if tailSection != nil {
 | 
			
		||||
			diffFile.Sections = append(diffFile.Sections, tailSection)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	separator := "..."
 | 
			
		||||
	if opts.DirectComparison {
 | 
			
		||||
		separator = ".."
 | 
			
		||||
	if opts.FileOnly {
 | 
			
		||||
		return diff, nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
 | 
			
		||||
	if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() {
 | 
			
		||||
		diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
 | 
			
		||||
	}
 | 
			
		||||
	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
 | 
			
		||||
	if err != nil && strings.Contains(err.Error(), "no merge base") {
 | 
			
		||||
		// git >= 2.28 now returns an error if base and head have become unrelated.
 | 
			
		||||
		// previously it would return the results of git diff --shortstat base head so let's try that...
 | 
			
		||||
		diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
 | 
			
		||||
		diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
 | 
			
		||||
	}
 | 
			
		||||
	stats, err := GetPullDiffStats(gitRepo, opts)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion = stats.NumFiles, stats.TotalAddition, stats.TotalDeletion
 | 
			
		||||
 | 
			
		||||
	return diff, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type PullDiffStats struct {
 | 
			
		||||
	TotalAddition, TotalDeletion int
 | 
			
		||||
	NumFiles, TotalAddition, TotalDeletion int
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// GetPullDiffStats
 | 
			
		||||
@@ -1259,12 +1251,12 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat
 | 
			
		||||
		diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
 | 
			
		||||
	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
 | 
			
		||||
	if err != nil && strings.Contains(err.Error(), "no merge base") {
 | 
			
		||||
		// git >= 2.28 now returns an error if base and head have become unrelated.
 | 
			
		||||
		// previously it would return the results of git diff --shortstat base head so let's try that...
 | 
			
		||||
		diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
 | 
			
		||||
		_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
 | 
			
		||||
		diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
 | 
			
		||||
	}
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
 
 | 
			
		||||
@@ -358,6 +358,7 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
 | 
			
		||||
			Stderr:  stderr,
 | 
			
		||||
			PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
 | 
			
		||||
				_ = stdoutWriter.Close()
 | 
			
		||||
				defer cancel()
 | 
			
		||||
				diff, finalErr = gitdiff.ParsePatch(t.ctx, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "")
 | 
			
		||||
				if finalErr != nil {
 | 
			
		||||
					log.Error("ParsePatch: %v", finalErr)
 | 
			
		||||
@@ -371,11 +372,15 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
 | 
			
		||||
			log.Error("Unable to ParsePatch in temporary repo %s (%s). Error: %v", t.repo.FullName(), t.basePath, finalErr)
 | 
			
		||||
			return nil, finalErr
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// If the process exited early, don't error
 | 
			
		||||
		if err != context.Canceled {
 | 
			
		||||
			log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
 | 
			
		||||
				t.repo.FullName(), t.basePath, err, stderr)
 | 
			
		||||
			return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
 | 
			
		||||
				t.repo.FullName(), err, stderr)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(t.ctx, t.basePath, git.TrustedCmdArgs{"--cached"}, "HEAD")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user