mirror of
https://github.com/go-gitea/gitea.git
synced 2026-02-15 02:57:40 +01:00
1. Fix a performance regression when using line-by-line highlighting.
   * The root cause is that chroma's `lexers.Get` is slow and the lexer cache was lost during recent changes.
2. Clarify the chroma lexer detection behavior.
   * We now fully manage our own logic to detect the lexer and handle overriding problems, so everything is under our control.
3. Clarify the "code analyze" behavior; there are now only 2 usages:
   * Only use the file name and language to detect the lexer (very fast), mainly for the "diff" page, which contains a lot of files.
   * If no lexer is detected by file name and language, use the code content to detect again (slow), mainly for the "view file" or "blame" page, which gives the best result.
4. Fix a git diff bug that caused a "broken pipe" error for large diff files.
333 lines
11 KiB
Go
333 lines
11 KiB
Go
// Copyright 2020 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package git
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"code.gitea.io/gitea/modules/git/gitcmd"
|
|
"code.gitea.io/gitea/modules/log"
|
|
"code.gitea.io/gitea/modules/setting"
|
|
"code.gitea.io/gitea/modules/util"
|
|
)
|
|
|
|
// RawDiffType names the output format of a raw diff: "diff" or "patch".
type RawDiffType string

// Supported raw diff output formats.
const (
	// RawDiffNormal requests plain diff output.
	RawDiffNormal = RawDiffType("diff")
	// RawDiffPatch requests patch output.
	RawDiffPatch = RawDiffType("patch")
)
|
|
|
|
// GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
|
|
func GetRawDiff(repo *Repository, commitID string, diffType RawDiffType, writer io.Writer) (retErr error) {
|
|
cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, "", commitID, diffType, "")
|
|
if err != nil {
|
|
return fmt.Errorf("getRepoRawDiffForFileCmd: %w", err)
|
|
}
|
|
return cmd.WithStdoutCopy(writer).RunWithStderr(repo.Ctx)
|
|
}
|
|
|
|
// GetFileDiffCutAroundLine cuts the old or new part of the diff of a file around a specific line number
|
|
func GetFileDiffCutAroundLine(
|
|
repo *Repository, startCommit, endCommit, treePath string,
|
|
line int64, old bool, numbersOfLine int,
|
|
) (ret string, retErr error) {
|
|
cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, startCommit, endCommit, RawDiffNormal, treePath)
|
|
if err != nil {
|
|
return "", fmt.Errorf("getRepoRawDiffForFileCmd: %w", err)
|
|
}
|
|
stdoutReader, stdoutClose := cmd.MakeStdoutPipe()
|
|
defer stdoutClose()
|
|
cmd.WithPipelineFunc(func(ctx gitcmd.Context) error {
|
|
ret, err = CutDiffAroundLine(stdoutReader, line, old, numbersOfLine)
|
|
return err
|
|
})
|
|
return ret, cmd.RunWithStderr(repo.Ctx)
|
|
}
|
|
|
|
// getRepoRawDiffForFile returns an io.Reader for the diff results of file in given commit ID
|
|
// and a "finish" function to wait for the git command and clean up resources after reading is done.
|
|
func getRepoRawDiffForFileCmd(_ context.Context, repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string) (*gitcmd.Command, error) {
|
|
commit, err := repo.GetCommit(endCommit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var files []string
|
|
if len(file) > 0 {
|
|
files = append(files, file)
|
|
}
|
|
|
|
cmd := gitcmd.NewCommand().WithDir(repo.Path)
|
|
switch diffType {
|
|
case RawDiffNormal:
|
|
if len(startCommit) != 0 {
|
|
cmd.AddArguments("diff").
|
|
AddOptionFormat("--find-renames=%s", setting.Git.DiffRenameSimilarityThreshold).
|
|
AddDynamicArguments(startCommit, endCommit).AddDashesAndList(files...)
|
|
} else if commit.ParentCount() == 0 {
|
|
cmd.AddArguments("show").AddDynamicArguments(endCommit).AddDashesAndList(files...)
|
|
} else {
|
|
c, err := commit.Parent(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cmd.AddArguments("diff").
|
|
AddOptionFormat("--find-renames=%s", setting.Git.DiffRenameSimilarityThreshold).
|
|
AddDynamicArguments(c.ID.String(), endCommit).AddDashesAndList(files...)
|
|
}
|
|
case RawDiffPatch:
|
|
if len(startCommit) != 0 {
|
|
query := fmt.Sprintf("%s...%s", endCommit, startCommit)
|
|
cmd.AddArguments("format-patch", "--no-signature", "--stdout", "--root").AddDynamicArguments(query).AddDashesAndList(files...)
|
|
} else if commit.ParentCount() == 0 {
|
|
cmd.AddArguments("format-patch", "--no-signature", "--stdout", "--root").AddDynamicArguments(endCommit).AddDashesAndList(files...)
|
|
} else {
|
|
c, err := commit.Parent(0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
query := fmt.Sprintf("%s...%s", endCommit, c.ID.String())
|
|
cmd.AddArguments("format-patch", "--no-signature", "--stdout").AddDynamicArguments(query).AddDashesAndList(files...)
|
|
}
|
|
default:
|
|
return nil, util.NewInvalidArgumentErrorf("invalid diff type: %s", diffType)
|
|
}
|
|
return cmd, nil
|
|
}
|
|
|
|
// ParseDiffHunkString parse the diff hunk content and return
|
|
func ParseDiffHunkString(diffHunk string) (leftLine, leftHunk, rightLine, rightHunk int) {
|
|
ss := strings.Split(diffHunk, "@@")
|
|
ranges := strings.Split(ss[1][1:], " ")
|
|
leftRange := strings.Split(ranges[0], ",")
|
|
leftLine, _ = strconv.Atoi(leftRange[0][1:])
|
|
if len(leftRange) > 1 {
|
|
leftHunk, _ = strconv.Atoi(leftRange[1])
|
|
}
|
|
if len(ranges) > 1 {
|
|
rightRange := strings.Split(ranges[1], ",")
|
|
rightLine, _ = strconv.Atoi(rightRange[0])
|
|
if len(rightRange) > 1 {
|
|
rightHunk, _ = strconv.Atoi(rightRange[1])
|
|
}
|
|
} else {
|
|
log.Debug("Parse line number failed: %v", diffHunk)
|
|
rightLine = leftLine
|
|
rightHunk = leftHunk
|
|
}
|
|
if rightLine == 0 {
|
|
// FIXME: GIT-DIFF-CUT-BUG search this tag to see details
|
|
// this is only a hacky patch, the rightLine&rightHunk might still be incorrect in some cases.
|
|
rightLine++
|
|
}
|
|
return leftLine, leftHunk, rightLine, rightHunk
|
|
}
|
|
|
|
// hunkRegex matches a unified diff hunk header and captures the begin line and
// the optional line count of the old and the new side.
// Example: @@ -1,8 +1,9 @@ => [..., 1, 8, 1, 9]
var hunkRegex = regexp.MustCompile(`^@@ -(?P<beginOld>[0-9]+)(,(?P<endOld>[0-9]+))? \+(?P<beginNew>[0-9]+)(,(?P<endNew>[0-9]+))? @@`)

// cmdDiffHead is the prefix of the line that starts the diff of a file.
const cmdDiffHead = "diff --git "

// isHeader reports whether lof is a file header line of a diff: a "diff --git"
// line, or (only outside of a hunk) a "---" / "+++" line. Inside a hunk such
// lines are removed/added content and must not be taken for headers.
func isHeader(lof string, inHunk bool) bool {
	return strings.HasPrefix(lof, cmdDiffHead) || (!inHunk && (strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++")))
}

// cutDiffLineMarker returns the marker (first byte) of a hunk content line.
// An empty line is treated as a context line, so a diff whose blank context
// lines lost their leading space does not cause an out-of-range access.
func cutDiffLineMarker(lof string) byte {
	if lof == "" {
		return ' '
	}
	return lof[0]
}

// CutDiffAroundLine cuts a diff of a file in way that only the given line + numberOfLine above it will be shown
// it also recalculates hunks and adds the appropriate headers to the new diff.
// Warning: Only one-file diffs are allowed.
//
//   - originalDiff: the diff of a single file
//   - line: the line number to search for, on the old side when old is true,
//     otherwise on the new side
//   - numbersOfLine: the maximum number of hunk content lines to keep
//
// It returns an empty string when line or numbersOfLine is not positive or when
// no hunk contains the line. An error is returned when reading originalDiff
// fails or when a line starting with "@@" is not a valid hunk header.
func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) (string, error) {
	if line <= 0 || numbersOfLine <= 0 {
		// no line or num of lines => no diff
		return "", nil
	}

	scanner := bufio.NewScanner(originalDiff)
	var hunk []string

	// begin is the start of the hunk containing searched line
	// end is the end of the hunk ...
	// currentLine is the line number on the side of the searched line (differentiated by old)
	// otherLine is the line number on the opposite side of the searched line (differentiated by old)
	var begin, end, currentLine, otherLine int64
	var headerLines int

	inHunk := false

	for scanner.Scan() {
		lof := scanner.Text()

		// Add header to enable parsing
		if isHeader(lof, inHunk) {
			if strings.HasPrefix(lof, cmdDiffHead) {
				inHunk = false
			}
			hunk = append(hunk, lof)
			headerLines++
		}
		if currentLine > line {
			// the searched line has been passed, the rest is not needed
			break
		}
		// Detect "hunk" with contains commented lof
		if strings.HasPrefix(lof, "@@") {
			inHunk = true
			// Already got our hunk. End of hunk detected!
			if len(hunk) > headerLines {
				break
			}
			submatches := hunkRegex.FindStringSubmatch(lof)
			if submatches == nil {
				// e.g. a truncated header or a combined diff ("@@@ ... @@@")
				return "", fmt.Errorf("CutDiffAroundLine: invalid hunk header: %q", lof)
			}
			// groupInt returns the number captured by the named group. The regex only
			// captures decimal digits, and an omitted count is "" which parses to 0,
			// so the parse error is deliberately ignored.
			groupInt := func(name string) int64 {
				v, _ := strconv.ParseInt(submatches[hunkRegex.SubexpIndex(name)], 10, 64)
				return v
			}
			// otherLine is initialized with the begin of the opposite side
			if old {
				begin, end, otherLine = groupInt("beginOld"), groupInt("endOld"), groupInt("beginNew")
			} else {
				begin, end, otherLine = groupInt("beginNew"), groupInt("endNew"), groupInt("beginOld")
			}
			end += begin // end is for real only the number of lines in hunk
			// lof is between begin and end
			if begin <= line && end >= line {
				hunk = append(hunk, lof)
				currentLine = begin
				continue
			}
		} else if len(hunk) > headerLines {
			hunk = append(hunk, lof)
			// Count lines in context
			switch cutDiffLineMarker(lof) {
			case '+':
				if !old {
					currentLine++
				} else {
					otherLine++
				}
			case '-':
				if old {
					currentLine++
				} else {
					otherLine++
				}
			case '\\':
				// FIXME: handle `\ No newline at end of file`
			default:
				currentLine++
				otherLine++
			}
		}
	}
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("CutDiffAroundLine: scan: %w", err)
	}

	// No hunk found
	if currentLine == 0 {
		return "", nil
	}
	// headerLines + hunkLine (1) = totalNonCodeLines
	if len(hunk)-headerLines-1 <= numbersOfLine {
		// No need to cut the hunk => return existing hunk
		return strings.Join(hunk, "\n"), nil
	}

	// currentLine/otherLine now point behind the last kept line, walk back from there
	var oldBegin, oldNumOfLines, newBegin, newNumOfLines int64
	if old {
		oldBegin = currentLine
		newBegin = otherLine
	} else {
		oldBegin = otherLine
		newBegin = currentLine
	}
	// headers + hunk header
	newHunk := make([]string, headerLines)
	// transfer existing headers
	copy(newHunk, hunk[:headerLines])
	// transfer last n lines, plus one extra leading slot which is overwritten
	// with the new hunk header below
	newHunk = append(newHunk, hunk[len(hunk)-numbersOfLine-1:]...)
	// calculate newBegin, ... by counting lines
	for i := len(hunk) - 1; i >= len(hunk)-numbersOfLine; i-- {
		switch cutDiffLineMarker(hunk[i]) {
		case '+':
			newBegin--
			newNumOfLines++
		case '-':
			oldBegin--
			oldNumOfLines++
		default:
			oldBegin--
			newBegin--
			newNumOfLines++
			oldNumOfLines++
		}
	}

	// "git diff" outputs "@@ -1 +1,3 @@" for "OLD" => "A\nB\nC"
	// FIXME: GIT-DIFF-CUT-BUG But there is a bug in CutDiffAroundLine, then the "Patch" stored in the comment model becomes "@@ -1,1 +0,4 @@"
	// It may generate incorrect results for difference cases, for example: delete 2 line add 1 line, delete 2 line add 2 line etc, need to double check.
	// For example: "L1\nL2" => "A\nB", then the patch shows "L2" as line 1 on the left (deleted part)

	// construct the new hunk header
	newHunk[headerLines] = fmt.Sprintf("@@ -%d,%d +%d,%d @@",
		oldBegin, oldNumOfLines, newBegin, newNumOfLines)
	return strings.Join(newHunk, "\n"), nil
}
|
|
|
|
// GetAffectedFiles returns the affected files between two commits
|
|
func GetAffectedFiles(repo *Repository, branchName, oldCommitID, newCommitID string, env []string) ([]string, error) {
|
|
if oldCommitID == emptySha1ObjectID.String() || oldCommitID == emptySha256ObjectID.String() {
|
|
startCommitID, err := repo.GetCommitBranchStart(env, branchName, newCommitID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if startCommitID == "" {
|
|
return nil, fmt.Errorf("cannot find the start commit of %s", newCommitID)
|
|
}
|
|
oldCommitID = startCommitID
|
|
}
|
|
|
|
affectedFiles := make([]string, 0, 32)
|
|
|
|
// Run `git diff --name-only` to get the names of the changed files
|
|
cmd := gitcmd.NewCommand("diff", "--name-only").AddDynamicArguments(oldCommitID, newCommitID)
|
|
stdoutReader, stdoutReaderClose := cmd.MakeStdoutPipe()
|
|
defer stdoutReaderClose()
|
|
err := cmd.WithEnv(env).WithDir(repo.Path).
|
|
WithPipelineFunc(func(ctx gitcmd.Context) error {
|
|
// Now scan the output from the command
|
|
scanner := bufio.NewScanner(stdoutReader)
|
|
for scanner.Scan() {
|
|
path := strings.TrimSpace(scanner.Text())
|
|
if len(path) == 0 {
|
|
continue
|
|
}
|
|
affectedFiles = append(affectedFiles, path)
|
|
}
|
|
return scanner.Err()
|
|
}).
|
|
Run(repo.Ctx)
|
|
if err != nil {
|
|
log.Error("Unable to get affected files for commits from %s to %s in %s: %v", oldCommitID, newCommitID, repo.Path, err)
|
|
}
|
|
|
|
return affectedFiles, err
|
|
}
|