diff --git a/modules/git/catfile_batch_reader.go b/modules/git/catfile_batch_reader.go
index 7d2e496ace..8a0b342079 100644
--- a/modules/git/catfile_batch_reader.go
+++ b/modules/git/catfile_batch_reader.go
@@ -12,11 +12,15 @@ import (
"math"
"strconv"
"strings"
+ "sync/atomic"
+ "time"
"code.gitea.io/gitea/modules/git/gitcmd"
"code.gitea.io/gitea/modules/log"
)
+var catFileBatchDebugWaitClose atomic.Int64
+
type catFileBatchCommunicator struct {
cancel context.CancelFunc
reqWriter io.Writer
@@ -36,7 +40,14 @@ func newCatFileBatch(ctx context.Context, repoPath string, cmdCatFile *gitcmd.Co
ctx, ctxCancel := context.WithCancelCause(ctx)
// We often want to feed the commits in order into cat-file --batch, followed by their trees and subtrees as necessary.
- stdinWriter, stdoutReader, pipeClose := cmdCatFile.MakeStdinStdoutPipe()
+ stdinWriter, stdoutReader, stdPipeClose := cmdCatFile.MakeStdinStdoutPipe()
+ pipeClose := func() {
+ if delay := catFileBatchDebugWaitClose.Load(); delay > 0 {
+ time.Sleep(time.Duration(delay)) // for testing purpose only
+ }
+ stdPipeClose()
+ }
+
ret = &catFileBatchCommunicator{
debugGitCmd: cmdCatFile,
cancel: func() { ctxCancel(nil) },
diff --git a/modules/git/catfile_batch_test.go b/modules/git/catfile_batch_test.go
index 8f6b1f5eff..69662ffc1a 100644
--- a/modules/git/catfile_batch_test.go
+++ b/modules/git/catfile_batch_test.go
@@ -5,9 +5,11 @@ package git
import (
"io"
+ "os"
"path/filepath"
"sync"
"testing"
+ "time"
"code.gitea.io/gitea/modules/test"
@@ -37,6 +39,45 @@ func testCatFileBatch(t *testing.T) {
require.Error(t, err)
})
+ simulateQueryTerminated := func(pipeCloseDelay, pipeReadDelay time.Duration) (errRead error) {
+ catFileBatchDebugWaitClose.Store(int64(pipeCloseDelay))
+ defer catFileBatchDebugWaitClose.Store(0)
+ batch, err := NewBatch(t.Context(), filepath.Join(testReposDir, "repo1_bare"))
+ require.NoError(t, err)
+ defer batch.Close()
+ _, _ = batch.QueryInfo("e2129701f1a4d54dc44f03c93bca0a2aec7c5449")
+ var c *catFileBatchCommunicator
+ switch b := batch.(type) {
+ case *catFileBatchLegacy:
+ c = b.batchCheck
+ _, _ = c.reqWriter.Write([]byte("in-complete-line-"))
+ case *catFileBatchCommand:
+ c = b.batch
+ _, _ = c.reqWriter.Write([]byte("info"))
+ default:
+ t.FailNow()
+ }
+
+ wg := sync.WaitGroup{}
+ wg.Go(func() {
+ time.Sleep(pipeReadDelay)
+ var n int
+ n, errRead = c.respReader.Read(make([]byte, 100))
+ assert.Zero(t, n)
+ })
+ time.Sleep(10 * time.Millisecond)
+ c.debugGitCmd.DebugKill()
+ wg.Wait()
+ return errRead
+ }
+
+ t.Run("QueryTerminated", func(t *testing.T) {
+ err := simulateQueryTerminated(0, 20*time.Millisecond)
+ assert.ErrorIs(t, err, os.ErrClosed) // pipes are closed faster
+ err = simulateQueryTerminated(40*time.Millisecond, 20*time.Millisecond)
+ assert.ErrorIs(t, err, io.EOF) // reader is faster
+ })
+
batch, err := NewBatch(t.Context(), filepath.Join(testReposDir, "repo1_bare"))
require.NoError(t, err)
defer batch.Close()
@@ -60,30 +101,4 @@ func testCatFileBatch(t *testing.T) {
require.NoError(t, err)
require.Equal(t, "file1\n", string(content))
})
-
- t.Run("QueryTerminated", func(t *testing.T) {
- var c *catFileBatchCommunicator
- switch b := batch.(type) {
- case *catFileBatchLegacy:
- c = b.batchCheck
- _, _ = c.reqWriter.Write([]byte("in-complete-line-"))
- case *catFileBatchCommand:
- c = b.batch
- _, _ = c.reqWriter.Write([]byte("info"))
- default:
- t.FailNow()
- return
- }
-
- wg := sync.WaitGroup{}
- wg.Go(func() {
- buf := make([]byte, 100)
- _, _ = c.respReader.Read(buf)
- n, errRead := c.respReader.Read(buf)
- assert.Zero(t, n)
- assert.ErrorIs(t, errRead, io.EOF) // the pipe is closed due to command being killed
- })
- c.debugGitCmd.DebugKill()
- wg.Wait()
- })
}
diff --git a/modules/git/gitcmd/pipe.go b/modules/git/gitcmd/pipe.go
index a1e91fdf8e..d0ce3e2dc6 100644
--- a/modules/git/gitcmd/pipe.go
+++ b/modules/git/gitcmd/pipe.go
@@ -9,6 +9,14 @@ import (
)
type PipeBufferReader interface {
+ // Read should be used in the same goroutine as command's Wait
+ // When Reader in one goroutine, command's Wait in another goroutine, then the command exits, the pipe will be closed:
+ // * If the Reader goroutine reads faster, it will read all remaining data and then get io.EOF
+ // * But this io.EOF doesn't mean the Reader has gotten complete data, the data might still be corrupted
+ // * If the Reader goroutine reads slower, it will get os.ErrClosed because the os.Pipe is closed ahead when the command exits
+ //
+ // When using 2 goroutines, no clear solution to distinguish these two cases or make Reader knows whether the data is complete
+ // It should avoid using Reader in a different goroutine than the command if the Read error needs to be handled.
Read(p []byte) (n int, err error)
Bytes() []byte
}
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index a62177c45c..6b29582208 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -16,6 +16,7 @@ import (
"path"
"sort"
"strings"
+ "time"
"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
@@ -123,8 +124,14 @@ type DiffHTMLOperation struct {
// BlobExcerptChunkSize represent max lines of excerpt
const BlobExcerptChunkSize = 20
-// MaxDiffHighlightEntireFileSize is the maximum file size that will be highlighted with "entire file diff"
-const MaxDiffHighlightEntireFileSize = 1 * 1024 * 1024
+// Chroma seems extremely slow when highlighting large files, it might take dozens or hundreds of milliseconds.
+// When fully highlighting a diff with a lot of large files, it would take many seconds or even dozens of seconds.
+// So, don't highlight the entire file if it's too large, or highlighting takes too long.
+// When there is no full-file highlighting, the legacy "line-by-line" highlighting is still applied as the fallback.
+const (
+ MaxFullFileHighlightSizeLimit = 256 * 1024
+ MaxFullFileHighlightTimeLimit = 2 * time.Second
+)
// GetType returns the type of DiffLine.
func (d *DiffLine) GetType() int {
@@ -564,7 +571,7 @@ func getCommitFileLineCountAndLimitedContent(commit *git.Commit, filePath string
if err != nil {
return 0, nil
}
- w := &limitByteWriter{limit: MaxDiffHighlightEntireFileSize + 1}
+ w := &limitByteWriter{limit: MaxFullFileHighlightSizeLimit + 1}
lineCount, err = blob.GetBlobLineCount(w)
if err != nil {
return 0, nil
@@ -1317,6 +1324,8 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
return nil, err
}
+ startTime := time.Now()
+
checker, err := attribute.NewBatchChecker(gitRepo, opts.AfterCommitID, []string{attribute.LinguistVendored, attribute.LinguistGenerated, attribute.LinguistLanguage, attribute.GitlabLanguage, attribute.Diff})
if err != nil {
return nil, err
@@ -1356,7 +1365,8 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
diffFile.Sections = append(diffFile.Sections, tailSection)
}
- shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
+ shouldFullFileHighlight := attrDiff.Value() == "" // only do highlight if no custom diff command
+ shouldFullFileHighlight = shouldFullFileHighlight && time.Since(startTime) < MaxFullFileHighlightTimeLimit
if shouldFullFileHighlight {
if limitedContent.LeftContent != nil {
diffFile.highlightedLeftLines.value = highlightCodeLinesForDiffFile(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes())
@@ -1380,7 +1390,7 @@ func highlightCodeLinesForDiffFile(diffFile *DiffFile, isLeft bool, rawContent [
}
func highlightCodeLines(name, lang string, sections []*DiffSection, isLeft bool, rawContent []byte) map[int]template.HTML {
- if setting.Git.DisableDiffHighlight || len(rawContent) > MaxDiffHighlightEntireFileSize {
+ if setting.Git.DisableDiffHighlight || len(rawContent) > MaxFullFileHighlightSizeLimit {
return nil
}
diff --git a/services/gitdiff/highlightdiff.go b/services/gitdiff/highlightdiff.go
index 47b41319cb..1de3963788 100644
--- a/services/gitdiff/highlightdiff.go
+++ b/services/gitdiff/highlightdiff.go
@@ -298,15 +298,21 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlContent template.HTML) s
return res.String()
}
-func (hcd *highlightCodeDiff) extractNextPlaceholder(buf []byte, lastIdx int) (idx int, placeholder rune, runeLen int, token string) {
- for idx = lastIdx; idx < len(buf); {
- placeholder, runeLen = utf8.DecodeRune(buf[idx:])
- if token = hcd.placeholderTokenMap[placeholder]; token != "" {
- break
+// recoverOneRune tries to recover one rune
+// * if the rune is a placeholder, it will be recovered to the corresponding content
+// * otherwise it will be returned as is
+func (hcd *highlightCodeDiff) recoverOneRune(buf []byte) (r rune, runeLen int, isSingleTag bool, recovered string) {
+ r, runeLen = utf8.DecodeRune(buf)
+ token := hcd.placeholderTokenMap[r]
+ if token == "" {
+ return r, runeLen, false, "" // rune itself, not a placeholder
+ } else if token[0] == '<' {
+ if token[1] == '<' {
+ return 0, runeLen, false, token[1 : len(token)-1] // full tag `<content>`, recover to `content`
}
- idx += runeLen
+ return r, runeLen, true, token // single tag
}
- return idx, placeholder, runeLen, token
+ return 0, runeLen, false, token // HTML entity
}
func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
@@ -315,49 +321,65 @@ func (hcd *highlightCodeDiff) recoverOneDiff(str string) template.HTML {
var diffCodeOpenTag string
diffCodeCloseTag := hcd.placeholderTokenMap[hcd.diffCodeClose]
strBytes := util.UnsafeStringToBytes(str)
+
+ // this loop is slightly longer than expected, for performance consideration
for idx := 0; idx < len(strBytes); {
- newIdx, placeholder, lastRuneLen, token := hcd.extractNextPlaceholder(strBytes, idx)
- if newIdx != idx {
+ // take a look at the next rune
+ r, runeLen, isSingleTag, recovered := hcd.recoverOneRune(strBytes[idx:])
+ idx += runeLen
+
+ // loop section 1: if it isn't a single tag, then try to find the following runes until the next single tag, and recover them together
+ if !isSingleTag {
if diffCodeOpenTag != "" {
+ // start the "added/removed diff tag" if the current token is in the diff part
sb.WriteString(diffCodeOpenTag)
- sb.Write(strBytes[idx:newIdx])
- sb.WriteString(diffCodeCloseTag)
- } else {
- sb.Write(strBytes[idx:newIdx])
}
- } // else: no text content before, the current token is a placeholder
- if token == "" {
- break // reaches the string end, no more placeholder
- }
- idx = newIdx + lastRuneLen
-
- // for HTML entity
- if token[0] == '&' {
- sb.WriteString(token)
- continue
- }
-
- // for various HTML tags
- var recovered string
- if token[1] == '<' { // full tag `<content>`, recover to `content`
- recovered = token[1 : len(token)-1]
- if diffCodeOpenTag != "" {
- recovered = diffCodeOpenTag + recovered + diffCodeCloseTag
- } // else: just use the recovered content
- } else if token[1] != '/' { // opening tag
- if placeholder == hcd.diffCodeAddedOpen || placeholder == hcd.diffCodeRemovedOpen {
- diffCodeOpenTag = token
+ if recovered != "" {
+ sb.WriteString(recovered)
+ } else {
+ sb.WriteRune(r)
+ }
+ // inner loop to recover following runes until the next single tag
+ for idx < len(strBytes) {
+ r, runeLen, isSingleTag, recovered = hcd.recoverOneRune(strBytes[idx:])
+ idx += runeLen
+ if isSingleTag {
+ break
+ }
+ if recovered != "" {
+ sb.WriteString(recovered)
+ } else {
+ sb.WriteRune(r)
+ }
+ }
+ if diffCodeOpenTag != "" {
+ // end the "added/removed diff tag" if the current token is in the diff part
+ sb.WriteString(diffCodeCloseTag)
+ }
+ }
+
+ if !isSingleTag {
+ break // the inner loop has already consumed all remaining runes, no more single tag found
+ }
+
+ // loop section 2: for opening/closing HTML tags
+ placeholder := r
+ if recovered[1] != '/' { // opening tag
+ if placeholder == hcd.diffCodeAddedOpen || placeholder == hcd.diffCodeRemovedOpen {
+ diffCodeOpenTag = recovered
+ recovered = ""
} else {
- recovered = token
tagStack = append(tagStack, recovered)
}
} else { // closing tag
if placeholder == hcd.diffCodeClose {
diffCodeOpenTag = "" // the highlighted diff is closed, no more diff
+ recovered = ""
} else if len(tagStack) != 0 {
- recovered = token
tagStack = tagStack[:len(tagStack)-1]
- } // else: if no opening tag in stack yet, skip the closing tag
+ } else {
+ recovered = ""
+ }
}
sb.WriteString(recovered)
}
diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go
index dd7e0984c7..b99b7e3675 100644
--- a/services/gitdiff/highlightdiff_test.go
+++ b/services/gitdiff/highlightdiff_test.go
@@ -77,15 +77,13 @@ func TestDiffWithHighlight(t *testing.T) {
t.Run("ComplexDiff1", func(t *testing.T) {
oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
- newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot.xxx || bot.yyy`)
+ newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot&xxx || bot&yyy`)
hcd := newHighlightCodeDiff()
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
assert.Equal(t, strings.ReplaceAll(`
-bot
-.
+bot&
xxx ||
-bot
-.
+bot&
yyy`, "\n", ""), string(out))
})