Refactor highlight and diff (#36599)

1. fix a performance regression when using line-by-line highlighting
* the root cause is that chroma's `lexers.Get` is slow and the lexer cache
went missing in recent changes
2. clarify the chroma lexer detection behavior
* we now fully manage our own lexer detection logic and handle the
overriding problems ourselves, so everything is fully under control
3. clarify the "code analyze" behavior; there are now only 2 usages:
* use only the file name and language to detect the lexer (very fast),
mainly for the "diff" page, which contains a lot of files
* if no lexer is detected by file name and language, use the code content
to detect again (slow), mainly for the "view file" or "blame" pages, where
this gives the best result
4. fix a git diff bug that caused a "broken pipe" error for large diff files
This commit is contained in:
wxiaoguang
2026-02-13 08:15:46 +08:00
committed by GitHub
parent d69b786097
commit 0d8bd7720d
12 changed files with 427 additions and 155 deletions

View File

@@ -40,6 +40,7 @@ import (
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma/v2"
"github.com/sergi/go-diff/diffmatchpatch"
stdcharset "golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
@@ -306,6 +307,7 @@ type DiffSection struct {
language *diffVarMutable[string]
highlightedLeftLines *diffVarMutable[map[int]template.HTML]
highlightedRightLines *diffVarMutable[map[int]template.HTML]
highlightLexer *diffVarMutable[chroma.Lexer]
FileName string
Lines []*DiffLine
@@ -347,8 +349,10 @@ func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *D
if setting.Git.DisableDiffHighlight {
return template.HTML(html.EscapeString(diffLine.Content[1:]))
}
h, _ = highlight.RenderCodeFast(diffSection.FileName, fileLanguage, diffLine.Content[1:])
return h
if diffSection.highlightLexer.value == nil {
diffSection.highlightLexer.value = highlight.DetectChromaLexerByFileName(diffSection.FileName, fileLanguage)
}
return highlight.RenderCodeByLexer(diffSection.highlightLexer.value, diffLine.Content[1:])
}
func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline {
@@ -391,6 +395,12 @@ func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType,
// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
defer func() {
if err := recover(); err != nil {
// the logic in this function is complex, so recover from any panic here and log the stack, because Golang templates don't print it
log.Error("panic in GetComputedInlineDiffFor: %v\nStack: %s", err, log.Stack(2))
}
}()
// try to find equivalent diff line. ignore, otherwise
switch diffLine.Type {
case DiffLineSection:
@@ -452,6 +462,7 @@ type DiffFile struct {
// for render purpose only, will be filled by the extra loop in GitDiffForRender, the maps of lines are 0-based
language diffVarMutable[string]
highlightRender diffVarMutable[chroma.Lexer] // cache render (atm: lexer) for current file, only detect once for line-by-line mode
highlightedLeftLines diffVarMutable[map[int]template.HTML]
highlightedRightLines diffVarMutable[map[int]template.HTML]
}
@@ -932,6 +943,7 @@ func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
func newDiffSectionForDiffFile(curFile *DiffFile) *DiffSection {
return &DiffSection{
language: &curFile.language,
highlightLexer: &curFile.highlightRender,
highlightedLeftLines: &curFile.highlightedLeftLines,
highlightedRightLines: &curFile.highlightedRightLines,
}
@@ -1395,7 +1407,8 @@ func highlightCodeLines(name, lang string, sections []*DiffSection, isLeft bool,
}
content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{}))
highlightedNewContent, _ := highlight.RenderCodeFast(name, lang, content)
lexer := highlight.DetectChromaLexerByFileName(name, lang)
highlightedNewContent := highlight.RenderCodeByLexer(lexer, content)
unsafeLines := highlight.UnsafeSplitHighlightedLines(highlightedNewContent)
lines := make(map[int]template.HTML, len(unsafeLines))
// only save the highlighted lines we need, but not the whole file, to save memory

View File

@@ -11,6 +11,8 @@ import (
"io"
"code.gitea.io/gitea/modules/setting"
"github.com/alecthomas/chroma/v2"
)
type BlobExcerptOptions struct {
@@ -65,6 +67,7 @@ func BuildBlobExcerptDiffSection(filePath string, reader io.Reader, opts BlobExc
chunkSize := BlobExcerptChunkSize
section := &DiffSection{
language: &diffVarMutable[string]{value: language},
highlightLexer: &diffVarMutable[chroma.Lexer]{},
highlightedLeftLines: &diffVarMutable[map[int]template.HTML]{},
highlightedRightLines: &diffVarMutable[map[int]template.HTML]{},
FileName: filePath,

View File

@@ -76,8 +76,8 @@ func TestDiffWithHighlight(t *testing.T) {
})
t.Run("ComplexDiff1", func(t *testing.T) {
oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot&xxx || bot&yyy`)
oldCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `xxx || yyy`)
newCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `bot&xxx || bot&yyy`)
hcd := newHighlightCodeDiff()
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
assert.Equal(t, strings.ReplaceAll(`