Refactor highlight and diff (#36599)

1. fix a performance regression when using line-by-line highlighting
* the root cause is that chroma's `lexers.Get` is slow, and the lexer cache
was lost during recent changes
2. clarify the chroma lexer detection behavior
* now we fully manage our own lexer-detection logic and handle overriding
problems, so everything is fully under control
3. clarify the "code analyze" behavior; now it has only 2 usages:
* only use the file name and language to detect the lexer (very fast), mainly
for the "diff" page, which contains a lot of files
* if no lexer is detected by file name and language, use the code content to
detect again (slow), mainly for the "view file" or "blame" pages, which gives
the best result
4. fix a git diff bug that caused a "broken pipe" error for large diff files
This commit is contained in:
wxiaoguang
2026-02-13 08:15:46 +08:00
committed by GitHub
parent d69b786097
commit 0d8bd7720d
12 changed files with 427 additions and 155 deletions

View File

@@ -28,44 +28,37 @@ const (
// GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
func GetRawDiff(repo *Repository, commitID string, diffType RawDiffType, writer io.Writer) (retErr error) {
diffOutput, diffFinish, err := getRepoRawDiffForFile(repo.Ctx, repo, "", commitID, diffType, "")
cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, "", commitID, diffType, "")
if err != nil {
return err
return fmt.Errorf("getRepoRawDiffForFileCmd: %w", err)
}
defer func() {
err := diffFinish()
if retErr == nil {
retErr = err // only return command's error if no previous error
}
}()
_, err = io.Copy(writer, diffOutput)
return err
return cmd.WithStdoutCopy(writer).RunWithStderr(repo.Ctx)
}
// GetFileDiffCutAroundLine cuts the old or new part of the diff of a file around a specific line number
func GetFileDiffCutAroundLine(
repo *Repository, startCommit, endCommit, treePath string,
line int64, old bool, numbersOfLine int,
) (_ string, retErr error) {
diffOutput, diffFinish, err := getRepoRawDiffForFile(repo.Ctx, repo, startCommit, endCommit, RawDiffNormal, treePath)
) (ret string, retErr error) {
cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, startCommit, endCommit, RawDiffNormal, treePath)
if err != nil {
return "", err
return "", fmt.Errorf("getRepoRawDiffForFileCmd: %w", err)
}
defer func() {
err := diffFinish()
if retErr == nil {
retErr = err // only return command's error if no previous error
}
}()
return CutDiffAroundLine(diffOutput, line, old, numbersOfLine)
stdoutReader, stdoutClose := cmd.MakeStdoutPipe()
defer stdoutClose()
cmd.WithPipelineFunc(func(ctx gitcmd.Context) error {
ret, err = CutDiffAroundLine(stdoutReader, line, old, numbersOfLine)
return err
})
return ret, cmd.RunWithStderr(repo.Ctx)
}
// getRepoRawDiffForFile returns an io.Reader for the diff results of file in given commit ID
// and a "finish" function to wait for the git command and clean up resources after reading is done.
func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string) (io.Reader, func() gitcmd.RunStdError, error) {
func getRepoRawDiffForFileCmd(_ context.Context, repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string) (*gitcmd.Command, error) {
commit, err := repo.GetCommit(endCommit)
if err != nil {
return nil, nil, err
return nil, err
}
var files []string
if len(file) > 0 {
@@ -84,7 +77,7 @@ func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, e
} else {
c, err := commit.Parent(0)
if err != nil {
return nil, nil, err
return nil, err
}
cmd.AddArguments("diff").
AddOptionFormat("--find-renames=%s", setting.Git.DiffRenameSimilarityThreshold).
@@ -99,25 +92,15 @@ func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, e
} else {
c, err := commit.Parent(0)
if err != nil {
return nil, nil, err
return nil, err
}
query := fmt.Sprintf("%s...%s", endCommit, c.ID.String())
cmd.AddArguments("format-patch", "--no-signature", "--stdout").AddDynamicArguments(query).AddDashesAndList(files...)
}
default:
return nil, nil, util.NewInvalidArgumentErrorf("invalid diff type: %s", diffType)
return nil, util.NewInvalidArgumentErrorf("invalid diff type: %s", diffType)
}
stdoutReader, stdoutReaderClose := cmd.MakeStdoutPipe()
err = cmd.StartWithStderr(ctx)
if err != nil {
stdoutReaderClose()
return nil, nil, err
}
return stdoutReader, func() gitcmd.RunStdError {
stdoutReaderClose()
return cmd.WaitWithStderr()
}, nil
return cmd, nil
}
// ParseDiffHunkString parse the diff hunk content and return
@@ -254,7 +237,7 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi
}
}
if err := scanner.Err(); err != nil {
return "", err
return "", fmt.Errorf("CutDiffAroundLine: scan: %w", err)
}
// No hunk found

View File

@@ -306,6 +306,10 @@ func (c *Command) MakeStdinPipe() (writer PipeWriter, closer func()) {
// MakeStdoutPipe creates a reader for the command's stdout.
// The returned closer function must be called by the caller to close the pipe.
// After the pipe reader is closed, the unread data will be discarded.
//
// If the process (git command) still tries to write after the pipe is closed, the Wait error will be "signal: broken pipe".
// WithPipelineFunc + Run won't return "broken pipe" error in this case if the callback returns no error.
// But if you are calling Start / Wait family functions, you should either drain the pipe before closing it, or handle the Wait error correctly.
func (c *Command) MakeStdoutPipe() (reader PipeReader, closer func()) {
return c.makeStdoutStderr(&c.cmdStdout)
}

View File

@@ -11,20 +11,16 @@ import (
gohtml "html"
"html/template"
"io"
"path"
"strings"
"sync"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/alecthomas/chroma/v2/styles"
"github.com/go-enry/go-enry/v2"
)
// don't index files larger than this many bytes for performance purposes
@@ -84,85 +80,21 @@ func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) {
}
}
func getChromaLexerByLanguage(fileName, lang string) chroma.Lexer {
lang, _, _ = strings.Cut(lang, "?") // maybe, the value from gitattributes might contain `?` parameters?
ext := path.Ext(fileName)
// the "lang" might come from enry, it has different naming for some languages
switch lang {
case "F#":
lang = "FSharp"
case "Pascal":
lang = "ObjectPascal"
case "C":
if ext == ".C" || ext == ".H" {
lang = "C++"
}
}
if lang == "" && util.AsciiEqualFold(ext, ".sql") {
// there is a bug when using MySQL lexer: "--\nSELECT", the second line will be rendered as comment incorrectly
lang = "SQL"
}
// lexers.Get is slow if the language name can't be matched directly: it does extra "Match" call to iterate all lexers
return lexers.Get(lang)
}
// GetChromaLexerWithFallback returns a chroma lexer by given file name, language and code content. All parameters can be optional.
// When code content is provided, it will be slow if no lexer is found by file name or language.
// If no lexer is found, it will return the fallback lexer.
func GetChromaLexerWithFallback(fileName, lang string, code []byte) (lexer chroma.Lexer) {
if lang != "" {
lexer = getChromaLexerByLanguage(fileName, lang)
}
if lexer == nil {
fileExt := path.Ext(fileName)
if val, ok := globalVars().highlightMapping[fileExt]; ok {
lexer = getChromaLexerByLanguage(fileName, val) // use mapped value to find lexer
}
}
if lexer == nil {
// when using "code" to detect, analyze.GetCodeLanguage is slower, it iterates many rules to detect language from content
// this is the old logic: use enry to detect language, and use chroma to render, but their naming is different for some languages
enryLanguage := analyze.GetCodeLanguage(fileName, code)
lexer = getChromaLexerByLanguage(fileName, enryLanguage)
if lexer == nil {
if enryLanguage != enry.OtherLanguage {
log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", enryLanguage, fileName)
}
lexer = lexers.Match(fileName) // lexers.Match will search by its basename and extname
}
}
return util.IfZero(lexer, lexers.Fallback)
}
func renderCode(fileName, language, code string, slowGuess bool) (output template.HTML, lexerName string) {
// RenderCodeSlowGuess tries to get a lexer by file name and language first,
// if not found, it will try to guess the lexer by code content, which is slow (more than several hundreds of milliseconds).
func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexer chroma.Lexer, lexerDisplayName string) {
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
// preserve literal newline in blame view
if code == "" || code == "\n" {
return "\n", ""
return "\n", nil, ""
}
if len(code) > sizeLimit {
return template.HTML(template.HTMLEscapeString(code)), ""
return template.HTML(template.HTMLEscapeString(code)), nil, ""
}
var codeForGuessLexer []byte
if slowGuess {
// it is slower to guess lexer by code content, so only do it when necessary
codeForGuessLexer = util.UnsafeStringToBytes(code)
}
lexer := GetChromaLexerWithFallback(fileName, language, codeForGuessLexer)
return RenderCodeByLexer(lexer, code), formatLexerName(lexer.Config().Name)
}
func RenderCodeFast(fileName, language, code string) (output template.HTML, lexerName string) {
return renderCode(fileName, language, code, false)
}
func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexerName string) {
return renderCode(fileName, language, code, true)
lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow
return RenderCodeByLexer(lexer, code), lexer, formatLexerName(lexer.Config().Name)
}
// RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes
@@ -204,7 +136,7 @@ func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, st
html.PreventSurroundingPre(true),
)
lexer := GetChromaLexerWithFallback(fileName, language, code)
lexer := detectChromaLexerWithAnalyze(fileName, language, code)
lexerName := formatLexerName(lexer.Config().Name)
iterator, err := lexer.Tokenise(nil, string(code))

View File

@@ -205,36 +205,3 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
assert.Equal(t, "<span>a</span>\n", string(ret[0]))
assert.Equal(t, "<span>b\n</span>", string(ret[1]))
}
func TestGetChromaLexer(t *testing.T) {
globalVars().highlightMapping[".my-html"] = "HTML"
t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") })
cases := []struct {
fileName string
language string
content string
expected string
}{
{"test.py", "", "", "Python"},
{"any-file", "javascript", "", "JavaScript"},
{"any-file", "", "/* vim: set filetype=python */", "Python"},
{"any-file", "", "", "fallback"},
{"test.fs", "", "", "Forth"},
{"test.fs", "F#", "", "FSharp"},
{"test.fs", "", "let x = 1", "FSharp"},
{"test.c", "", "", "C"},
{"test.C", "", "", "C++"},
{"OLD-CODE.PAS", "", "", "ObjectPascal"},
{"test.my-html", "", "", "HTML"},
}
for _, c := range cases {
lexer := GetChromaLexerWithFallback(c.fileName, c.language, []byte(c.content))
if assert.NotNil(t, lexer, "case: %+v", c) {
assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c)
}
}
}

View File

@@ -0,0 +1,279 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package highlight
import (
"path"
"strings"
"sync"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/go-enry/go-enry/v2"
)
// mapKeyLowerPrefix marks the case-insensitive entries in the lookup maps below:
// for every key, a second entry "lower/<lower-cased key>" is stored so callers can
// retry a lookup case-insensitively without a separate map.
const mapKeyLowerPrefix = "lower/"

// chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name
// Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.)
// The returned struct is built exactly once (sync.OnceValue) from chroma's global lexer registry.
var chromaLexers = sync.OnceValue(func() (ret struct {
	conflictingExtLangMap map[string]string       // ambiguous file extension -> the lexer name we prefer for it
	lowerNameMap          map[string]chroma.Lexer // lexer name (lang name) in lower-case
	fileBaseMap           map[string]chroma.Lexer // plain file name pattern (full base name) -> lexer
	fileExtMap            map[string]chroma.Lexer // file extension (from "*.ext" patterns) -> lexer
	fileParts             []struct {              // substring patterns taken from file names ending with "*"
		part  string
		lexer chroma.Lexer
	}
},
) {
	ret.lowerNameMap = make(map[string]chroma.Lexer)
	ret.fileBaseMap = make(map[string]chroma.Lexer)
	ret.fileExtMap = make(map[string]chroma.Lexer)

	// Chroma has overlaps in file extension for different languages,
	// When we need to do fast render, there is no way to detect the language by content,
	// So we can only choose some default languages for the overlapped file extensions.
	// The comment after each entry lists the other chroma lexers that claim the same extension.
	ret.conflictingExtLangMap = map[string]string{
		".as":       "ActionScript 3", // ActionScript
		".asm":      "NASM",           // TASM, NASM, RGBDS Assembly, Z80 Assembly
		".ASM":      "NASM",
		".bas":      "VB.net",       // QBasic
		".bf":       "Beef",         // Brainfuck
		".fs":       "FSharp",       // Forth
		".gd":       "GDScript",     // GDScript3
		".h":        "C",            // Objective-C
		".hcl":      "Terraform",    // HCL
		".hh":       "C++",          // HolyC
		".inc":      "PHP",          // ObjectPascal, POVRay, SourcePawn, PHTML
		".m":        "Objective-C",  // Matlab, Mathematica, Mason
		".mc":       "Mason",        // MonkeyC
		".network":  "SYSTEMD",      // INI
		".php":      "PHP",          // PHTML
		".php3":     "PHP",          // PHTML
		".php4":     "PHP",          // PHTML
		".php5":     "PHP",          // PHTML
		".pl":       "Perl",         // Prolog, Raku
		".pm":       "Perl",         // Promela, Raku
		".pp":       "ObjectPascal", // Puppet
		".s":        "ArmAsm",       // GAS
		".S":        "ArmAsm",       // R, GAS
		".service":  "SYSTEMD",      // INI
		".socket":   "SYSTEMD",      // INI
		".sql":      "SQL",          // MySQL
		".t":        "Perl",         // Raku
		".ts":       "TypeScript",   // TypoScript
		".v":        "V",            // verilog
		".xslt":     "HTML",         // XML
	}

	isPlainPattern := func(key string) bool {
		return !strings.ContainsAny(key, "*?[]") // only support simple patterns
	}
	// setMapWithLowerKey stores the key both case-sensitively and (prefixed) lower-cased.
	// A duplicate case-sensitive key means two lexers claim the same pattern and the
	// conflict must be resolved via conflictingExtLangMap.
	setMapWithLowerKey := func(m map[string]chroma.Lexer, key string, lexer chroma.Lexer) {
		if _, conflict := m[key]; conflict {
			panic("duplicate key in lexer map: " + key + ", need to add it to conflictingExtLangMap")
		}
		m[key] = lexer
		m[mapKeyLowerPrefix+strings.ToLower(key)] = lexer
	}
	// processFileName classifies one chroma file name pattern into base-name,
	// extension, or substring lookup. It returns false for patterns we can't handle.
	processFileName := func(fileName string, lexer chroma.Lexer) bool {
		if isPlainPattern(fileName) {
			// full base name match
			setMapWithLowerKey(ret.fileBaseMap, fileName, lexer)
			return true
		}
		if strings.HasPrefix(fileName, "*") {
			// ext name match: "*.js"
			fileExt := strings.Trim(fileName, "*")
			if isPlainPattern(fileExt) {
				// only register the extension when it is unambiguous, or when this
				// lexer is the preferred one chosen in conflictingExtLangMap
				presetName := ret.conflictingExtLangMap[fileExt]
				if presetName == "" || lexer.Config().Name == presetName {
					setMapWithLowerKey(ret.fileExtMap, fileExt, lexer)
				}
				return true
			}
		}
		if strings.HasSuffix(fileName, "*") {
			// partial (substring) match for patterns that end with "*" but do not
			// start with "*", e.g. ".env.*" (patterns starting with "*" are
			// consumed by the extension branch above)
			filePart := strings.Trim(fileName, "*")
			if isPlainPattern(filePart) {
				ret.fileParts = append(ret.fileParts, struct {
					part  string
					lexer chroma.Lexer
				}{
					part:  filePart,
					lexer: lexer,
				})
				return true
			}
		}
		return false
	}
	expandGlobPatterns := func(patterns []string) []string {
		// expand patterns like "file.[ch]" to "file.c" and "file.h", only one pair of "[]" is supported, enough for current Chroma lexers
		for idx, s := range patterns {
			idx1 := strings.IndexByte(s, '[')
			idx2 := strings.IndexByte(s, ']')
			if idx1 != -1 && idx2 != -1 && idx2 > idx1+1 {
				left, mid, right := s[:idx1], s[idx1+1:idx2], s[idx2+1:]
				// the first expansion replaces the original pattern in place,
				// the remaining ones are appended
				patterns[idx] = left + mid[0:1] + right
				for i := 1; i < len(mid); i++ {
					patterns = append(patterns, left+mid[i:i+1]+right)
				}
			}
		}
		return patterns
	}
	// add lexers to our map, for fast lookup
	for _, lexer := range lexers.GlobalLexerRegistry.Lexers {
		cfg := lexer.Config()
		ret.lowerNameMap[strings.ToLower(lexer.Config().Name)] = lexer
		for _, alias := range cfg.Aliases {
			ret.lowerNameMap[strings.ToLower(alias)] = lexer
		}
		for _, s := range expandGlobPatterns(cfg.Filenames) {
			if !processFileName(s, lexer) {
				panic("unsupported file name pattern in lexer: " + s)
			}
		}
		for _, s := range expandGlobPatterns(cfg.AliasFilenames) {
			if !processFileName(s, lexer) {
				panic("unsupported alias file name pattern in lexer: " + s)
			}
		}
	}
	// final check: make sure the default ext-lang mapping is correct, nothing is missing
	// (every preferred extension must have ended up in fileExtMap pointing at the preferred lexer)
	for ext, lexerName := range ret.conflictingExtLangMap {
		if lexer, ok := ret.fileExtMap[ext]; !ok || lexer.Config().Name != lexerName {
			panic("missing default ext-lang mapping for: " + ext)
		}
	}
	return ret
})
// normalizeFileNameLang reduces fileName to its base name, strips any trailing
// "?..." parameters from fileLang, and translates language names used by
// enry/gitattributes into the names chroma expects ("F#" -> "FSharp",
// "Pascal" -> "ObjectPascal", and C files named "*.C"/"*.H" -> "C++").
func normalizeFileNameLang(fileName, fileLang string) (string, string) {
	baseName := path.Base(fileName)
	lang := fileLang
	// maybe, the value from gitattributes might contain `?` parameters - drop them
	if idx := strings.IndexByte(lang, '?'); idx >= 0 {
		lang = lang[:idx]
	}
	// the "lang" might come from enry or gitattributes, it has different naming for some languages
	switch {
	case lang == "F#":
		lang = "FSharp"
	case lang == "Pascal":
		lang = "ObjectPascal"
	case lang == "C":
		if ext := path.Ext(baseName); ext == ".C" || ext == ".H" {
			lang = "C++"
		}
	}
	return baseName, lang
}
// DetectChromaLexerByFileName returns a chroma lexer for the given file name and
// language hint using only fast map lookups (no code-content analysis).
// It never returns nil: on a miss the chroma fallback (plain text) lexer is returned.
func DetectChromaLexerByFileName(fileName, fileLang string) chroma.Lexer {
	lexer, _ := detectChromaLexerByFileName(fileName, fileLang)
	return lexer
}
// detectChromaLexerByFileName looks up a chroma lexer by language name, then by
// full base name, then by extension, then by substring pattern — in that priority
// order, using only the prebuilt chromaLexers maps (fast, no content analysis).
// byLang reports whether the match came from the language name; it never returns
// a nil lexer (lexers.Fallback is returned on a miss).
func detectChromaLexerByFileName(fileName, fileLang string) (_ chroma.Lexer, byLang bool) {
	fileName, fileLang = normalizeFileNameLang(fileName, fileLang)
	fileExt := path.Ext(fileName)
	// apply custom mapping for file extension, highest priority, for example:
	// * ".my-js" -> ".js"
	// * ".my-html" -> "HTML"
	// NOTE(review): when the mapped value is an extension (".my-js" -> ".js"),
	// fileName is rewritten but fileExt below still holds the original extension,
	// so the extension lookup uses the pre-mapping value — confirm this is intended.
	if fileExt != "" {
		if val, ok := globalVars().highlightMapping[fileExt]; ok {
			if strings.HasPrefix(val, ".") {
				fileName = "dummy" + val
				fileLang = ""
			} else {
				fileLang = val
			}
		}
	}
	// try to use language for lexer name (case-insensitive; aliases included)
	if fileLang != "" {
		lexer := chromaLexers().lowerNameMap[strings.ToLower(fileLang)]
		if lexer != nil {
			return lexer, true
		}
	}
	if fileName == "" {
		return lexers.Fallback, false
	}
	// try base name: exact match first, then case-insensitive
	{
		baseName := path.Base(fileName)
		if lexer, ok := chromaLexers().fileBaseMap[baseName]; ok {
			return lexer, false
		} else if lexer, ok = chromaLexers().fileBaseMap[mapKeyLowerPrefix+strings.ToLower(baseName)]; ok {
			return lexer, false
		}
	}
	if fileExt == "" {
		return lexers.Fallback, false
	}
	// try ext name: exact match first, then case-insensitive
	{
		if lexer, ok := chromaLexers().fileExtMap[fileExt]; ok {
			return lexer, false
		} else if lexer, ok = chromaLexers().fileExtMap[mapKeyLowerPrefix+strings.ToLower(fileExt)]; ok {
			return lexer, false
		}
	}
	// try file part match, for example: ".env.local" for "*.env.*"
	// it assumes that there must be a dot in filename (fileExt isn't empty)
	for _, item := range chromaLexers().fileParts {
		if strings.Contains(fileName, item.part) {
			return item.lexer, false
		}
	}
	return lexers.Fallback, false
}
// detectChromaLexerWithAnalyze returns a chroma lexer by given file name, language and code content. All parameters can be optional.
// When code content is provided, it will be slow if no lexer is found by file name or language.
// If no lexer is found, it will return the fallback lexer.
func detectChromaLexerWithAnalyze(fileName, lang string, code []byte) chroma.Lexer {
	lexer, byLang := detectChromaLexerByFileName(fileName, lang)
	// if lang is provided, and it matches a lexer, use it directly
	if byLang {
		return lexer
	}
	// if a lexer is detected and there is no conflict for the file extension, use it directly
	fileExt := path.Ext(fileName)
	_, hasConflicts := chromaLexers().conflictingExtLangMap[fileExt]
	if !hasConflicts && lexer != lexers.Fallback {
		return lexer
	}
	// try to detect language by content, for best guessing for the language
	// when using "code" to detect, analyze.GetCodeLanguage is slow, it iterates many rules to detect language from content
	analyzedLanguage := analyze.GetCodeLanguage(fileName, code)
	lexer = DetectChromaLexerByFileName(fileName, analyzedLanguage)
	if lexer == lexers.Fallback {
		// enry recognized the language but we have no chroma lexer for that name:
		// this indicates a missing entry in our enry->chroma name mapping
		if analyzedLanguage != enry.OtherLanguage {
			log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", analyzedLanguage, fileName)
		}
	}
	return lexer
}

View File

@@ -0,0 +1,90 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package highlight
import (
"strings"
"testing"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/stretchr/testify/assert"
)
// BenchmarkDetectChromaLexerByFileName measures the fast path: lexer lookup by
// file name/extension only, without any code-content analysis.
func BenchmarkDetectChromaLexerByFileName(b *testing.B) {
	for b.Loop() {
		// recorded result: BenchmarkDetectChromaLexerByFileName-12    18214717    61.35 ns/op
		DetectChromaLexerByFileName("a.sql", "")
	}
}
// BenchmarkDetectChromaLexerWithAnalyze measures the slow path: the plain file
// name "a" matches no lexer, so the code content must be analyzed.
func BenchmarkDetectChromaLexerWithAnalyze(b *testing.B) {
	b.StopTimer() // NOTE(review): redundant with b.Loop (setup before the loop is untimed) — harmless
	code := []byte(strings.Repeat("SELECT * FROM table;\n", 1000))
	b.StartTimer()
	for b.Loop() {
		// recorded result — the name below is from an earlier version of this benchmark:
		// BenchmarkRenderCodeSlowGuess-12    87946    13310 ns/op
		detectChromaLexerWithAnalyze("a", "", code)
	}
}
// BenchmarkChromaAnalyze records why chroma's own content-based detection is not
// used: lexers.Analyse is far slower than the go-enry based detection above.
func BenchmarkChromaAnalyze(b *testing.B) {
	b.StopTimer() // NOTE(review): redundant with b.Loop (setup before the loop is untimed) — harmless
	code := strings.Repeat("SELECT * FROM table;\n", 1000)
	b.StartTimer()
	for b.Loop() {
		// comparing to detectChromaLexerWithAnalyze (go-enry), "chroma/lexers.Analyse" is very slow
		// BenchmarkChromaAnalyze-12    519    2247104 ns/op
		lexers.Analyse(code)
	}
}
// BenchmarkRenderCodeByLexer measures the rendering step alone; per the recorded
// results it dominates the total cost compared with lexer detection.
func BenchmarkRenderCodeByLexer(b *testing.B) {
	b.StopTimer()
	code := strings.Repeat("SELECT * FROM table;\n", 1000)
	lexer := DetectChromaLexerByFileName("a.sql", "")
	b.StartTimer()
	for b.Loop() {
		// Really slow .......
		// BenchmarkRenderCodeByLexer-12    22    47159038 ns/op
		RenderCodeByLexer(lexer, code)
	}
}
// TestDetectChromaLexer covers both the fast file-name/language lookup and the
// slow content-analysis fallback of detectChromaLexerWithAnalyze.
func TestDetectChromaLexer(t *testing.T) {
	// register a temporary custom extension mapping for this test only
	globalVars().highlightMapping[".my-html"] = "HTML"
	t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") })
	cases := []struct {
		fileName string
		language string
		content  string
		expected string // expected chroma lexer name (lexer.Config().Name)
	}{
		{"test.py", "", "", "Python"},
		{"any-file", "javascript", "", "JavaScript"},
		{"any-file", "", "/* vim: set filetype=python */", "Python"}, // detected from content
		{"any-file", "", "", "fallback"},
		{"test.fs", "", "", "FSharp"}, // ".fs" prefers FSharp over Forth (conflictingExtLangMap)
		{"test.fs", "F#", "", "FSharp"},
		{"test.fs", "", "let x = 1", "FSharp"},
		{"test.c", "", "", "C"},
		{"test.C", "", "", "C++"}, // upper-case ".C" is normalized to C++
		{"OLD-CODE.PAS", "", "", "ObjectPascal"},
		{"test.my-html", "", "", "HTML"}, // via the custom mapping registered above
		{"a.php", "", "", "PHP"},
		{"a.sql", "", "", "SQL"},
		{"dhcpd.conf", "", "", "ISCdhcpd"},
		{".env.my-production", "", "", "Bash"}, // presumably via a ".env.*"-style partial pattern — verify
	}
	for _, c := range cases {
		lexer := detectChromaLexerWithAnalyze(c.fileName, c.language, []byte(c.content))
		if assert.NotNil(t, lexer, "case: %+v", c) {
			assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c)
		}
	}
}

View File

@@ -72,7 +72,8 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
func HighlightSearchResultCode(filename, language string, lineNums []int, code string) []*ResultLine {
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.RenderCodeFast(filename, language, code)
lexer := highlight.DetectChromaLexerByFileName(filename, language)
hl := highlight.RenderCodeByLexer(lexer, code)
highlightedLines := strings.Split(string(hl), "\n")
// The lineNums outputted by render might not match the original lineNums, because "highlight" removes the last `\n`

View File

@@ -56,7 +56,7 @@ func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error
}
}()
lexer := highlight.GetChromaLexerWithFallback("", lang, nil) // don't use content to detect, it is too slow
lexer := highlight.DetectChromaLexerByFileName("", lang) // don't use content to detect, it is too slow
lexer = chroma.Coalesce(lexer)
sb := &strings.Builder{}

View File

@@ -267,7 +267,7 @@ func renderBlame(ctx *context.Context, blameParts []*gitrepo.BlamePart, commitNa
bufContent := buf.Bytes()
bufContent = charset.ToUTF8(bufContent, charset.ConvertOpts{})
highlighted, lexerName := highlight.RenderCodeSlowGuess(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent))
highlighted, _, lexerDisplayName := highlight.RenderCodeSlowGuess(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent))
unsafeLines := highlight.UnsafeSplitHighlightedLines(highlighted)
for i, br := range rows {
var line template.HTML
@@ -280,5 +280,5 @@ func renderBlame(ctx *context.Context, blameParts []*gitrepo.BlamePart, commitNa
ctx.Data["EscapeStatus"] = escapeStatus
ctx.Data["BlameRows"] = rows
ctx.Data["LexerName"] = lexerName
ctx.Data["LexerName"] = lexerDisplayName
}

View File

@@ -40,6 +40,7 @@ import (
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma/v2"
"github.com/sergi/go-diff/diffmatchpatch"
stdcharset "golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
@@ -306,6 +307,7 @@ type DiffSection struct {
language *diffVarMutable[string]
highlightedLeftLines *diffVarMutable[map[int]template.HTML]
highlightedRightLines *diffVarMutable[map[int]template.HTML]
highlightLexer *diffVarMutable[chroma.Lexer]
FileName string
Lines []*DiffLine
@@ -347,8 +349,10 @@ func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *D
if setting.Git.DisableDiffHighlight {
return template.HTML(html.EscapeString(diffLine.Content[1:]))
}
h, _ = highlight.RenderCodeFast(diffSection.FileName, fileLanguage, diffLine.Content[1:])
return h
if diffSection.highlightLexer.value == nil {
diffSection.highlightLexer.value = highlight.DetectChromaLexerByFileName(diffSection.FileName, fileLanguage)
}
return highlight.RenderCodeByLexer(diffSection.highlightLexer.value, diffLine.Content[1:])
}
func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline {
@@ -391,6 +395,12 @@ func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType,
// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
defer func() {
if err := recover(); err != nil {
// the logic is too complex in this function, help to catch any panic because Golang template doesn't print the stack
log.Error("panic in GetComputedInlineDiffFor: %v\nStack: %s", err, log.Stack(2))
}
}()
// try to find equivalent diff line. ignore, otherwise
switch diffLine.Type {
case DiffLineSection:
@@ -452,6 +462,7 @@ type DiffFile struct {
// for render purpose only, will be filled by the extra loop in GitDiffForRender, the maps of lines are 0-based
language diffVarMutable[string]
highlightRender diffVarMutable[chroma.Lexer] // cache render (atm: lexer) for current file, only detect once for line-by-line mode
highlightedLeftLines diffVarMutable[map[int]template.HTML]
highlightedRightLines diffVarMutable[map[int]template.HTML]
}
@@ -932,6 +943,7 @@ func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
func newDiffSectionForDiffFile(curFile *DiffFile) *DiffSection {
return &DiffSection{
language: &curFile.language,
highlightLexer: &curFile.highlightRender,
highlightedLeftLines: &curFile.highlightedLeftLines,
highlightedRightLines: &curFile.highlightedRightLines,
}
@@ -1395,7 +1407,8 @@ func highlightCodeLines(name, lang string, sections []*DiffSection, isLeft bool,
}
content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{}))
highlightedNewContent, _ := highlight.RenderCodeFast(name, lang, content)
lexer := highlight.DetectChromaLexerByFileName(name, lang)
highlightedNewContent := highlight.RenderCodeByLexer(lexer, content)
unsafeLines := highlight.UnsafeSplitHighlightedLines(highlightedNewContent)
lines := make(map[int]template.HTML, len(unsafeLines))
// only save the highlighted lines we need, but not the whole file, to save memory

View File

@@ -11,6 +11,8 @@ import (
"io"
"code.gitea.io/gitea/modules/setting"
"github.com/alecthomas/chroma/v2"
)
type BlobExcerptOptions struct {
@@ -65,6 +67,7 @@ func BuildBlobExcerptDiffSection(filePath string, reader io.Reader, opts BlobExc
chunkSize := BlobExcerptChunkSize
section := &DiffSection{
language: &diffVarMutable[string]{value: language},
highlightLexer: &diffVarMutable[chroma.Lexer]{},
highlightedLeftLines: &diffVarMutable[map[int]template.HTML]{},
highlightedRightLines: &diffVarMutable[map[int]template.HTML]{},
FileName: filePath,

View File

@@ -76,8 +76,8 @@ func TestDiffWithHighlight(t *testing.T) {
})
t.Run("ComplexDiff1", func(t *testing.T) {
oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot&xxx || bot&yyy`)
oldCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `xxx || yyy`)
newCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `bot&xxx || bot&yyy`)
hcd := newHighlightCodeDiff()
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
assert.Equal(t, strings.ReplaceAll(`