diff --git a/modules/git/diff.go b/modules/git/diff.go
index a198695fc0..d7732eaa29 100644
--- a/modules/git/diff.go
+++ b/modules/git/diff.go
@@ -28,44 +28,37 @@ const (
// GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
func GetRawDiff(repo *Repository, commitID string, diffType RawDiffType, writer io.Writer) (retErr error) {
- diffOutput, diffFinish, err := getRepoRawDiffForFile(repo.Ctx, repo, "", commitID, diffType, "")
+ cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, "", commitID, diffType, "")
if err != nil {
- return err
+ return fmt.Errorf("getRepoRawDiffForFileCmd: %w", err)
}
- defer func() {
- err := diffFinish()
- if retErr == nil {
- retErr = err // only return command's error if no previous error
- }
- }()
- _, err = io.Copy(writer, diffOutput)
- return err
+ return cmd.WithStdoutCopy(writer).RunWithStderr(repo.Ctx)
}
// GetFileDiffCutAroundLine cuts the old or new part of the diff of a file around a specific line number
func GetFileDiffCutAroundLine(
repo *Repository, startCommit, endCommit, treePath string,
line int64, old bool, numbersOfLine int,
-) (_ string, retErr error) {
- diffOutput, diffFinish, err := getRepoRawDiffForFile(repo.Ctx, repo, startCommit, endCommit, RawDiffNormal, treePath)
+) (ret string, retErr error) {
+ cmd, err := getRepoRawDiffForFileCmd(repo.Ctx, repo, startCommit, endCommit, RawDiffNormal, treePath)
if err != nil {
- return "", err
+ return "", fmt.Errorf("getRepoRawDiffForFileCmd: %w", err)
}
- defer func() {
- err := diffFinish()
- if retErr == nil {
- retErr = err // only return command's error if no previous error
- }
- }()
- return CutDiffAroundLine(diffOutput, line, old, numbersOfLine)
+ stdoutReader, stdoutClose := cmd.MakeStdoutPipe()
+ defer stdoutClose()
+ cmd.WithPipelineFunc(func(ctx gitcmd.Context) error {
+ ret, err = CutDiffAroundLine(stdoutReader, line, old, numbersOfLine)
+ return err
+ })
+ return ret, cmd.RunWithStderr(repo.Ctx)
}
// getRepoRawDiffForFile returns an io.Reader for the diff results of file in given commit ID
// and a "finish" function to wait for the git command and clean up resources after reading is done.
-func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string) (io.Reader, func() gitcmd.RunStdError, error) {
+func getRepoRawDiffForFileCmd(_ context.Context, repo *Repository, startCommit, endCommit string, diffType RawDiffType, file string) (*gitcmd.Command, error) {
commit, err := repo.GetCommit(endCommit)
if err != nil {
- return nil, nil, err
+ return nil, err
}
var files []string
if len(file) > 0 {
@@ -84,7 +77,7 @@ func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, e
} else {
c, err := commit.Parent(0)
if err != nil {
- return nil, nil, err
+ return nil, err
}
cmd.AddArguments("diff").
AddOptionFormat("--find-renames=%s", setting.Git.DiffRenameSimilarityThreshold).
@@ -99,25 +92,15 @@ func getRepoRawDiffForFile(ctx context.Context, repo *Repository, startCommit, e
} else {
c, err := commit.Parent(0)
if err != nil {
- return nil, nil, err
+ return nil, err
}
query := fmt.Sprintf("%s...%s", endCommit, c.ID.String())
cmd.AddArguments("format-patch", "--no-signature", "--stdout").AddDynamicArguments(query).AddDashesAndList(files...)
}
default:
- return nil, nil, util.NewInvalidArgumentErrorf("invalid diff type: %s", diffType)
+ return nil, util.NewInvalidArgumentErrorf("invalid diff type: %s", diffType)
}
-
- stdoutReader, stdoutReaderClose := cmd.MakeStdoutPipe()
- err = cmd.StartWithStderr(ctx)
- if err != nil {
- stdoutReaderClose()
- return nil, nil, err
- }
- return stdoutReader, func() gitcmd.RunStdError {
- stdoutReaderClose()
- return cmd.WaitWithStderr()
- }, nil
+ return cmd, nil
}
// ParseDiffHunkString parse the diff hunk content and return
@@ -254,7 +237,7 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi
}
}
if err := scanner.Err(); err != nil {
- return "", err
+ return "", fmt.Errorf("CutDiffAroundLine: scan: %w", err)
}
// No hunk found
diff --git a/modules/git/gitcmd/command.go b/modules/git/gitcmd/command.go
index f780cdf6c9..e9b51802fe 100644
--- a/modules/git/gitcmd/command.go
+++ b/modules/git/gitcmd/command.go
@@ -306,6 +306,10 @@ func (c *Command) MakeStdinPipe() (writer PipeWriter, closer func()) {
// MakeStdoutPipe creates a reader for the command's stdout.
// The returned closer function must be called by the caller to close the pipe.
// After the pipe reader is closed, the unread data will be discarded.
+//
+// If the process (git command) still tries to write after the pipe is closed, the Wait error will be "signal: broken pipe".
+// WithPipelineFunc + Run won't return "broken pipe" error in this case if the callback returns no error.
+// But if you are calling Start / Wait family functions, you should either drain the pipe before closing it, or handle the Wait error correctly.
func (c *Command) MakeStdoutPipe() (reader PipeReader, closer func()) {
return c.makeStdoutStderr(&c.cmdStdout)
}
diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
index fc8699829c..c7416c7a10 100644
--- a/modules/highlight/highlight.go
+++ b/modules/highlight/highlight.go
@@ -11,20 +11,16 @@ import (
gohtml "html"
"html/template"
"io"
- "path"
"strings"
"sync"
- "code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
- "github.com/alecthomas/chroma/v2/lexers"
"github.com/alecthomas/chroma/v2/styles"
- "github.com/go-enry/go-enry/v2"
)
// don't index files larger than this many bytes for performance purposes
@@ -84,85 +80,21 @@ func UnsafeSplitHighlightedLines(code template.HTML) (ret [][]byte) {
}
}
-func getChromaLexerByLanguage(fileName, lang string) chroma.Lexer {
- lang, _, _ = strings.Cut(lang, "?") // maybe, the value from gitattributes might contain `?` parameters?
- ext := path.Ext(fileName)
- // the "lang" might come from enry, it has different naming for some languages
- switch lang {
- case "F#":
- lang = "FSharp"
- case "Pascal":
- lang = "ObjectPascal"
- case "C":
- if ext == ".C" || ext == ".H" {
- lang = "C++"
- }
- }
- if lang == "" && util.AsciiEqualFold(ext, ".sql") {
- // there is a bug when using MySQL lexer: "--\nSELECT", the second line will be rendered as comment incorrectly
- lang = "SQL"
- }
- // lexers.Get is slow if the language name can't be matched directly: it does extra "Match" call to iterate all lexers
- return lexers.Get(lang)
-}
-
-// GetChromaLexerWithFallback returns a chroma lexer by given file name, language and code content. All parameters can be optional.
-// When code content is provided, it will be slow if no lexer is found by file name or language.
-// If no lexer is found, it will return the fallback lexer.
-func GetChromaLexerWithFallback(fileName, lang string, code []byte) (lexer chroma.Lexer) {
- if lang != "" {
- lexer = getChromaLexerByLanguage(fileName, lang)
- }
-
- if lexer == nil {
- fileExt := path.Ext(fileName)
- if val, ok := globalVars().highlightMapping[fileExt]; ok {
- lexer = getChromaLexerByLanguage(fileName, val) // use mapped value to find lexer
- }
- }
-
- if lexer == nil {
- // when using "code" to detect, analyze.GetCodeLanguage is slower, it iterates many rules to detect language from content
- // this is the old logic: use enry to detect language, and use chroma to render, but their naming is different for some languages
- enryLanguage := analyze.GetCodeLanguage(fileName, code)
- lexer = getChromaLexerByLanguage(fileName, enryLanguage)
- if lexer == nil {
- if enryLanguage != enry.OtherLanguage {
- log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", enryLanguage, fileName)
- }
- lexer = lexers.Match(fileName) // lexers.Match will search by its basename and extname
- }
- }
-
- return util.IfZero(lexer, lexers.Fallback)
-}
-
-func renderCode(fileName, language, code string, slowGuess bool) (output template.HTML, lexerName string) {
+// RenderCodeSlowGuess tries to get a lexer by file name and language first,
+// if not found, it will try to guess the lexer by code content, which is slow (can take several hundred milliseconds).
+func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexer chroma.Lexer, lexerDisplayName string) {
// diff view newline will be passed as empty, change to literal '\n' so it can be copied
// preserve literal newline in blame view
if code == "" || code == "\n" {
- return "\n", ""
+ return "\n", nil, ""
}
if len(code) > sizeLimit {
- return template.HTML(template.HTMLEscapeString(code)), ""
+ return template.HTML(template.HTMLEscapeString(code)), nil, ""
}
- var codeForGuessLexer []byte
- if slowGuess {
- // it is slower to guess lexer by code content, so only do it when necessary
- codeForGuessLexer = util.UnsafeStringToBytes(code)
- }
- lexer := GetChromaLexerWithFallback(fileName, language, codeForGuessLexer)
- return RenderCodeByLexer(lexer, code), formatLexerName(lexer.Config().Name)
-}
-
-func RenderCodeFast(fileName, language, code string) (output template.HTML, lexerName string) {
- return renderCode(fileName, language, code, false)
-}
-
-func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML, lexerName string) {
- return renderCode(fileName, language, code, true)
+ lexer = detectChromaLexerWithAnalyze(fileName, language, util.UnsafeStringToBytes(code)) // it is also slow
+ return RenderCodeByLexer(lexer, code), lexer, formatLexerName(lexer.Config().Name)
}
// RenderCodeByLexer returns a HTML version of code string with chroma syntax highlighting classes
@@ -204,7 +136,7 @@ func RenderFullFile(fileName, language string, code []byte) ([]template.HTML, st
html.PreventSurroundingPre(true),
)
- lexer := GetChromaLexerWithFallback(fileName, language, code)
+ lexer := detectChromaLexerWithAnalyze(fileName, language, code)
lexerName := formatLexerName(lexer.Config().Name)
iterator, err := lexer.Tokenise(nil, string(code))
diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go
index 69aff07b04..d026210475 100644
--- a/modules/highlight/highlight_test.go
+++ b/modules/highlight/highlight_test.go
@@ -205,36 +205,3 @@ func TestUnsafeSplitHighlightedLines(t *testing.T) {
assert.Equal(t, "a\n", string(ret[0]))
assert.Equal(t, "b\n", string(ret[1]))
}
-
-func TestGetChromaLexer(t *testing.T) {
- globalVars().highlightMapping[".my-html"] = "HTML"
- t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") })
-
- cases := []struct {
- fileName string
- language string
- content string
- expected string
- }{
- {"test.py", "", "", "Python"},
-
- {"any-file", "javascript", "", "JavaScript"},
- {"any-file", "", "/* vim: set filetype=python */", "Python"},
- {"any-file", "", "", "fallback"},
-
- {"test.fs", "", "", "Forth"},
- {"test.fs", "F#", "", "FSharp"},
- {"test.fs", "", "let x = 1", "FSharp"},
-
- {"test.c", "", "", "C"},
- {"test.C", "", "", "C++"},
- {"OLD-CODE.PAS", "", "", "ObjectPascal"},
- {"test.my-html", "", "", "HTML"},
- }
- for _, c := range cases {
- lexer := GetChromaLexerWithFallback(c.fileName, c.language, []byte(c.content))
- if assert.NotNil(t, lexer, "case: %+v", c) {
- assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c)
- }
- }
-}
diff --git a/modules/highlight/lexerdetect.go b/modules/highlight/lexerdetect.go
new file mode 100644
index 0000000000..5b39617566
--- /dev/null
+++ b/modules/highlight/lexerdetect.go
@@ -0,0 +1,279 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+ "path"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/analyze"
+ "code.gitea.io/gitea/modules/log"
+
+ "github.com/alecthomas/chroma/v2"
+ "github.com/alecthomas/chroma/v2/lexers"
+ "github.com/go-enry/go-enry/v2"
+)
+
+const mapKeyLowerPrefix = "lower/"
+
+// chromaLexers is fully managed by us to do fast lookup for chroma lexers by file name or language name
+// Don't use lexers.Get because it is very slow in many cases (iterate all rules, filepath glob match, etc.)
+var chromaLexers = sync.OnceValue(func() (ret struct {
+ conflictingExtLangMap map[string]string
+
+ lowerNameMap map[string]chroma.Lexer // lexer name (lang name) in lower-case
+ fileBaseMap map[string]chroma.Lexer
+ fileExtMap map[string]chroma.Lexer
+ fileParts []struct {
+ part string
+ lexer chroma.Lexer
+ }
+},
+) {
+ ret.lowerNameMap = make(map[string]chroma.Lexer)
+ ret.fileBaseMap = make(map[string]chroma.Lexer)
+ ret.fileExtMap = make(map[string]chroma.Lexer)
+
+ // Chroma has overlaps in file extension for different languages,
+ // When we need to do a fast render, there is no way to detect the language by content,
+ // so we can only choose default languages for the overlapping file extensions.
+ ret.conflictingExtLangMap = map[string]string{
+ ".as": "ActionScript 3", // ActionScript
+ ".asm": "NASM", // TASM, NASM, RGBDS Assembly, Z80 Assembly
+ ".ASM": "NASM",
+ ".bas": "VB.net", // QBasic
+ ".bf": "Beef", // Brainfuck
+ ".fs": "FSharp", // Forth
+ ".gd": "GDScript", // GDScript3
+ ".h": "C", // Objective-C
+ ".hcl": "Terraform", // HCL
+ ".hh": "C++", // HolyC
+ ".inc": "PHP", // ObjectPascal, POVRay, SourcePawn, PHTML
+ ".m": "Objective-C", // Matlab, Mathematica, Mason
+ ".mc": "Mason", // MonkeyC
+ ".network": "SYSTEMD", // INI
+ ".php": "PHP", // PHTML
+ ".php3": "PHP", // PHTML
+ ".php4": "PHP", // PHTML
+ ".php5": "PHP", // PHTML
+ ".pl": "Perl", // Prolog, Raku
+ ".pm": "Perl", // Promela, Raku
+ ".pp": "ObjectPascal", // Puppet
+ ".s": "ArmAsm", // GAS
+ ".S": "ArmAsm", // R, GAS
+ ".service": "SYSTEMD", // INI
+ ".socket": "SYSTEMD", // INI
+ ".sql": "SQL", // MySQL
+ ".t": "Perl", // Raku
+ ".ts": "TypeScript", // TypoScript
+ ".v": "V", // verilog
+ ".xslt": "HTML", // XML
+ }
+
+ isPlainPattern := func(key string) bool {
+ return !strings.ContainsAny(key, "*?[]") // only support simple patterns
+ }
+
+ setMapWithLowerKey := func(m map[string]chroma.Lexer, key string, lexer chroma.Lexer) {
+ if _, conflict := m[key]; conflict {
+ panic("duplicate key in lexer map: " + key + ", need to add it to conflictingExtLangMap")
+ }
+ m[key] = lexer
+ m[mapKeyLowerPrefix+strings.ToLower(key)] = lexer
+ }
+
+ processFileName := func(fileName string, lexer chroma.Lexer) bool {
+ if isPlainPattern(fileName) {
+ // full base name match
+ setMapWithLowerKey(ret.fileBaseMap, fileName, lexer)
+ return true
+ }
+ if strings.HasPrefix(fileName, "*") {
+ // ext name match: "*.js"
+ fileExt := strings.Trim(fileName, "*")
+ if isPlainPattern(fileExt) {
+ presetName := ret.conflictingExtLangMap[fileExt]
+ if presetName == "" || lexer.Config().Name == presetName {
+ setMapWithLowerKey(ret.fileExtMap, fileExt, lexer)
+ }
+ return true
+ }
+ }
+ if strings.HasSuffix(fileName, "*") {
+ // part match: "*.env.*"
+ filePart := strings.Trim(fileName, "*")
+ if isPlainPattern(filePart) {
+ ret.fileParts = append(ret.fileParts, struct {
+ part string
+ lexer chroma.Lexer
+ }{
+ part: filePart,
+ lexer: lexer,
+ })
+ return true
+ }
+ }
+ return false
+ }
+
+ expandGlobPatterns := func(patterns []string) []string {
+ // expand patterns like "file.[ch]" to "file.c" and "file.h", only one pair of "[]" is supported, enough for current Chroma lexers
+ for idx, s := range patterns {
+ idx1 := strings.IndexByte(s, '[')
+ idx2 := strings.IndexByte(s, ']')
+ if idx1 != -1 && idx2 != -1 && idx2 > idx1+1 {
+ left, mid, right := s[:idx1], s[idx1+1:idx2], s[idx2+1:]
+ patterns[idx] = left + mid[0:1] + right
+ for i := 1; i < len(mid); i++ {
+ patterns = append(patterns, left+mid[i:i+1]+right)
+ }
+ }
+ }
+ return patterns
+ }
+
+ // add lexers to our map, for fast lookup
+ for _, lexer := range lexers.GlobalLexerRegistry.Lexers {
+ cfg := lexer.Config()
+ ret.lowerNameMap[strings.ToLower(lexer.Config().Name)] = lexer
+ for _, alias := range cfg.Aliases {
+ ret.lowerNameMap[strings.ToLower(alias)] = lexer
+ }
+ for _, s := range expandGlobPatterns(cfg.Filenames) {
+ if !processFileName(s, lexer) {
+ panic("unsupported file name pattern in lexer: " + s)
+ }
+ }
+ for _, s := range expandGlobPatterns(cfg.AliasFilenames) {
+ if !processFileName(s, lexer) {
+ panic("unsupported alias file name pattern in lexer: " + s)
+ }
+ }
+ }
+
+ // final check: make sure the default ext-lang mapping is correct, nothing is missing
+ for ext, lexerName := range ret.conflictingExtLangMap {
+ if lexer, ok := ret.fileExtMap[ext]; !ok || lexer.Config().Name != lexerName {
+ panic("missing default ext-lang mapping for: " + ext)
+ }
+ }
+ return ret
+})
+
+func normalizeFileNameLang(fileName, fileLang string) (string, string) {
+ fileName = path.Base(fileName)
+ fileLang, _, _ = strings.Cut(fileLang, "?") // the value from gitattributes may contain `?` parameters, so strip them
+ ext := path.Ext(fileName)
+ // the "lang" might come from enry or gitattributes, which use different names for some languages
+ switch fileLang {
+ case "F#":
+ fileLang = "FSharp"
+ case "Pascal":
+ fileLang = "ObjectPascal"
+ case "C":
+ if ext == ".C" || ext == ".H" {
+ fileLang = "C++"
+ }
+ }
+ return fileName, fileLang
+}
+
+func DetectChromaLexerByFileName(fileName, fileLang string) chroma.Lexer {
+ lexer, _ := detectChromaLexerByFileName(fileName, fileLang)
+ return lexer
+}
+
+func detectChromaLexerByFileName(fileName, fileLang string) (_ chroma.Lexer, byLang bool) {
+ fileName, fileLang = normalizeFileNameLang(fileName, fileLang)
+ fileExt := path.Ext(fileName)
+
+ // apply custom mapping for file extension, highest priority, for example:
+ // * ".my-js" -> ".js"
+ // * ".my-html" -> "HTML"
+ if fileExt != "" {
+ if val, ok := globalVars().highlightMapping[fileExt]; ok {
+ if strings.HasPrefix(val, ".") {
+ fileName = "dummy" + val
+ fileLang = ""
+ } else {
+ fileLang = val
+ }
+ }
+ }
+
+ // try to use language for lexer name
+ if fileLang != "" {
+ lexer := chromaLexers().lowerNameMap[strings.ToLower(fileLang)]
+ if lexer != nil {
+ return lexer, true
+ }
+ }
+
+ if fileName == "" {
+ return lexers.Fallback, false
+ }
+
+ // try base name
+ {
+ baseName := path.Base(fileName)
+ if lexer, ok := chromaLexers().fileBaseMap[baseName]; ok {
+ return lexer, false
+ } else if lexer, ok = chromaLexers().fileBaseMap[mapKeyLowerPrefix+strings.ToLower(baseName)]; ok {
+ return lexer, false
+ }
+ }
+
+ if fileExt == "" {
+ return lexers.Fallback, false
+ }
+
+ // try ext name
+ {
+ if lexer, ok := chromaLexers().fileExtMap[fileExt]; ok {
+ return lexer, false
+ } else if lexer, ok = chromaLexers().fileExtMap[mapKeyLowerPrefix+strings.ToLower(fileExt)]; ok {
+ return lexer, false
+ }
+ }
+
+ // try file part match, for example: ".env.local" for "*.env.*"
+ // it assumes that there must be a dot in filename (fileExt isn't empty)
+ for _, item := range chromaLexers().fileParts {
+ if strings.Contains(fileName, item.part) {
+ return item.lexer, false
+ }
+ }
+ return lexers.Fallback, false
+}
+
+// detectChromaLexerWithAnalyze returns a chroma lexer by given file name, language and code content. All parameters can be optional.
+// When code content is provided, it will be slow if no lexer is found by file name or language.
+// If no lexer is found, it will return the fallback lexer.
+func detectChromaLexerWithAnalyze(fileName, lang string, code []byte) chroma.Lexer {
+ lexer, byLang := detectChromaLexerByFileName(fileName, lang)
+
+ // if lang is provided, and it matches a lexer, use it directly
+ if byLang {
+ return lexer
+ }
+
+ // if a lexer is detected and there is no conflict for the file extension, use it directly
+ fileExt := path.Ext(fileName)
+ _, hasConflicts := chromaLexers().conflictingExtLangMap[fileExt]
+ if !hasConflicts && lexer != lexers.Fallback {
+ return lexer
+ }
+
+ // try to detect the language by content, as a best-effort guess
+ // when using "code" to detect, analyze.GetCodeLanguage is slow: it iterates many rules to detect the language from content
+ analyzedLanguage := analyze.GetCodeLanguage(fileName, code)
+ lexer = DetectChromaLexerByFileName(fileName, analyzedLanguage)
+ if lexer == lexers.Fallback {
+ if analyzedLanguage != enry.OtherLanguage {
+ log.Warn("No chroma lexer found for enry detected language: %s (file: %s), need to fix the language mapping between enry and chroma.", analyzedLanguage, fileName)
+ }
+ }
+ return lexer
+}
diff --git a/modules/highlight/lexerdetect_test.go b/modules/highlight/lexerdetect_test.go
new file mode 100644
index 0000000000..868e793a68
--- /dev/null
+++ b/modules/highlight/lexerdetect_test.go
@@ -0,0 +1,90 @@
+// Copyright 2026 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package highlight
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/alecthomas/chroma/v2/lexers"
+ "github.com/stretchr/testify/assert"
+)
+
+func BenchmarkDetectChromaLexerByFileName(b *testing.B) {
+ for b.Loop() {
+ // BenchmarkDetectChromaLexerByFileName-12 18214717 61.35 ns/op
+ DetectChromaLexerByFileName("a.sql", "")
+ }
+}
+
+func BenchmarkDetectChromaLexerWithAnalyze(b *testing.B) {
+ b.StopTimer()
+ code := []byte(strings.Repeat("SELECT * FROM table;\n", 1000))
+ b.StartTimer()
+ for b.Loop() {
+ // BenchmarkDetectChromaLexerWithAnalyze-12 87946 13310 ns/op
+ detectChromaLexerWithAnalyze("a", "", code)
+ }
+}
+
+func BenchmarkChromaAnalyze(b *testing.B) {
+ b.StopTimer()
+ code := strings.Repeat("SELECT * FROM table;\n", 1000)
+ b.StartTimer()
+ for b.Loop() {
+ // compared to detectChromaLexerWithAnalyze (go-enry), "chroma/lexers.Analyse" is very slow
+ // BenchmarkChromaAnalyze-12 519 2247104 ns/op
+ lexers.Analyse(code)
+ }
+}
+
+func BenchmarkRenderCodeByLexer(b *testing.B) {
+ b.StopTimer()
+ code := strings.Repeat("SELECT * FROM table;\n", 1000)
+ lexer := DetectChromaLexerByFileName("a.sql", "")
+ b.StartTimer()
+ for b.Loop() {
+ // Really slow .......
+ // BenchmarkRenderCodeByLexer-12 22 47159038 ns/op
+ RenderCodeByLexer(lexer, code)
+ }
+}
+
+func TestDetectChromaLexer(t *testing.T) {
+ globalVars().highlightMapping[".my-html"] = "HTML"
+ t.Cleanup(func() { delete(globalVars().highlightMapping, ".my-html") })
+
+ cases := []struct {
+ fileName string
+ language string
+ content string
+ expected string
+ }{
+ {"test.py", "", "", "Python"},
+
+ {"any-file", "javascript", "", "JavaScript"},
+ {"any-file", "", "/* vim: set filetype=python */", "Python"},
+ {"any-file", "", "", "fallback"},
+
+ {"test.fs", "", "", "FSharp"},
+ {"test.fs", "F#", "", "FSharp"},
+ {"test.fs", "", "let x = 1", "FSharp"},
+
+ {"test.c", "", "", "C"},
+ {"test.C", "", "", "C++"},
+ {"OLD-CODE.PAS", "", "", "ObjectPascal"},
+ {"test.my-html", "", "", "HTML"},
+
+ {"a.php", "", "", "PHP"},
+ {"a.sql", "", "", "SQL"},
+ {"dhcpd.conf", "", "", "ISCdhcpd"},
+ {".env.my-production", "", "", "Bash"},
+ }
+ for _, c := range cases {
+ lexer := detectChromaLexerWithAnalyze(c.fileName, c.language, []byte(c.content))
+ if assert.NotNil(t, lexer, "case: %+v", c) {
+ assert.Equal(t, c.expected, lexer.Config().Name, "case: %+v", c)
+ }
+ }
+}
diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go
index 907dd1a537..eb20b70e71 100644
--- a/modules/indexer/code/search.go
+++ b/modules/indexer/code/search.go
@@ -72,7 +72,8 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
func HighlightSearchResultCode(filename, language string, lineNums []int, code string) []*ResultLine {
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
- hl, _ := highlight.RenderCodeFast(filename, language, code)
+ lexer := highlight.DetectChromaLexerByFileName(filename, language)
+ hl := highlight.RenderCodeByLexer(lexer, code)
highlightedLines := strings.Split(string(hl), "\n")
// The lineNums outputted by render might not match the original lineNums, because "highlight" removes the last `\n`
diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go
index 17d994734a..fd3071645a 100644
--- a/modules/markup/orgmode/orgmode.go
+++ b/modules/markup/orgmode/orgmode.go
@@ -56,7 +56,7 @@ func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error
}
}()
- lexer := highlight.GetChromaLexerWithFallback("", lang, nil) // don't use content to detect, it is too slow
+ lexer := highlight.DetectChromaLexerByFileName("", lang) // don't use content to detect, it is too slow
lexer = chroma.Coalesce(lexer)
sb := &strings.Builder{}
diff --git a/routers/web/repo/blame.go b/routers/web/repo/blame.go
index 25eb88eefc..4fb61bee6d 100644
--- a/routers/web/repo/blame.go
+++ b/routers/web/repo/blame.go
@@ -267,7 +267,7 @@ func renderBlame(ctx *context.Context, blameParts []*gitrepo.BlamePart, commitNa
bufContent := buf.Bytes()
bufContent = charset.ToUTF8(bufContent, charset.ConvertOpts{})
- highlighted, lexerName := highlight.RenderCodeSlowGuess(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent))
+ highlighted, _, lexerDisplayName := highlight.RenderCodeSlowGuess(path.Base(ctx.Repo.TreePath), language, util.UnsafeBytesToString(bufContent))
unsafeLines := highlight.UnsafeSplitHighlightedLines(highlighted)
for i, br := range rows {
var line template.HTML
@@ -280,5 +280,5 @@ func renderBlame(ctx *context.Context, blameParts []*gitrepo.BlamePart, commitNa
ctx.Data["EscapeStatus"] = escapeStatus
ctx.Data["BlameRows"] = rows
- ctx.Data["LexerName"] = lexerName
+ ctx.Data["LexerName"] = lexerDisplayName
}
diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go
index 6b29582208..7777cf4a1c 100644
--- a/services/gitdiff/gitdiff.go
+++ b/services/gitdiff/gitdiff.go
@@ -40,6 +40,7 @@ import (
"code.gitea.io/gitea/modules/translation"
"code.gitea.io/gitea/modules/util"
+ "github.com/alecthomas/chroma/v2"
"github.com/sergi/go-diff/diffmatchpatch"
stdcharset "golang.org/x/net/html/charset"
"golang.org/x/text/encoding"
@@ -306,6 +307,7 @@ type DiffSection struct {
language *diffVarMutable[string]
highlightedLeftLines *diffVarMutable[map[int]template.HTML]
highlightedRightLines *diffVarMutable[map[int]template.HTML]
+ highlightLexer *diffVarMutable[chroma.Lexer]
FileName string
Lines []*DiffLine
@@ -347,8 +349,10 @@ func (diffSection *DiffSection) getLineContentForRender(lineIdx int, diffLine *D
if setting.Git.DisableDiffHighlight {
return template.HTML(html.EscapeString(diffLine.Content[1:]))
}
- h, _ = highlight.RenderCodeFast(diffSection.FileName, fileLanguage, diffLine.Content[1:])
- return h
+ if diffSection.highlightLexer.value == nil {
+ diffSection.highlightLexer.value = highlight.DetectChromaLexerByFileName(diffSection.FileName, fileLanguage)
+ }
+ return highlight.RenderCodeByLexer(diffSection.highlightLexer.value, diffLine.Content[1:])
}
func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType, leftLine, rightLine *DiffLine, locale translation.Locale) DiffInline {
@@ -391,6 +395,12 @@ func (diffSection *DiffSection) getDiffLineForRender(diffLineType DiffLineType,
// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
+ defer func() {
+ if err := recover(); err != nil {
+ // the logic in this function is too complex, so catch any panic here because Go's template engine doesn't print the stack
+ log.Error("panic in GetComputedInlineDiffFor: %v\nStack: %s", err, log.Stack(2))
+ }
+ }()
// try to find equivalent diff line. ignore, otherwise
switch diffLine.Type {
case DiffLineSection:
@@ -452,6 +462,7 @@ type DiffFile struct {
// for render purpose only, will be filled by the extra loop in GitDiffForRender, the maps of lines are 0-based
language diffVarMutable[string]
+ highlightRender diffVarMutable[chroma.Lexer] // cache render (atm: lexer) for current file, only detect once for line-by-line mode
highlightedLeftLines diffVarMutable[map[int]template.HTML]
highlightedRightLines diffVarMutable[map[int]template.HTML]
}
@@ -932,6 +943,7 @@ func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
func newDiffSectionForDiffFile(curFile *DiffFile) *DiffSection {
return &DiffSection{
language: &curFile.language,
+ highlightLexer: &curFile.highlightRender,
highlightedLeftLines: &curFile.highlightedLeftLines,
highlightedRightLines: &curFile.highlightedRightLines,
}
@@ -1395,7 +1407,8 @@ func highlightCodeLines(name, lang string, sections []*DiffSection, isLeft bool,
}
content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{}))
- highlightedNewContent, _ := highlight.RenderCodeFast(name, lang, content)
+ lexer := highlight.DetectChromaLexerByFileName(name, lang)
+ highlightedNewContent := highlight.RenderCodeByLexer(lexer, content)
unsafeLines := highlight.UnsafeSplitHighlightedLines(highlightedNewContent)
lines := make(map[int]template.HTML, len(unsafeLines))
// only save the highlighted lines we need, but not the whole file, to save memory
diff --git a/services/gitdiff/gitdiff_excerpt.go b/services/gitdiff/gitdiff_excerpt.go
index be66d8e2af..4b1958fc11 100644
--- a/services/gitdiff/gitdiff_excerpt.go
+++ b/services/gitdiff/gitdiff_excerpt.go
@@ -11,6 +11,8 @@ import (
"io"
"code.gitea.io/gitea/modules/setting"
+
+ "github.com/alecthomas/chroma/v2"
)
type BlobExcerptOptions struct {
@@ -65,6 +67,7 @@ func BuildBlobExcerptDiffSection(filePath string, reader io.Reader, opts BlobExc
chunkSize := BlobExcerptChunkSize
section := &DiffSection{
language: &diffVarMutable[string]{value: language},
+ highlightLexer: &diffVarMutable[chroma.Lexer]{},
highlightedLeftLines: &diffVarMutable[map[int]template.HTML]{},
highlightedRightLines: &diffVarMutable[map[int]template.HTML]{},
FileName: filePath,
diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go
index b99b7e3675..ea9a8829ed 100644
--- a/services/gitdiff/highlightdiff_test.go
+++ b/services/gitdiff/highlightdiff_test.go
@@ -76,8 +76,8 @@ func TestDiffWithHighlight(t *testing.T) {
})
t.Run("ComplexDiff1", func(t *testing.T) {
- oldCode, _ := highlight.RenderCodeFast("a.go", "Go", `xxx || yyy`)
- newCode, _ := highlight.RenderCodeFast("a.go", "Go", `bot&xxx || bot&yyy`)
+ oldCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `xxx || yyy`)
+ newCode, _, _ := highlight.RenderCodeSlowGuess("a.go", "Go", `bot&xxx || bot&yyy`)
hcd := newHighlightCodeDiff()
out := hcd.diffLineWithHighlight(DiffLineAdd, oldCode, newCode)
assert.Equal(t, strings.ReplaceAll(`