From 1b7fbcb59dfe75dde2f96ad9e87883b468b66a04 Mon Sep 17 00:00:00 2001
From: RIVOIRA <46496312+RIVOIRA@users.noreply.github.com>
Date: Wed, 11 Feb 2026 15:21:02 +0100
Subject: [PATCH] Fix UTF-8 BOM preservation when editing files in browser
(fixes #2188) (#3954)
* Fix UTF-8 BOM preservation when editing files in browser (fixes #2188)
When editing a file encoded in UTF-8 with BOM through the web interface,
the BOM was lost during save, making it impossible to use this feature
for files requiring UTF-8 BOM encoding.
This fix:
- Detects UTF-8 BOM when reading file content
- Preserves BOM information through the edit form
- Restores BOM when writing file content back to repository
Changes:
- Add hasUtf8Bom() function to detect BOM in byte arrays, and Utf8Bom constant holding the BOM bytes
- Add hasBom field to ContentInfo case class
- Update getContentInfo to detect and store BOM information
- Add hasBom hidden field in editor form
- Update EditorForm and commitFile to handle BOM preservation
- Add unit tests for BOM detection
---
.../RepositoryViewerController.scala | 8 ++++--
.../service/RepositoryCommitFileService.scala | 14 +++++++---
.../scala/gitbucket/core/util/JGitUtil.scala | 12 +++++++--
.../gitbucket/core/util/StringUtil.scala | 13 ++++++++++
.../gitbucket/core/repo/editor.scala.html | 1 +
.../gitbucket/core/util/StringUtilSpec.scala | 26 +++++++++++++++++++
6 files changed, 66 insertions(+), 8 deletions(-)
diff --git a/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala b/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala
index bde1a2748..f95b64e9e 100644
--- a/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala
+++ b/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala
@@ -88,6 +88,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
message: Option[String],
charset: String,
lineSeparator: String,
+ hasBom: Boolean,
newFileName: String,
oldFileName: Option[String],
commit: String,
@@ -134,6 +135,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
"message" -> trim(label("Message", optional(text()))),
"charset" -> trim(label("Charset", text(required))),
"lineSeparator" -> trim(label("Line Separator", text(required))),
+ "hasBom" -> trim(label("Has BOM", boolean())),
"newFileName" -> trim(label("Filename", text(required))),
"oldFileName" -> trim(label("Old filename", optional(text()))),
"commit" -> trim(label("Commit", text(required, conflict))),
@@ -439,7 +441,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
message = form.message.getOrElse(s"Create ${form.newFileName}"),
commit = form.commit,
loginAccount = loginAccount,
- settings = context.settings
+ settings = context.settings,
+ hasBom = form.hasBom
).map(_._1)
}
@@ -496,7 +499,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
},
commit = form.commit,
loginAccount = loginAccount,
- settings = context.settings
+ settings = context.settings,
+ hasBom = form.hasBom
).map(_._1)
}
diff --git a/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala b/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala
index 7fe17150e..11e98ab58 100644
--- a/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala
+++ b/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala
@@ -8,7 +8,7 @@ import gitbucket.core.service.SystemSettingsService.SystemSettings
import gitbucket.core.service.WebHookService.WebHookPushPayload
import gitbucket.core.util.Directory.getRepositoryDir
import gitbucket.core.util.JGitUtil.CommitInfo
-import gitbucket.core.util.{JGitUtil, LockUtil}
+import gitbucket.core.util.{JGitUtil, LockUtil, StringUtil}
import org.eclipse.jgit.api.Git
import org.eclipse.jgit.dircache.{DirCache, DirCacheBuilder}
import org.eclipse.jgit.lib.*
@@ -53,16 +53,22 @@ trait RepositoryCommitFileService {
message: String,
commit: String,
loginAccount: Account,
- settings: SystemSettings
+ settings: SystemSettings,
+ hasBom: Boolean = false
)(implicit s: Session, c: JsonFormat.Context): Either[String, (ObjectId, Option[ObjectId])] = {
+ // Encode the edited text; when hasBom is set, the UTF-8 BOM bytes are prepended to non-empty content.
+ val contentBytes = content match {
+   case "" => Array.emptyByteArray
+   case text if hasBom => StringUtil.Utf8Bom ++ text.getBytes(charset)
+   case text => text.getBytes(charset)
+ }
commitFile(
repository,
branch,
path,
newFileName,
oldFileName,
- if (content.nonEmpty) { content.getBytes(charset) }
- else { Array.emptyByteArray },
+ contentBytes,
message,
commit,
loginAccount,
diff --git a/src/main/scala/gitbucket/core/util/JGitUtil.scala b/src/main/scala/gitbucket/core/util/JGitUtil.scala
index 004010a1c..18476533f 100644
--- a/src/main/scala/gitbucket/core/util/JGitUtil.scala
+++ b/src/main/scala/gitbucket/core/util/JGitUtil.scala
@@ -216,8 +216,15 @@ object JGitUtil {
* @param size total size of object in bytes
* @param content the string content
* @param charset the character encoding
+ * @param hasBom true if the content has UTF-8 BOM
*/
- case class ContentInfo(viewType: String, size: Option[Long], content: Option[String], charset: Option[String]) {
+ case class ContentInfo(
+ viewType: String,
+ size: Option[Long],
+ content: Option[String],
+ charset: Option[String],
+ hasBom: Boolean = false
+ ) {
/**
* the line separator of this content ("LF" or "CRLF")
@@ -1215,7 +1222,8 @@ object JGitUtil {
"text",
size,
Some(StringUtil.convertFromByteArray(bytes.get)),
- Some(StringUtil.detectEncoding(bytes.get))
+ Some(StringUtil.detectEncoding(bytes.get)),
+ StringUtil.hasUtf8Bom(bytes.get)
)
} else {
// binary
diff --git a/src/main/scala/gitbucket/core/util/StringUtil.scala b/src/main/scala/gitbucket/core/util/StringUtil.scala
index 3428e7a92..f6d5b6a25 100644
--- a/src/main/scala/gitbucket/core/util/StringUtil.scala
+++ b/src/main/scala/gitbucket/core/util/StringUtil.scala
@@ -106,6 +106,19 @@ object StringUtil {
}
}
+ /**
+  * Checks whether `content` begins with the three-byte UTF-8 byte order mark (0xEF 0xBB 0xBF).
+  * Arrays shorter than three bytes never match.
+  */
+ def hasUtf8Bom(content: Array[Byte]): Boolean = {
+   val bom = Seq(0xef, 0xbb, 0xbf)
+   // Mask each byte with 0xff because JVM bytes are signed.
+   content.length >= bom.length && bom.indices.forall(i => (content(i) & 0xff) == bom(i))
+ }
+
+ /** The UTF-8 BOM bytes (0xEF 0xBB 0xBF); this single array is shared, so callers must not mutate it. */
+ val Utf8Bom: Array[Byte] = Array(0xef, 0xbb, 0xbf).map(_.toByte)
+
/**
* Converts line separator in the given content.
*
diff --git a/src/main/twirl/gitbucket/core/repo/editor.scala.html b/src/main/twirl/gitbucket/core/repo/editor.scala.html
index fd3d73d17..b5868ef3d 100644
--- a/src/main/twirl/gitbucket/core/repo/editor.scala.html
+++ b/src/main/twirl/gitbucket/core/repo/editor.scala.html
@@ -64,6 +64,7 @@
+
diff --git a/src/test/scala/gitbucket/core/util/StringUtilSpec.scala b/src/test/scala/gitbucket/core/util/StringUtilSpec.scala
index 9629d1c59..969264119 100644
--- a/src/test/scala/gitbucket/core/util/StringUtilSpec.scala
+++ b/src/test/scala/gitbucket/core/util/StringUtilSpec.scala
@@ -150,4 +150,30 @@ class StringUtilSpec extends AnyFunSpec {
)
}
}
+
+ describe("hasUtf8Bom") {
+   it("should return true for byte array starting with UTF-8 BOM") {
+     val bomThenText = Array[Byte](0xef.toByte, 0xbb.toByte, 0xbf.toByte) ++ "Hi".getBytes("US-ASCII")
+     assert(StringUtil.hasUtf8Bom(bomThenText))
+   }
+   it("should return false for byte array without BOM") {
+     val plainText = "Hello".getBytes("US-ASCII")
+     assert(!StringUtil.hasUtf8Bom(plainText))
+   }
+   it("should return false for empty byte array") {
+     assert(!StringUtil.hasUtf8Bom(Array.emptyByteArray))
+   }
+   it("should return false for byte array with less than 3 bytes") {
+     assert(!StringUtil.hasUtf8Bom(Array[Byte](0xef.toByte, 0xbb.toByte)))
+   }
+ }
+
+ describe("Utf8Bom") {
+   it("should be the correct BOM byte sequence") {
+     // Compare as unsigned values: JVM bytes are signed, so 0xEF is stored as a negative number.
+     val unsigned = StringUtil.Utf8Bom.toSeq.map(_ & 0xff)
+     assert(unsigned.length == 3)
+     assert(unsigned == Seq(0xef, 0xbb, 0xbf))
+   }
+ }
}