Fix UTF-8 BOM preservation when editing files in browser (fixes #2188) (#3954)

* Fix UTF-8 BOM preservation when editing files in browser (fixes #2188)

When editing a file encoded in UTF-8 with BOM through the web interface,
the BOM was lost during save, making it impossible to use this feature
for files requiring UTF-8 BOM encoding.

This fix:
- Detects UTF-8 BOM when reading file content
- Preserves BOM information through the edit form
- Restores BOM when writing file content back to repository

Changes:
- Add hasUtf8Bom() function and Utf8Bom constant to StringUtil for detecting and restoring the BOM in byte arrays
- Add hasBom field to ContentInfo case class
- Update getContentInfo to detect and store BOM information
- Add hasBom hidden field in editor form
- Update EditorForm and commitFile to handle BOM preservation
- Add unit tests for BOM detection
This commit is contained in:
RIVOIRA
2026-02-11 15:21:02 +01:00
committed by GitHub
parent 03760f126b
commit 1b7fbcb59d
6 changed files with 66 additions and 8 deletions

View File

@@ -88,6 +88,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
message: Option[String],
charset: String,
lineSeparator: String,
hasBom: Boolean,
newFileName: String,
oldFileName: Option[String],
commit: String,
@@ -134,6 +135,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
"message" -> trim(label("Message", optional(text()))),
"charset" -> trim(label("Charset", text(required))),
"lineSeparator" -> trim(label("Line Separator", text(required))),
"hasBom" -> trim(label("Has BOM", boolean())),
"newFileName" -> trim(label("Filename", text(required))),
"oldFileName" -> trim(label("Old filename", optional(text()))),
"commit" -> trim(label("Commit", text(required, conflict))),
@@ -439,7 +441,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
message = form.message.getOrElse(s"Create ${form.newFileName}"),
commit = form.commit,
loginAccount = loginAccount,
settings = context.settings
settings = context.settings,
hasBom = form.hasBom
).map(_._1)
}
@@ -496,7 +499,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
},
commit = form.commit,
loginAccount = loginAccount,
settings = context.settings
settings = context.settings,
hasBom = form.hasBom
).map(_._1)
}

View File

@@ -8,7 +8,7 @@ import gitbucket.core.service.SystemSettingsService.SystemSettings
import gitbucket.core.service.WebHookService.WebHookPushPayload
import gitbucket.core.util.Directory.getRepositoryDir
import gitbucket.core.util.JGitUtil.CommitInfo
import gitbucket.core.util.{JGitUtil, LockUtil}
import gitbucket.core.util.{JGitUtil, LockUtil, StringUtil}
import org.eclipse.jgit.api.Git
import org.eclipse.jgit.dircache.{DirCache, DirCacheBuilder}
import org.eclipse.jgit.lib.*
@@ -53,16 +53,22 @@ trait RepositoryCommitFileService {
message: String,
commit: String,
loginAccount: Account,
settings: SystemSettings
settings: SystemSettings,
hasBom: Boolean = false
)(implicit s: Session, c: JsonFormat.Context): Either[String, (ObjectId, Option[ObjectId])] = {
val contentBytes = if (content.nonEmpty) {
val bytes = content.getBytes(charset)
if (hasBom) StringUtil.Utf8Bom ++ bytes else bytes
} else {
Array.emptyByteArray
}
commitFile(
repository,
branch,
path,
newFileName,
oldFileName,
if (content.nonEmpty) { content.getBytes(charset) }
else { Array.emptyByteArray },
contentBytes,
message,
commit,
loginAccount,

View File

@@ -216,8 +216,15 @@ object JGitUtil {
* @param size total size of object in bytes
* @param content the string content
* @param charset the character encoding
* @param hasBom true if the content has UTF-8 BOM
*/
case class ContentInfo(viewType: String, size: Option[Long], content: Option[String], charset: Option[String]) {
case class ContentInfo(
viewType: String,
size: Option[Long],
content: Option[String],
charset: Option[String],
hasBom: Boolean = false
) {
/**
* the line separator of this content ("LF" or "CRLF")
@@ -1215,7 +1222,8 @@ object JGitUtil {
"text",
size,
Some(StringUtil.convertFromByteArray(bytes.get)),
Some(StringUtil.detectEncoding(bytes.get))
Some(StringUtil.detectEncoding(bytes.get)),
StringUtil.hasUtf8Bom(bytes.get)
)
} else {
// binary

View File

@@ -106,6 +106,19 @@ object StringUtil {
}
}
/**
 * Tells whether a byte array begins with the UTF-8 BOM (Byte Order Mark),
 * i.e. the three bytes 0xEF 0xBB 0xBF.
 *
 * @param content the raw bytes to inspect
 * @return true only when the first three bytes are exactly the BOM; an array
 *         shorter than three bytes always yields false
 */
def hasUtf8Bom(content: Array[Byte]): Boolean =
  content.startsWith(Array[Byte](0xef.toByte, 0xbb.toByte, 0xbf.toByte))
/**
 * The UTF-8 BOM (Byte Order Mark) as raw bytes: 0xEF 0xBB 0xBF.
 */
val Utf8Bom: Array[Byte] = Array(0xef, 0xbb, 0xbf).map(_.toByte)
/**
* Converts line separator in the given content.
*

View File

@@ -64,6 +64,7 @@
<input type="submit" id="commitButton" class="btn btn-success" value="Commit changes" disabled="true"/>
<input type="hidden" id="charset" name="charset" value="@content.charset"/>
<input type="hidden" id="lineSeparator" name="lineSeparator" value="@content.lineSeparator"/>
<input type="hidden" id="hasBom" name="hasBom" value="@content.hasBom"/>
<input type="hidden" id="content" name="content" value=""/>
<input type="hidden" id="initial" value="@content.content"/>
<input type="hidden" id="commit" name="commit" value="@commit"/>

View File

@@ -150,4 +150,30 @@ class StringUtilSpec extends AnyFunSpec {
)
}
}
// hasUtf8Bom must report true only when the array starts with the bytes EF BB BF.
describe("hasUtf8Bom") {
  it("should return true for byte array starting with UTF-8 BOM") {
    // BOM followed by the ASCII text "Hi"
    val bomThenText = Array[Byte](0xef.toByte, 0xbb.toByte, 0xbf.toByte) ++ "Hi".getBytes("US-ASCII")
    assert(StringUtil.hasUtf8Bom(bomThenText))
  }
  it("should return false for byte array without BOM") {
    val plainText = "Hello".getBytes("US-ASCII")
    assert(!StringUtil.hasUtf8Bom(plainText))
  }
  it("should return false for empty byte array") {
    val noBytes = Array.emptyByteArray
    assert(!StringUtil.hasUtf8Bom(noBytes))
  }
  it("should return false for byte array with less than 3 bytes") {
    // Only the first two bytes of the BOM: too short to match.
    val truncatedBom = Array[Byte](0xef.toByte, 0xbb.toByte)
    assert(!StringUtil.hasUtf8Bom(truncatedBom))
  }
}
// The Utf8Bom constant must be exactly the three bytes EF BB BF.
describe("Utf8Bom") {
  it("should be the correct BOM byte sequence") {
    // Mask to unsigned values so the comparison reads like the spec bytes.
    val unsignedBytes = StringUtil.Utf8Bom.map(_ & 0xff).toSeq
    assert(unsignedBytes == Seq(0xef, 0xbb, 0xbf))
  }
}
}