mirror of
https://github.com/gitbucket/gitbucket.git
synced 2026-02-17 20:17:05 +01:00
* Fix UTF-8 BOM preservation when editing files in browser (fixes #2188)

  When editing a file encoded in UTF-8 with a BOM through the web interface, the BOM was lost during save, making it impossible to use this feature for files that require UTF-8 BOM encoding.

  This fix:
  - Detects the UTF-8 BOM when reading file content
  - Preserves BOM information through the edit form
  - Restores the BOM when writing file content back to the repository

  Changes:
  - Add hasUtf8Bom() function to detect a BOM in byte arrays
  - Add hasBom field to ContentInfo case class
  - Update getContentInfo to detect and store BOM information
  - Add hasBom hidden field in editor form
  - Update EditorForm and commitFile to handle BOM preservation
  - Add unit tests for BOM detection
This commit is contained in:
@@ -88,6 +88,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
|
||||
message: Option[String],
|
||||
charset: String,
|
||||
lineSeparator: String,
|
||||
hasBom: Boolean,
|
||||
newFileName: String,
|
||||
oldFileName: Option[String],
|
||||
commit: String,
|
||||
@@ -134,6 +135,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
|
||||
"message" -> trim(label("Message", optional(text()))),
|
||||
"charset" -> trim(label("Charset", text(required))),
|
||||
"lineSeparator" -> trim(label("Line Separator", text(required))),
|
||||
"hasBom" -> trim(label("Has BOM", boolean())),
|
||||
"newFileName" -> trim(label("Filename", text(required))),
|
||||
"oldFileName" -> trim(label("Old filename", optional(text()))),
|
||||
"commit" -> trim(label("Commit", text(required, conflict))),
|
||||
@@ -439,7 +441,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
|
||||
message = form.message.getOrElse(s"Create ${form.newFileName}"),
|
||||
commit = form.commit,
|
||||
loginAccount = loginAccount,
|
||||
settings = context.settings
|
||||
settings = context.settings,
|
||||
hasBom = form.hasBom
|
||||
).map(_._1)
|
||||
}
|
||||
|
||||
@@ -496,7 +499,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
|
||||
},
|
||||
commit = form.commit,
|
||||
loginAccount = loginAccount,
|
||||
settings = context.settings
|
||||
settings = context.settings,
|
||||
hasBom = form.hasBom
|
||||
).map(_._1)
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import gitbucket.core.service.SystemSettingsService.SystemSettings
|
||||
import gitbucket.core.service.WebHookService.WebHookPushPayload
|
||||
import gitbucket.core.util.Directory.getRepositoryDir
|
||||
import gitbucket.core.util.JGitUtil.CommitInfo
|
||||
import gitbucket.core.util.{JGitUtil, LockUtil}
|
||||
import gitbucket.core.util.{JGitUtil, LockUtil, StringUtil}
|
||||
import org.eclipse.jgit.api.Git
|
||||
import org.eclipse.jgit.dircache.{DirCache, DirCacheBuilder}
|
||||
import org.eclipse.jgit.lib.*
|
||||
@@ -53,16 +53,22 @@ trait RepositoryCommitFileService {
|
||||
message: String,
|
||||
commit: String,
|
||||
loginAccount: Account,
|
||||
settings: SystemSettings
|
||||
settings: SystemSettings,
|
||||
hasBom: Boolean = false
|
||||
)(implicit s: Session, c: JsonFormat.Context): Either[String, (ObjectId, Option[ObjectId])] = {
|
||||
val contentBytes = if (content.nonEmpty) {
|
||||
val bytes = content.getBytes(charset)
|
||||
if (hasBom) StringUtil.Utf8Bom ++ bytes else bytes
|
||||
} else {
|
||||
Array.emptyByteArray
|
||||
}
|
||||
commitFile(
|
||||
repository,
|
||||
branch,
|
||||
path,
|
||||
newFileName,
|
||||
oldFileName,
|
||||
if (content.nonEmpty) { content.getBytes(charset) }
|
||||
else { Array.emptyByteArray },
|
||||
contentBytes,
|
||||
message,
|
||||
commit,
|
||||
loginAccount,
|
||||
|
||||
@@ -216,8 +216,15 @@ object JGitUtil {
|
||||
* @param size total size of object in bytes
|
||||
* @param content the string content
|
||||
* @param charset the character encoding
|
||||
* @param hasBom true if the content has UTF-8 BOM
|
||||
*/
|
||||
case class ContentInfo(viewType: String, size: Option[Long], content: Option[String], charset: Option[String]) {
|
||||
case class ContentInfo(
|
||||
viewType: String,
|
||||
size: Option[Long],
|
||||
content: Option[String],
|
||||
charset: Option[String],
|
||||
hasBom: Boolean = false
|
||||
) {
|
||||
|
||||
/**
|
||||
* the line separator of this content ("LF" or "CRLF")
|
||||
@@ -1215,7 +1222,8 @@ object JGitUtil {
|
||||
"text",
|
||||
size,
|
||||
Some(StringUtil.convertFromByteArray(bytes.get)),
|
||||
Some(StringUtil.detectEncoding(bytes.get))
|
||||
Some(StringUtil.detectEncoding(bytes.get)),
|
||||
StringUtil.hasUtf8Bom(bytes.get)
|
||||
)
|
||||
} else {
|
||||
// binary
|
||||
|
||||
@@ -106,6 +106,19 @@ object StringUtil {
|
||||
}
|
||||
}
|
||||
|
||||
/** UTF-8 BOM (Byte Order Mark) byte sequence: 0xEF 0xBB 0xBF. */
val Utf8Bom: Array[Byte] = Array(0xef.toByte, 0xbb.toByte, 0xbf.toByte)

/**
 * Detects if the given byte array starts with UTF-8 BOM (Byte Order Mark).
 *
 * The comparison is made against [[Utf8Bom]] so the marker bytes are spelled
 * out in exactly one place.
 *
 * @param content the raw bytes to inspect
 * @return true if `content` begins with the three BOM bytes; false otherwise,
 *         including for arrays shorter than three bytes and the empty array
 */
def hasUtf8Bom(content: Array[Byte]): Boolean =
  content.startsWith(Utf8Bom)
|
||||
|
||||
/**
|
||||
* Converts line separator in the given content.
|
||||
*
|
||||
|
||||
@@ -64,6 +64,7 @@
|
||||
<input type="submit" id="commitButton" class="btn btn-success" value="Commit changes" disabled="true"/>
|
||||
<input type="hidden" id="charset" name="charset" value="@content.charset"/>
|
||||
<input type="hidden" id="lineSeparator" name="lineSeparator" value="@content.lineSeparator"/>
|
||||
<input type="hidden" id="hasBom" name="hasBom" value="@content.hasBom"/>
|
||||
<input type="hidden" id="content" name="content" value=""/>
|
||||
<input type="hidden" id="initial" value="@content.content"/>
|
||||
<input type="hidden" id="commit" name="commit" value="@commit"/>
|
||||
|
||||
@@ -150,4 +150,30 @@ class StringUtilSpec extends AnyFunSpec {
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
describe("hasUtf8Bom") {
  // Each case feeds a small fixture to the detector and checks the verdict.
  it("should return true for byte array starting with UTF-8 BOM") {
    val bomThenText = Array(0xef, 0xbb, 0xbf).map(_.toByte) ++ "Hi".getBytes("US-ASCII")
    assert(StringUtil.hasUtf8Bom(bomThenText))
  }

  it("should return false for byte array without BOM") {
    val plainText = "Hello".getBytes("US-ASCII")
    assert(!StringUtil.hasUtf8Bom(plainText))
  }

  it("should return false for empty byte array") {
    assert(!StringUtil.hasUtf8Bom(Array.emptyByteArray))
  }

  it("should return false for byte array with less than 3 bytes") {
    // Only the first two BOM bytes: too short to be a complete marker.
    val truncatedBom = Array(0xef, 0xbb).map(_.toByte)
    assert(!StringUtil.hasUtf8Bom(truncatedBom))
  }
}
|
||||
|
||||
describe("Utf8Bom") {
  it("should be the correct BOM byte sequence") {
    // Mask to unsigned values so the bytes can be compared with hex literals.
    val unsignedBytes = StringUtil.Utf8Bom.map(_ & 0xff).toSeq
    assert(unsignedBytes == Seq(0xef, 0xbb, 0xbf))
  }
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user