diff --git a/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala b/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala
index bde1a2748..f95b64e9e 100644
--- a/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala
+++ b/src/main/scala/gitbucket/core/controller/RepositoryViewerController.scala
@@ -88,6 +88,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
     message: Option[String],
     charset: String,
     lineSeparator: String,
+    hasBom: Boolean,
     newFileName: String,
     oldFileName: Option[String],
     commit: String,
@@ -134,6 +135,7 @@ trait RepositoryViewerControllerBase extends ControllerBase {
     "message" -> trim(label("Message", optional(text()))),
     "charset" -> trim(label("Charset", text(required))),
     "lineSeparator" -> trim(label("Line Separator", text(required))),
+    "hasBom" -> trim(label("Has BOM", boolean())),
     "newFileName" -> trim(label("Filename", text(required))),
     "oldFileName" -> trim(label("Old filename", optional(text()))),
     "commit" -> trim(label("Commit", text(required, conflict))),
@@ -439,7 +441,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
           message = form.message.getOrElse(s"Create ${form.newFileName}"),
           commit = form.commit,
           loginAccount = loginAccount,
-          settings = context.settings
+          settings = context.settings,
+          hasBom = form.hasBom
         ).map(_._1)
       }
 
@@ -496,7 +499,8 @@ trait RepositoryViewerControllerBase extends ControllerBase {
           },
           commit = form.commit,
           loginAccount = loginAccount,
-          settings = context.settings
+          settings = context.settings,
+          hasBom = form.hasBom
         ).map(_._1)
       }
 
diff --git a/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala b/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala
index 7fe17150e..11e98ab58 100644
--- a/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala
+++ b/src/main/scala/gitbucket/core/service/RepositoryCommitFileService.scala
@@ -8,7 +8,7 @@ import gitbucket.core.service.SystemSettingsService.SystemSettings
 import gitbucket.core.service.WebHookService.WebHookPushPayload
 import gitbucket.core.util.Directory.getRepositoryDir
 import gitbucket.core.util.JGitUtil.CommitInfo
-import gitbucket.core.util.{JGitUtil, LockUtil}
+import gitbucket.core.util.{JGitUtil, LockUtil, StringUtil}
 import org.eclipse.jgit.api.Git
 import org.eclipse.jgit.dircache.{DirCache, DirCacheBuilder}
 import org.eclipse.jgit.lib.*
@@ -53,16 +53,24 @@ trait RepositoryCommitFileService {
     message: String,
     commit: String,
     loginAccount: Account,
-    settings: SystemSettings
+    settings: SystemSettings,
+    hasBom: Boolean = false
   )(implicit s: Session, c: JsonFormat.Context): Either[String, (ObjectId, Option[ObjectId])] = {
+    val contentBytes = if (content.nonEmpty) {
+      val bytes = content.getBytes(charset)
+      // Prepend the BOM only if the encoded bytes do not already begin with one;
+      // a leading U+FEFF kept in `content` would otherwise yield a doubled BOM.
+      if (hasBom && !StringUtil.hasUtf8Bom(bytes)) StringUtil.Utf8Bom ++ bytes else bytes
+    } else {
+      Array.emptyByteArray
+    }
     commitFile(
       repository,
       branch,
       path,
       newFileName,
       oldFileName,
-      if (content.nonEmpty) { content.getBytes(charset) }
-      else { Array.emptyByteArray },
+      contentBytes,
       message,
       commit,
       loginAccount,
diff --git a/src/main/scala/gitbucket/core/util/JGitUtil.scala b/src/main/scala/gitbucket/core/util/JGitUtil.scala
index 004010a1c..18476533f 100644
--- a/src/main/scala/gitbucket/core/util/JGitUtil.scala
+++ b/src/main/scala/gitbucket/core/util/JGitUtil.scala
@@ -216,8 +216,15 @@ object JGitUtil {
    * @param size total size of object in bytes
    * @param content the string content
    * @param charset the character encoding
+   * @param hasBom true if the raw content starts with a UTF-8 BOM
    */
-  case class ContentInfo(viewType: String, size: Option[Long], content: Option[String], charset: Option[String]) {
+  case class ContentInfo(
+    viewType: String,
+    size: Option[Long],
+    content: Option[String],
+    charset: Option[String],
+    hasBom: Boolean = false
+  ) {
 
     /**
      * the line separator of this content ("LF" or "CRLF")
@@ -1215,7 +1222,8 @@ object JGitUtil {
             "text",
             size,
             Some(StringUtil.convertFromByteArray(bytes.get)),
-            Some(StringUtil.detectEncoding(bytes.get))
+            Some(StringUtil.detectEncoding(bytes.get)),
+            hasBom = StringUtil.hasUtf8Bom(bytes.get)
           )
         } else {
           // binary
diff --git a/src/main/scala/gitbucket/core/util/StringUtil.scala b/src/main/scala/gitbucket/core/util/StringUtil.scala
index 3428e7a92..f6d5b6a25 100644
--- a/src/main/scala/gitbucket/core/util/StringUtil.scala
+++ b/src/main/scala/gitbucket/core/util/StringUtil.scala
@@ -106,6 +106,19 @@ object StringUtil {
     }
   }
 
+  /**
+   * Detects if the given byte array starts with UTF-8 BOM (Byte Order Mark).
+   * UTF-8 BOM is the byte sequence: 0xEF 0xBB 0xBF
+   */
+  def hasUtf8Bom(content: Array[Byte]): Boolean =
+    content.length >= 3 &&
+      (content(0) & 0xff) == 0xef &&
+      (content(1) & 0xff) == 0xbb &&
+      (content(2) & 0xff) == 0xbf
+
+  /** UTF-8 BOM byte sequence (0xEF 0xBB 0xBF); shared array instance, callers must not mutate it */
+  val Utf8Bom: Array[Byte] = Array(0xef.toByte, 0xbb.toByte, 0xbf.toByte)
+
   /**
    * Converts line separator in the given content.
    *
diff --git a/src/main/twirl/gitbucket/core/repo/editor.scala.html b/src/main/twirl/gitbucket/core/repo/editor.scala.html
index fd3d73d17..b5868ef3d 100644
--- a/src/main/twirl/gitbucket/core/repo/editor.scala.html
+++ b/src/main/twirl/gitbucket/core/repo/editor.scala.html
@@ -64,6 +64,7 @@
+
diff --git a/src/test/scala/gitbucket/core/util/StringUtilSpec.scala b/src/test/scala/gitbucket/core/util/StringUtilSpec.scala
index 9629d1c59..969264119 100644
--- a/src/test/scala/gitbucket/core/util/StringUtilSpec.scala
+++ b/src/test/scala/gitbucket/core/util/StringUtilSpec.scala
@@ -150,4 +150,34 @@ class StringUtilSpec extends AnyFunSpec {
       )
     }
   }
+
+  describe("hasUtf8Bom") {
+    it("should return true for byte array starting with UTF-8 BOM") {
+      val withBom = Array[Byte](0xef.toByte, 0xbb.toByte, 0xbf.toByte, 'H'.toByte, 'i'.toByte)
+      assert(StringUtil.hasUtf8Bom(withBom))
+    }
+    it("should return false for byte array without BOM") {
+      val withoutBom = Array[Byte]('H'.toByte, 'e'.toByte, 'l'.toByte, 'l'.toByte, 'o'.toByte)
+      assert(!StringUtil.hasUtf8Bom(withoutBom))
+    }
+    it("should return false for empty byte array") {
+      assert(!StringUtil.hasUtf8Bom(Array.emptyByteArray))
+    }
+    it("should return false for byte array with less than 3 bytes") {
+      assert(!StringUtil.hasUtf8Bom(Array[Byte](0xef.toByte, 0xbb.toByte)))
+    }
+    it("should return false when only a prefix of the BOM matches") {
+      val partial = Array[Byte](0xef.toByte, 0xbb.toByte, 'A'.toByte)
+      assert(!StringUtil.hasUtf8Bom(partial))
+    }
+  }
+
+  describe("Utf8Bom") {
+    it("should be the correct BOM byte sequence") {
+      assert(StringUtil.Utf8Bom.length == 3)
+      assert((StringUtil.Utf8Bom(0) & 0xff) == 0xef)
+      assert((StringUtil.Utf8Bom(1) & 0xff) == 0xbb)
+      assert((StringUtil.Utf8Bom(2) & 0xff) == 0xbf)
+    }
+  }
 }