Add encoding detection

This commit is contained in:
odz
2013-08-24 00:54:40 +09:00
parent 6d76e93ede
commit 13578dcee8
4 changed files with 20 additions and 7 deletions

View File

@@ -2,6 +2,7 @@ package util
import org.eclipse.jgit.api.Git
import util.Directory._
import util.StringUtil._
import scala.collection.JavaConverters._
import org.eclipse.jgit.lib._
import org.eclipse.jgit.revwalk._
@@ -414,7 +415,7 @@ object JGitUtil {
DiffInfo(ChangeType.ADD, null, walk.getPathString, None, None)
} else {
DiffInfo(ChangeType.ADD, null, walk.getPathString, None,
JGitUtil.getContent(git, walk.getObjectId(0), false).filter(FileUtil.isText).map(new String(_, "UTF-8")))
JGitUtil.getContent(git, walk.getObjectId(0), false).filter(FileUtil.isText).map(convertFromByteArray))
}))
}
walk.release
@@ -436,8 +437,8 @@ object JGitUtil {
DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath, None, None)
} else {
DiffInfo(diff.getChangeType, diff.getOldPath, diff.getNewPath,
JGitUtil.getContent(git, diff.getOldId.toObjectId, false).filter(FileUtil.isText).map(new String(_, "UTF-8")),
JGitUtil.getContent(git, diff.getNewId.toObjectId, false).filter(FileUtil.isText).map(new String(_, "UTF-8")))
JGitUtil.getContent(git, diff.getOldId.toObjectId, false).filter(FileUtil.isText).map(convertFromByteArray),
JGitUtil.getContent(git, diff.getNewId.toObjectId, false).filter(FileUtil.isText).map(convertFromByteArray))
}
}.toList
}

View File

@@ -1,6 +1,7 @@
package util
import java.net.{URLDecoder, URLEncoder}
import org.mozilla.universalchardet.UniversalDetector
object StringUtil {
@@ -25,4 +26,15 @@ object StringUtil {
def escapeHtml(value: String): String =
value.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;")
def convertFromByteArray(content: Array[Byte]): String = new String(content, detectEncoding(content))
def detectEncoding(content: Array[Byte]): String = {
val detector = new UniversalDetector(null)
detector.handleData(content, 0, content.length)
detector.dataEnd()
detector.getDetectedCharset match {
case null => "UTF-8"
case e => e
}
}
}