mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-03 20:36:07 +01:00 
			
		
		
		
	Preserve BOM in web editor (#28935)
The `ToUTF8*` functions were stripping the BOM, but a BOM is actually valid in UTF-8, so the stripping must be optional depending on the use case. This change does the following:
- Add an options struct to all `ToUTF8*` functions that, by default, strips the BOM to preserve existing behaviour
- Remove the `ToUTF8` function, as it was dead code
- Rename `ToUTF8WithErr` to `ToUTF8`
- Preserve the BOM in Monaco Editor
- Remove an unnecessary newline in the textarea value. Browsers seem to have ignored it, but it's better not to rely on this behaviour.
Fixes: https://github.com/go-gitea/gitea/issues/28743
Related: https://github.com/go-gitea/gitea/issues/6716, which seems to have once introduced a mechanism that strips and re-adds the BOM, but from what I can tell, this mechanism was removed at some point after that PR.
This commit is contained in:
		@@ -22,17 +22,21 @@ import (
 | 
				
			|||||||
// UTF8BOM is the utf-8 byte-order marker
 | 
					// UTF8BOM is the utf-8 byte-order marker
 | 
				
			||||||
var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'}
 | 
					var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					type ConvertOpts struct {
 | 
				
			||||||
 | 
						KeepBOM bool
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ToUTF8WithFallbackReader detects the encoding of content and converts to UTF-8 reader if possible
 | 
					// ToUTF8WithFallbackReader detects the encoding of content and converts to UTF-8 reader if possible
 | 
				
			||||||
func ToUTF8WithFallbackReader(rd io.Reader) io.Reader {
 | 
					func ToUTF8WithFallbackReader(rd io.Reader, opts ConvertOpts) io.Reader {
 | 
				
			||||||
	buf := make([]byte, 2048)
 | 
						buf := make([]byte, 2048)
 | 
				
			||||||
	n, err := util.ReadAtMost(rd, buf)
 | 
						n, err := util.ReadAtMost(rd, buf)
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd)
 | 
							return io.MultiReader(bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), rd)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	charsetLabel, err := DetectEncoding(buf[:n])
 | 
						charsetLabel, err := DetectEncoding(buf[:n])
 | 
				
			||||||
	if err != nil || charsetLabel == "UTF-8" {
 | 
						if err != nil || charsetLabel == "UTF-8" {
 | 
				
			||||||
		return io.MultiReader(bytes.NewReader(RemoveBOMIfPresent(buf[:n])), rd)
 | 
							return io.MultiReader(bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), rd)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	encoding, _ := charset.Lookup(charsetLabel)
 | 
						encoding, _ := charset.Lookup(charsetLabel)
 | 
				
			||||||
@@ -42,20 +46,20 @@ func ToUTF8WithFallbackReader(rd io.Reader) io.Reader {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	return transform.NewReader(
 | 
						return transform.NewReader(
 | 
				
			||||||
		io.MultiReader(
 | 
							io.MultiReader(
 | 
				
			||||||
			bytes.NewReader(RemoveBOMIfPresent(buf[:n])),
 | 
								bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)),
 | 
				
			||||||
			rd,
 | 
								rd,
 | 
				
			||||||
		),
 | 
							),
 | 
				
			||||||
		encoding.NewDecoder(),
 | 
							encoding.NewDecoder(),
 | 
				
			||||||
	)
 | 
						)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ToUTF8WithErr converts content to UTF8 encoding
 | 
					// ToUTF8 converts content to UTF8 encoding
 | 
				
			||||||
func ToUTF8WithErr(content []byte) (string, error) {
 | 
					func ToUTF8(content []byte, opts ConvertOpts) (string, error) {
 | 
				
			||||||
	charsetLabel, err := DetectEncoding(content)
 | 
						charsetLabel, err := DetectEncoding(content)
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return "", err
 | 
							return "", err
 | 
				
			||||||
	} else if charsetLabel == "UTF-8" {
 | 
						} else if charsetLabel == "UTF-8" {
 | 
				
			||||||
		return string(RemoveBOMIfPresent(content)), nil
 | 
							return string(MaybeRemoveBOM(content, opts)), nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	encoding, _ := charset.Lookup(charsetLabel)
 | 
						encoding, _ := charset.Lookup(charsetLabel)
 | 
				
			||||||
@@ -70,28 +74,22 @@ func ToUTF8WithErr(content []byte) (string, error) {
 | 
				
			|||||||
		result = append(result, content[n:]...)
 | 
							result = append(result, content[n:]...)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	result = RemoveBOMIfPresent(result)
 | 
						result = MaybeRemoveBOM(result, opts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return string(result), err
 | 
						return string(result), err
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ToUTF8WithFallback detects the encoding of content and converts to UTF-8 if possible
 | 
					// ToUTF8WithFallback detects the encoding of content and converts to UTF-8 if possible
 | 
				
			||||||
func ToUTF8WithFallback(content []byte) []byte {
 | 
					func ToUTF8WithFallback(content []byte, opts ConvertOpts) []byte {
 | 
				
			||||||
	bs, _ := io.ReadAll(ToUTF8WithFallbackReader(bytes.NewReader(content)))
 | 
						bs, _ := io.ReadAll(ToUTF8WithFallbackReader(bytes.NewReader(content), opts))
 | 
				
			||||||
	return bs
 | 
						return bs
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ToUTF8 converts content to UTF8 encoding and ignore error
 | 
					 | 
				
			||||||
func ToUTF8(content string) string {
 | 
					 | 
				
			||||||
	res, _ := ToUTF8WithErr([]byte(content))
 | 
					 | 
				
			||||||
	return res
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// ToUTF8DropErrors makes sure the return string is valid utf-8; attempts conversion if possible
 | 
					// ToUTF8DropErrors makes sure the return string is valid utf-8; attempts conversion if possible
 | 
				
			||||||
func ToUTF8DropErrors(content []byte) []byte {
 | 
					func ToUTF8DropErrors(content []byte, opts ConvertOpts) []byte {
 | 
				
			||||||
	charsetLabel, err := DetectEncoding(content)
 | 
						charsetLabel, err := DetectEncoding(content)
 | 
				
			||||||
	if err != nil || charsetLabel == "UTF-8" {
 | 
						if err != nil || charsetLabel == "UTF-8" {
 | 
				
			||||||
		return RemoveBOMIfPresent(content)
 | 
							return MaybeRemoveBOM(content, opts)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	encoding, _ := charset.Lookup(charsetLabel)
 | 
						encoding, _ := charset.Lookup(charsetLabel)
 | 
				
			||||||
@@ -117,11 +115,14 @@ func ToUTF8DropErrors(content []byte) []byte {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return RemoveBOMIfPresent(decoded)
 | 
						return MaybeRemoveBOM(decoded, opts)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// RemoveBOMIfPresent removes a UTF-8 BOM from a []byte
 | 
					// MaybeRemoveBOM removes a UTF-8 BOM from a []byte when opts.KeepBOM is false
 | 
				
			||||||
func RemoveBOMIfPresent(content []byte) []byte {
 | 
					func MaybeRemoveBOM(content []byte, opts ConvertOpts) []byte {
 | 
				
			||||||
 | 
						if opts.KeepBOM {
 | 
				
			||||||
 | 
							return content
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	if len(content) > 2 && bytes.Equal(content[0:3], UTF8BOM) {
 | 
						if len(content) > 2 && bytes.Equal(content[0:3], UTF8BOM) {
 | 
				
			||||||
		return content[3:]
 | 
							return content[3:]
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -30,15 +30,15 @@ func resetDefaultCharsetsOrder() {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func TestRemoveBOMIfPresent(t *testing.T) {
 | 
					func TestMaybeRemoveBOM(t *testing.T) {
 | 
				
			||||||
	res := RemoveBOMIfPresent([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 | 
						res := MaybeRemoveBOM([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res = RemoveBOMIfPresent([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 | 
						res = MaybeRemoveBOM([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func TestToUTF8WithErr(t *testing.T) {
 | 
					func TestToUTF8(t *testing.T) {
 | 
				
			||||||
	resetDefaultCharsetsOrder()
 | 
						resetDefaultCharsetsOrder()
 | 
				
			||||||
	var res string
 | 
						var res string
 | 
				
			||||||
	var err error
 | 
						var err error
 | 
				
			||||||
@@ -47,53 +47,53 @@ func TestToUTF8WithErr(t *testing.T) {
 | 
				
			|||||||
	// locale, so some conversions might behave differently. For that reason, we don't
 | 
						// locale, so some conversions might behave differently. For that reason, we don't
 | 
				
			||||||
	// depend on particular conversions but in expected behaviors.
 | 
						// depend on particular conversions but in expected behaviors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{0x41, 0x42, 0x43})
 | 
						res, err = ToUTF8([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	assert.Equal(t, "ABC", res)
 | 
						assert.Equal(t, "ABC", res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// "áéíóú"
 | 
						// "áéíóú"
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 | 
						res, err = ToUTF8([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// "áéíóú"
 | 
						// "áéíóú"
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{
 | 
						res, err = ToUTF8([]byte{
 | 
				
			||||||
		0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3,
 | 
							0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3,
 | 
				
			||||||
		0xc3, 0xba,
 | 
							0xc3, 0xba,
 | 
				
			||||||
	})
 | 
						}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{
 | 
						res, err = ToUTF8([]byte{
 | 
				
			||||||
		0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
							0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
				
			||||||
		0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
 | 
							0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
 | 
				
			||||||
	})
 | 
						}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	stringMustStartWith(t, "Hola,", res)
 | 
						stringMustStartWith(t, "Hola,", res)
 | 
				
			||||||
	stringMustEndWith(t, "AAA.", res)
 | 
						stringMustEndWith(t, "AAA.", res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{
 | 
						res, err = ToUTF8([]byte{
 | 
				
			||||||
		0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
							0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
				
			||||||
		0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
 | 
							0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
 | 
				
			||||||
	})
 | 
						}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	stringMustStartWith(t, "Hola,", res)
 | 
						stringMustStartWith(t, "Hola,", res)
 | 
				
			||||||
	stringMustEndWith(t, "AAA.", res)
 | 
						stringMustEndWith(t, "AAA.", res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{
 | 
						res, err = ToUTF8([]byte{
 | 
				
			||||||
		0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
							0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
				
			||||||
		0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
 | 
							0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
 | 
				
			||||||
	})
 | 
						}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	stringMustStartWith(t, "Hola,", res)
 | 
						stringMustStartWith(t, "Hola,", res)
 | 
				
			||||||
	stringMustEndWith(t, "AAA.", res)
 | 
						stringMustEndWith(t, "AAA.", res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Japanese (Shift-JIS)
 | 
						// Japanese (Shift-JIS)
 | 
				
			||||||
	// 日属秘ぞしちゅ。
 | 
						// 日属秘ぞしちゅ。
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{
 | 
						res, err = ToUTF8([]byte{
 | 
				
			||||||
		0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82,
 | 
							0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82,
 | 
				
			||||||
		0xBF, 0x82, 0xE3, 0x81, 0x42,
 | 
							0xBF, 0x82, 0xE3, 0x81, 0x42,
 | 
				
			||||||
	})
 | 
						}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	assert.Equal(t, []byte{
 | 
						assert.Equal(t, []byte{
 | 
				
			||||||
		0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
 | 
							0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
 | 
				
			||||||
@@ -101,7 +101,7 @@ func TestToUTF8WithErr(t *testing.T) {
 | 
				
			|||||||
	},
 | 
						},
 | 
				
			||||||
		[]byte(res))
 | 
							[]byte(res))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res, err = ToUTF8WithErr([]byte{0x00, 0x00, 0x00, 0x00})
 | 
						res, err = ToUTF8([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{})
 | 
				
			||||||
	assert.NoError(t, err)
 | 
						assert.NoError(t, err)
 | 
				
			||||||
	assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, []byte(res))
 | 
						assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, []byte(res))
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -109,22 +109,22 @@ func TestToUTF8WithErr(t *testing.T) {
 | 
				
			|||||||
func TestToUTF8WithFallback(t *testing.T) {
 | 
					func TestToUTF8WithFallback(t *testing.T) {
 | 
				
			||||||
	resetDefaultCharsetsOrder()
 | 
						resetDefaultCharsetsOrder()
 | 
				
			||||||
	// "ABC"
 | 
						// "ABC"
 | 
				
			||||||
	res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43})
 | 
						res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
 | 
						assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// "áéíóú"
 | 
						// "áéíóú"
 | 
				
			||||||
	res = ToUTF8WithFallback([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 | 
						res = ToUTF8WithFallback([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// UTF8 BOM + "áéíóú"
 | 
						// UTF8 BOM + "áéíóú"
 | 
				
			||||||
	res = ToUTF8WithFallback([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 | 
						res = ToUTF8WithFallback([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// "Hola, así cómo ños"
 | 
						// "Hola, así cómo ños"
 | 
				
			||||||
	res = ToUTF8WithFallback([]byte{
 | 
						res = ToUTF8WithFallback([]byte{
 | 
				
			||||||
		0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
							0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
				
			||||||
		0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73,
 | 
							0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73,
 | 
				
			||||||
	})
 | 
						}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{
 | 
						assert.Equal(t, []byte{
 | 
				
			||||||
		0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63,
 | 
							0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63,
 | 
				
			||||||
		0xC3, 0xB3, 0x6D, 0x6F, 0x20, 0xC3, 0xB1, 0x6F, 0x73,
 | 
							0xC3, 0xB3, 0x6D, 0x6F, 0x20, 0xC3, 0xB1, 0x6F, 0x73,
 | 
				
			||||||
@@ -133,126 +133,65 @@ func TestToUTF8WithFallback(t *testing.T) {
 | 
				
			|||||||
	// "Hola, así cómo "
 | 
						// "Hola, así cómo "
 | 
				
			||||||
	minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20}
 | 
						minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res = ToUTF8WithFallback([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73})
 | 
						res = ToUTF8WithFallback([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73}, ConvertOpts{})
 | 
				
			||||||
	// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
						// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
				
			||||||
	assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
						assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res = ToUTF8WithFallback([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73})
 | 
						res = ToUTF8WithFallback([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73}, ConvertOpts{})
 | 
				
			||||||
	// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
						// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
				
			||||||
	assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
						assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Japanese (Shift-JIS)
 | 
						// Japanese (Shift-JIS)
 | 
				
			||||||
	// "日属秘ぞしちゅ。"
 | 
						// "日属秘ぞしちゅ。"
 | 
				
			||||||
	res = ToUTF8WithFallback([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42})
 | 
						res = ToUTF8WithFallback([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{
 | 
						assert.Equal(t, []byte{
 | 
				
			||||||
		0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
 | 
							0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
 | 
				
			||||||
		0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
 | 
							0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
 | 
				
			||||||
	}, res)
 | 
						}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res = ToUTF8WithFallback([]byte{0x00, 0x00, 0x00, 0x00})
 | 
						res = ToUTF8WithFallback([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)
 | 
						assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func TestToUTF8(t *testing.T) {
 | 
					 | 
				
			||||||
	resetDefaultCharsetsOrder()
 | 
					 | 
				
			||||||
	// Note: golang compiler seems to behave differently depending on the current
 | 
					 | 
				
			||||||
	// locale, so some conversions might behave differently. For that reason, we don't
 | 
					 | 
				
			||||||
	// depend on particular conversions but in expected behaviors.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	res := ToUTF8(string([]byte{0x41, 0x42, 0x43}))
 | 
					 | 
				
			||||||
	assert.Equal(t, "ABC", res)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// "áéíóú"
 | 
					 | 
				
			||||||
	res = ToUTF8(string([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}))
 | 
					 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// BOM + "áéíóú"
 | 
					 | 
				
			||||||
	res = ToUTF8(string([]byte{
 | 
					 | 
				
			||||||
		0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3,
 | 
					 | 
				
			||||||
		0xc3, 0xba,
 | 
					 | 
				
			||||||
	}))
 | 
					 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Latin1
 | 
					 | 
				
			||||||
	// Hola, así cómo ños
 | 
					 | 
				
			||||||
	res = ToUTF8(string([]byte{
 | 
					 | 
				
			||||||
		0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
					 | 
				
			||||||
		0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73,
 | 
					 | 
				
			||||||
	}))
 | 
					 | 
				
			||||||
	assert.Equal(t, []byte{
 | 
					 | 
				
			||||||
		0x48, 0x6f, 0x6c, 0x61, 0x2c, 0x20, 0x61, 0x73, 0xc3, 0xad, 0x20, 0x63,
 | 
					 | 
				
			||||||
		0xc3, 0xb3, 0x6d, 0x6f, 0x20, 0xc3, 0xb1, 0x6f, 0x73,
 | 
					 | 
				
			||||||
	}, []byte(res))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Latin1
 | 
					 | 
				
			||||||
	// Hola, así cómo \x07ños
 | 
					 | 
				
			||||||
	res = ToUTF8(string([]byte{
 | 
					 | 
				
			||||||
		0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
 | 
					 | 
				
			||||||
		0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73,
 | 
					 | 
				
			||||||
	}))
 | 
					 | 
				
			||||||
	// Hola,
 | 
					 | 
				
			||||||
	bytesMustStartWith(t, []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C}, []byte(res))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// This test FAILS
 | 
					 | 
				
			||||||
	// res = ToUTF8("Hola, así cómo \x81ños")
 | 
					 | 
				
			||||||
	// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
					 | 
				
			||||||
	// assert.Regexp(t, "^Hola, así cómo", res)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	// Japanese (Shift-JIS)
 | 
					 | 
				
			||||||
	// 日属秘ぞしちゅ。
 | 
					 | 
				
			||||||
	res = ToUTF8(string([]byte{
 | 
					 | 
				
			||||||
		0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82,
 | 
					 | 
				
			||||||
		0xBF, 0x82, 0xE3, 0x81, 0x42,
 | 
					 | 
				
			||||||
	}))
 | 
					 | 
				
			||||||
	assert.Equal(t, []byte{
 | 
					 | 
				
			||||||
		0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
 | 
					 | 
				
			||||||
		0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
 | 
					 | 
				
			||||||
	},
 | 
					 | 
				
			||||||
		[]byte(res))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	res = ToUTF8("\x00\x00\x00\x00")
 | 
					 | 
				
			||||||
	assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, []byte(res))
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
func TestToUTF8DropErrors(t *testing.T) {
 | 
					func TestToUTF8DropErrors(t *testing.T) {
 | 
				
			||||||
	resetDefaultCharsetsOrder()
 | 
						resetDefaultCharsetsOrder()
 | 
				
			||||||
	// "ABC"
 | 
						// "ABC"
 | 
				
			||||||
	res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43})
 | 
						res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
 | 
						assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// "áéíóú"
 | 
						// "áéíóú"
 | 
				
			||||||
	res = ToUTF8DropErrors([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 | 
						res = ToUTF8DropErrors([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// UTF8 BOM + "áéíóú"
 | 
						// UTF8 BOM + "áéíóú"
 | 
				
			||||||
	res = ToUTF8DropErrors([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
 | 
						res = ToUTF8DropErrors([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
						assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// "Hola, así cómo ños"
 | 
						// "Hola, así cómo ños"
 | 
				
			||||||
	res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73})
 | 
						res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73}, res[:8])
 | 
						assert.Equal(t, []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73}, res[:8])
 | 
				
			||||||
	assert.Equal(t, []byte{0x73}, res[len(res)-1:])
 | 
						assert.Equal(t, []byte{0x73}, res[len(res)-1:])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// "Hola, así cómo "
 | 
						// "Hola, así cómo "
 | 
				
			||||||
	minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20}
 | 
						minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73})
 | 
						res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73}, ConvertOpts{})
 | 
				
			||||||
	// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
						// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
				
			||||||
	assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
						assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73})
 | 
						res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73}, ConvertOpts{})
 | 
				
			||||||
	// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
						// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
 | 
				
			||||||
	assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
						assert.Equal(t, minmatch, res[0:len(minmatch)])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	// Japanese (Shift-JIS)
 | 
						// Japanese (Shift-JIS)
 | 
				
			||||||
	// "日属秘ぞしちゅ。"
 | 
						// "日属秘ぞしちゅ。"
 | 
				
			||||||
	res = ToUTF8DropErrors([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42})
 | 
						res = ToUTF8DropErrors([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{
 | 
						assert.Equal(t, []byte{
 | 
				
			||||||
		0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
 | 
							0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
 | 
				
			||||||
		0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
 | 
							0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
 | 
				
			||||||
	}, res)
 | 
						}, res)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	res = ToUTF8DropErrors([]byte{0x00, 0x00, 0x00, 0x00})
 | 
						res = ToUTF8DropErrors([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{})
 | 
				
			||||||
	assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)
 | 
						assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -302,10 +241,6 @@ func stringMustEndWith(t *testing.T, expected, value string) {
 | 
				
			|||||||
	assert.Equal(t, expected, value[len(value)-len(expected):])
 | 
						assert.Equal(t, expected, value[len(value)-len(expected):])
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func bytesMustStartWith(t *testing.T, expected, value []byte) {
 | 
					 | 
				
			||||||
	assert.Equal(t, expected, value[:len(expected)])
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
func TestToUTF8WithFallbackReader(t *testing.T) {
 | 
					func TestToUTF8WithFallbackReader(t *testing.T) {
 | 
				
			||||||
	resetDefaultCharsetsOrder()
 | 
						resetDefaultCharsetsOrder()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -317,7 +252,7 @@ func TestToUTF8WithFallbackReader(t *testing.T) {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
		input = input[:testLen]
 | 
							input = input[:testLen]
 | 
				
			||||||
		input += "// Выключаем"
 | 
							input += "// Выключаем"
 | 
				
			||||||
		rd := ToUTF8WithFallbackReader(bytes.NewReader([]byte(input)))
 | 
							rd := ToUTF8WithFallbackReader(bytes.NewReader([]byte(input)), ConvertOpts{})
 | 
				
			||||||
		r, _ := io.ReadAll(rd)
 | 
							r, _ := io.ReadAll(rd)
 | 
				
			||||||
		assert.EqualValuesf(t, input, string(r), "testing string len=%d", testLen)
 | 
							assert.EqualValuesf(t, input, string(r), "testing string len=%d", testLen)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -174,7 +174,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
 | 
				
			|||||||
	return batch.Index(id, &RepoIndexerData{
 | 
						return batch.Index(id, &RepoIndexerData{
 | 
				
			||||||
		RepoID:    repo.ID,
 | 
							RepoID:    repo.ID,
 | 
				
			||||||
		CommitID:  commitSha,
 | 
							CommitID:  commitSha,
 | 
				
			||||||
		Content:   string(charset.ToUTF8DropErrors(fileContents)),
 | 
							Content:   string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
 | 
				
			||||||
		Language:  analyze.GetCodeLanguage(update.Filename, fileContents),
 | 
							Language:  analyze.GetCodeLanguage(update.Filename, fileContents),
 | 
				
			||||||
		UpdatedAt: time.Now().UTC(),
 | 
							UpdatedAt: time.Now().UTC(),
 | 
				
			||||||
	})
 | 
						})
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -135,7 +135,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
 | 
				
			|||||||
			Id(id).
 | 
								Id(id).
 | 
				
			||||||
			Doc(map[string]any{
 | 
								Doc(map[string]any{
 | 
				
			||||||
				"repo_id":    repo.ID,
 | 
									"repo_id":    repo.ID,
 | 
				
			||||||
				"content":    string(charset.ToUTF8DropErrors(fileContents)),
 | 
									"content":    string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
 | 
				
			||||||
				"commit_id":  sha,
 | 
									"commit_id":  sha,
 | 
				
			||||||
				"language":   analyze.GetCodeLanguage(update.Filename, fileContents),
 | 
									"language":   analyze.GetCodeLanguage(update.Filename, fileContents),
 | 
				
			||||||
				"updated_at": timeutil.TimeStampNow(),
 | 
									"updated_at": timeutil.TimeStampNow(),
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -384,7 +384,7 @@ func Diff(ctx *context.Context) {
 | 
				
			|||||||
			Metas:   ctx.Repo.Repository.ComposeMetas(ctx),
 | 
								Metas:   ctx.Repo.Repository.ComposeMetas(ctx),
 | 
				
			||||||
			GitRepo: ctx.Repo.GitRepo,
 | 
								GitRepo: ctx.Repo.GitRepo,
 | 
				
			||||||
			Ctx:     ctx,
 | 
								Ctx:     ctx,
 | 
				
			||||||
		}, template.HTMLEscapeString(string(charset.ToUTF8WithFallback(note.Message))))
 | 
							}, template.HTMLEscapeString(string(charset.ToUTF8WithFallback(note.Message, charset.ConvertOpts{}))))
 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
			ctx.ServerError("RenderCommitMessage", err)
 | 
								ctx.ServerError("RenderCommitMessage", err)
 | 
				
			||||||
			return
 | 
								return
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -142,7 +142,7 @@ func setCsvCompareContext(ctx *context.Context) {
 | 
				
			|||||||
				return nil, nil, err
 | 
									return nil, nil, err
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			csvReader, err := csv_module.CreateReaderAndDetermineDelimiter(ctx, charset.ToUTF8WithFallbackReader(reader))
 | 
								csvReader, err := csv_module.CreateReaderAndDetermineDelimiter(ctx, charset.ToUTF8WithFallbackReader(reader, charset.ConvertOpts{}))
 | 
				
			||||||
			return csvReader, reader, err
 | 
								return csvReader, reader, err
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -166,8 +166,8 @@ func editFile(ctx *context.Context, isNewFile bool) {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		buf = append(buf, d...)
 | 
							buf = append(buf, d...)
 | 
				
			||||||
		if content, err := charset.ToUTF8WithErr(buf); err != nil {
 | 
							if content, err := charset.ToUTF8(buf, charset.ConvertOpts{KeepBOM: true}); err != nil {
 | 
				
			||||||
			log.Error("ToUTF8WithErr: %v", err)
 | 
								log.Error("ToUTF8: %v", err)
 | 
				
			||||||
			ctx.Data["FileContent"] = string(buf)
 | 
								ctx.Data["FileContent"] = string(buf)
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			ctx.Data["FileContent"] = content
 | 
								ctx.Data["FileContent"] = content
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -43,7 +43,7 @@ func RenderFile(ctx *context.Context) {
 | 
				
			|||||||
	st := typesniffer.DetectContentType(buf)
 | 
						st := typesniffer.DetectContentType(buf)
 | 
				
			||||||
	isTextFile := st.IsText()
 | 
						isTextFile := st.IsText()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc))
 | 
						rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc), charset.ConvertOpts{})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if markupType := markup.Type(blob.Name()); markupType == "" {
 | 
						if markupType := markup.Type(blob.Name()); markupType == "" {
 | 
				
			||||||
		if isTextFile {
 | 
							if isTextFile {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -303,7 +303,7 @@ func LFSFileGet(ctx *context.Context) {
 | 
				
			|||||||
			break
 | 
								break
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc))
 | 
							rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc), charset.ConvertOpts{})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		// Building code view blocks with line number on server side.
 | 
							// Building code view blocks with line number on server side.
 | 
				
			||||||
		escapedContent := &bytes.Buffer{}
 | 
							escapedContent := &bytes.Buffer{}
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -303,7 +303,7 @@ func renderReadmeFile(ctx *context.Context, subfolder string, readmeFile *git.Tr
 | 
				
			|||||||
		return
 | 
							return
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc))
 | 
						rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc), charset.ConvertOpts{})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if markupType := markup.Type(readmeFile.Name()); markupType != "" {
 | 
						if markupType := markup.Type(readmeFile.Name()); markupType != "" {
 | 
				
			||||||
		ctx.Data["IsMarkup"] = true
 | 
							ctx.Data["IsMarkup"] = true
 | 
				
			||||||
@@ -492,7 +492,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) {
 | 
				
			|||||||
			break
 | 
								break
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc))
 | 
							rd := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc), charset.ConvertOpts{})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		shouldRenderSource := ctx.FormString("display") == "source"
 | 
							shouldRenderSource := ctx.FormString("display") == "source"
 | 
				
			||||||
		readmeExist := util.IsReadmeFileName(blob.Name())
 | 
							readmeExist := util.IsReadmeFileName(blob.Name())
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -38,8 +38,7 @@
 | 
				
			|||||||
						data-url="{{.Repository.Link}}/markup"
 | 
											data-url="{{.Repository.Link}}/markup"
 | 
				
			||||||
						data-context="{{.RepoLink}}"
 | 
											data-context="{{.RepoLink}}"
 | 
				
			||||||
						data-previewable-extensions="{{.PreviewableExtensions}}"
 | 
											data-previewable-extensions="{{.PreviewableExtensions}}"
 | 
				
			||||||
						data-line-wrap-extensions="{{.LineWrapExtensions}}">
 | 
											data-line-wrap-extensions="{{.LineWrapExtensions}}">{{.FileContent}}</textarea>
 | 
				
			||||||
{{.FileContent}}</textarea>
 | 
					 | 
				
			||||||
					<div class="editor-loading is-loading"></div>
 | 
										<div class="editor-loading is-loading"></div>
 | 
				
			||||||
				</div>
 | 
									</div>
 | 
				
			||||||
				<div class="ui bottom attached tab segment markup" data-tab="preview">
 | 
									<div class="ui bottom attached tab segment markup" data-tab="preview">
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -141,7 +141,7 @@ func readSQLFromFile(version string) (string, error) {
 | 
				
			|||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return "", err
 | 
							return "", err
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return string(charset.RemoveBOMIfPresent(bytes)), nil
 | 
						return string(charset.MaybeRemoveBOM(bytes, charset.ConvertOpts{})), nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func restoreOldDB(t *testing.T, version string) bool {
 | 
					func restoreOldDB(t *testing.T, version string) bool {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -114,7 +114,7 @@ export async function createMonaco(textarea, filename, editorOpts) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  const model = editor.getModel();
 | 
					  const model = editor.getModel();
 | 
				
			||||||
  model.onDidChangeContent(() => {
 | 
					  model.onDidChangeContent(() => {
 | 
				
			||||||
    textarea.value = editor.getValue();
 | 
					    textarea.value = editor.getValue({preserveBOM: true});
 | 
				
			||||||
    textarea.dispatchEvent(new Event('change')); // seems to be needed for jquery-are-you-sure
 | 
					    textarea.dispatchEvent(new Event('change')); // seems to be needed for jquery-are-you-sure
 | 
				
			||||||
  });
 | 
					  });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user