diff --git a/source/ChanSort.Loader.Samsung/Zip/DbSerializer.cs b/source/ChanSort.Loader.Samsung/Zip/DbSerializer.cs
index 796bdd3..cf95acf 100644
--- a/source/ChanSort.Loader.Samsung/Zip/DbSerializer.cs
+++ b/source/ChanSort.Loader.Samsung/Zip/DbSerializer.cs
@@ -10,13 +10,45 @@ using ChanSort.Api;
namespace ChanSort.Loader.Samsung.Zip
{
///
- /// Loader for Samsung J/K/M/N/R/Q series .zip files (2015 - 2020)
+ /// Loader for Samsung .zip files starting with model J in 2015 (and still valid as of 2022 for current models)
+ ///
+ /// The .zip file contains various SQLite database files without file extensions.
+ ///
+ /// In theory SQLite is neutral to bit-ness (32/64) and endian-ness (MSB/LSB first) and should handle strings without issues.
+ /// SQLite also has a dynamic type system, allowing individual row values to have a different type than the column's default.
+ /// All observed Samsung databases are set to encoding "UTF-16le".
+ ///
+ /// Samsung somehow manages to store strings in columns/cells with data type TEXT in reversed UTF16 byte-order (as BE instead of LE).
+ /// Reading such a TEXT column returns an object of type "string" looking Chinese due to the swapped high/low-order bytes.
+ /// One solution is to explicitly cast the column to BLOB in the query and manually decode it as UTF16BE. (Always works)
+ /// Another approach is to encode the string to a byte[] and decode it again as UTF16BE. (This doesn't work for format 1242)
+ ///
+ /// While it's easy to read strings by casting them to BLOBs, there is a severe catch when writing strings to the database.
+ /// Saving a byte[] as BLOB changes the value's data type in the database to BLOB and the TV receives byte[] instead of string - booom!
+ /// Saving a byte[] as TEXT leads to automatic conversion in the Sqlite library, decoding it as UTF16LE and writing it in LE byte-order - booom!
+ /// The hack is to pass a "Chinese"-ified string to the DB, manually swapping byte order through chained LE-encode + BE-decode.
+ /// That does NOT work for format 1242 though.
+ ///
+ /// Up until Microsoft.Data.Sqlite version 5.0.8 with SQLitePCLRaw 2.0.4 a workaround was to pass the SQL parameter as BLOB
+ /// with the expected byte order and cast the value to TEXT in the SQL update statement.
+ /// With Microsoft.Data.Sqlite version 7.0.0 and SQLitePCLRaw 2.1.2 this no longer works and the TEXT value ends up in the
+ /// database column as a readable UTF16-LE string instead of the expected reversed UTF16-BE.
+ ///
+ /// Format "1242"
+ ///
+ /// To make things even more complicated, there is file format version _1242, which stores channel names not as UTF16, but
+ /// instead as a raw byte sequence that encodes 16 UTF16BE bits in 3-byte UTF8 sequences, which also looks "Chinese".
+ /// In this format it is not possible to query the string as TEXT and then later re-encode/decode in code, because the
+ /// DB library already corrupts the raw data in the returned string with invalid-utf16-characters at the end (0xFD, 0xFF).
+ /// This format can only be read properly by casting the TEXT column to BLOB in the query.
+ /// There is NO WAY with Microsoft.Data.Sqlite 7.0.0 to store an arbitrary byte sequence and keep its data type TEXT.
+ /// Therefore changing channel names is disabled for this format and no updates are made to string values.
///
internal class DbSerializer : SerializerBase
{
- private readonly Dictionary channelById = new Dictionary();
- private readonly Dictionary dbPathByChannelList = new Dictionary();
- private readonly List tableNames = new List();
+ private readonly Dictionary channelById = new();
+ private readonly Dictionary dbPathByChannelList = new();
+ private readonly List tableNames = new();
private Encoding encoding;
private enum FileType { Unknown, SatDb, ChannelDbDvb, ChannelDbAnalog, ChannelDbIp }
@@ -373,18 +405,25 @@ namespace ChanSort.Loader.Samsung.Zip
{
if (r.IsDBNull(fieldIndex))
return null;
- byte[] nameBytes = new byte[200];
-
- // Microsoft.Data.SqlDataReader (and the underlying native DLLs) are bugged and throw a memory access violation when using r.GetBytes(...)
- // nameLen = (int)r.GetBytes(fieldIndex, 0, nameBytes, 0, nameBytes.Length);
-
- int nameLen = 0;
+ byte[] nameBytes = new byte[1000];
+
+ // The SqliteDataReader of Microsoft.Data.Sqlite (and the underlying native DLLs) throws a memory access violation when using r.GetBytes(...)
+ //int nameLen = (int)r.GetBytes(fieldIndex, 0, nameBytes, 0, nameBytes.Length);
+
+ int nameLen = 0;
var obj = r.GetValue(fieldIndex);
- if (obj is byte[] buffer)
+ if (obj is byte[] buffer) // DB returned a BLOB in correct byte order
{
nameBytes = buffer;
nameLen = buffer.Length;
}
+ else if (obj is string str)
+ {
+ // SQLite library decoded the stored utf16be as utf16le, making everything look Chinese due to reversed byte order
+ // a 1242 format file with utf16be-inside-utf8-envelope encoding can also be decoded this way, but depending on the string length, the last 1-3 characters may be garbled
+ nameBytes = Encoding.Unicode.GetBytes(str);
+ nameLen = nameBytes.Length;
+ }
this.encoding ??= AutoDetectUtf16Encoding(nameBytes, nameLen);
if (this.encoding == null)
@@ -397,8 +436,7 @@ namespace ChanSort.Loader.Samsung.Zip
#region AutoDetectUtf16Endian()
private Encoding AutoDetectUtf16Encoding(byte[] nameBytes, int nameLen)
{
- if (this.DefaultEncoding is UnicodeEncoding)
- return this.DefaultEncoding;
+ //return Encoding.BigEndianUnicode;
int evenBytesZero = 0;
int oddBytesZero = 0;
@@ -418,12 +456,14 @@ namespace ChanSort.Loader.Samsung.Zip
if (evenBytesZero + oddBytesZero == nameLen)
return null;
+ // in case of the 1242 format with 16 bits UTF16BE encoded inside 3-byte UTF8 sequences, every raw data byte has a value > 128
if (bytesAbove128 + 1 >= nameLen)
{
- //this.Features.ChannelNameEdit = ChannelNameEditMode.None; // unclear if the encoder produces byte sequences that the TV can decode again
+ this.Features.ChannelNameEdit = ChannelNameEditMode.None; // impossible to write the arbitrary byte sequence needed and at the same time maintain data type TEXT
return new Utf16InsideUtf8EnvelopeEncoding();
}
+ // so far only UTF16BE has been seen across all sample files
return evenBytesZero >= oddBytesZero ? Encoding.BigEndianUnicode : Encoding.Unicode;
}
@@ -465,6 +505,17 @@ namespace ChanSort.Loader.Samsung.Zip
}
#endregion
+ #region EncodingInfo
+ ///
+ /// Short label for the encoding actually used to decipher channel names: "uc16be", "uc16le", "16in8" or the encoding's type name; null while no encoding has been detected
+ ///
+ internal string EncodingInfo =>
+ this.encoding == Encoding.BigEndianUnicode ? "uc16be" :
+ this.encoding == Encoding.Unicode ? "uc16le" :
+ this.encoding is Utf16InsideUtf8EnvelopeEncoding ? "16in8" :
+ this.encoding?.GetType().Name; // encoding is assigned lazily while reading names and may still be null => avoid NullReferenceException
+ #endregion
+
#region Save()
public override void Save()
@@ -508,7 +559,7 @@ namespace ChanSort.Loader.Samsung.Zip
{
var canUpdateNames = this.Features.ChannelNameEdit != ChannelNameEditMode.None;
var cmd = conn.CreateCommand();
- var updateSrvName = canUpdateNames ? ", srvName=cast(@srvname as varchar)" : "";
+ var updateSrvName = canUpdateNames ? ", srvName=@srvname" : "";
cmd.CommandText = "update SRV set major=@nr, lockMode=@lock, hideGuide=@hidden, hidden=@hidden, numSel=@numsel" + updateSrvName + " where srvId=@id";
cmd.Parameters.Add("@id", SqliteType.Integer);
cmd.Parameters.Add("@nr", SqliteType.Integer);
@@ -516,7 +567,8 @@ namespace ChanSort.Loader.Samsung.Zip
cmd.Parameters.Add("@hidden", SqliteType.Integer);
cmd.Parameters.Add("@numsel", SqliteType.Integer);
if (canUpdateNames)
- cmd.Parameters.Add("@srvname", SqliteType.Blob);
+ cmd.Parameters.Add("@srvname", SqliteType.Text);
+
cmd.Prepare();
return cmd;
}
@@ -597,7 +649,7 @@ namespace ChanSort.Loader.Samsung.Zip
cmdUpdateSrv.Parameters["@hidden"].Value = channel.Hidden;
cmdUpdateSrv.Parameters["@numsel"].Value = !channel.Skip;
if (canUpdateNames)
- cmdUpdateSrv.Parameters["@srvname"].Value = channel.Name == null ? (object)DBNull.Value : encoding.GetBytes(channel.Name);
+ cmdUpdateSrv.Parameters["@srvname"].Value = channel.Name == null ? (object)DBNull.Value : encoding.GetString(Encoding.Unicode.GetBytes(channel.Name)); // convert string => UTF16LE => string with flipped byte order (looking "Chinese")
cmdUpdateSrv.ExecuteNonQuery();
// update favorites
diff --git a/source/Test.Loader.Samsung/Zip/SamsungZipTest.cs b/source/Test.Loader.Samsung/Zip/SamsungZipTest.cs
index 8aec66e..e3210c6 100644
--- a/source/Test.Loader.Samsung/Zip/SamsungZipTest.cs
+++ b/source/Test.Loader.Samsung/Zip/SamsungZipTest.cs
@@ -206,6 +206,7 @@ namespace Test.Loader.Samsung.Zip
"\t" + (freesatChannelList == null ? 0 : freesatChannelList.Count) +
"\t" + (tivusatChannelList == null ? 0 : tivusatChannelList.Count) +
"\t" + (iptvChannelList == null ? 0 : iptvChannelList.Count) +
+ "\t" + serializer.EncodingInfo +
"\t" + relPath;
Assert.IsFalse(serializer.DataRoot.IsEmpty, "No channels loaded from " + file);
diff --git a/source/changelog.md b/source/changelog.md
index 145399d..a6b4907 100644
--- a/source/changelog.md
+++ b/source/changelog.md
@@ -1,6 +1,11 @@
ChanSort Change Log
===================
+2022-11-30
+- fixed Samsung .zip lists changing channel names to "Chinese" characters in the saved file
+ (caused by a breaking change in the new version of the Microsoft.Data.Sqlite library)
+- changing channel names in Samsung \*\_1242.zip format is disabled due to the new Sqlite library
+
2022-11-29
- fixed saving of modified cmdb_\*.bin channel lists
- removed "Save as" function (which was disabled for most channel list formats)