mirror of
https://github.com/PredatH0r/ChanSort.git
synced 2026-01-15 11:52:04 +01:00
- moved all files to a "source" subdirectory to tidy up the GitHub project page
- started to write a readme.md
This commit is contained in:
237
source/ChanSort.Api/Utils/DvbStringDecoder.cs
Normal file
237
source/ChanSort.Api/Utils/DvbStringDecoder.cs
Normal file
@@ -0,0 +1,237 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace ChanSort.Api
|
||||
{
|
||||
#region Documentation
|
||||
/*
|
||||
ETSI EN 300 468
|
||||
|
||||
For one-byte character tables, the codes in the range 0x80 to 0x9F are assigned to control functions
|
||||
as shown in Table A.1: Single byte control codes
|
||||
|
||||
Control code - Control code Description
|
||||
0x80 to 0x85 reserved for future use
|
||||
0x86 character emphasis on
|
||||
0x87 character emphasis off
|
||||
0x88 to 0x89 reserved for future use
|
||||
0x8A CR/LF
|
||||
0x8B to 0x9F user defined
|
||||
|
||||
A.2 Selection of character table
|
||||
First byte value - Character code table - Table description - Reproduced in figure
|
||||
0x01 ISO/IEC 8859-5 [27] Latin/Cyrillic alphabet A.2
|
||||
0x02 ISO/IEC 8859-6 [28] Latin/Arabic alphabet A.3
|
||||
0x03 ISO/IEC 8859-7 [29] Latin/Greek alphabet A.4
|
||||
0x04 ISO/IEC 8859-8 [30] Latin/Hebrew alphabet A.5
|
||||
0x05 ISO/IEC 8859-9 [31] Latin alphabet No. 5 A.6
|
||||
0x06 ISO/IEC 8859-10 [32] Latin alphabet No. 6 A.7
|
||||
0x07 ISO/IEC 8859-11 [33] Latin/Thai (draft only) A.8
|
||||
0x08 reserved for future use (see note)
|
||||
0x09 ISO/IEC 8859-13 [34] Latin alphabet No. 7 A.9
|
||||
0x0A ISO/IEC 8859-14 [35] Latin alphabet No. 8 (Celtic) A.10
|
||||
0x0B ISO/IEC 8859-15 [36] Latin alphabet No. 9 A.11
|
||||
0x0C to 0x0F reserved for future use
|
||||
0x10 ISO/IEC 8859 See table A.4
|
||||
0x11 ISO/IEC 10646 [16] Basic Multilingual Plane (BMP)
|
||||
0x12 KSX1001-2004 [44] Korean Character Set
|
||||
0x13 GB-2312-1980 Simplified Chinese Character
|
||||
0x14 Big5 subset of ISO/IEC 10646 [16] Traditional Chinese
|
||||
0x15 UTF-8 encoding of ISO/IEC 10646 [16] Basic Multilingual Plane (BMP)
|
||||
0x16 to 0x1E reserved for future use
|
||||
0x1F Described by encoding_type_id Described by 8 bit
|
||||
|
||||
Table A.4: Character Coding Tables for first byte 0x10
|
||||
First byte value - Second byte value - Third Byte Value - Selected character code - table - Table Description
|
||||
0x10 0x00 0x00 reserved for future use
|
||||
0x10 0x00 0x01 ISO/IEC 8859-1 [23] West European
|
||||
0x10 0x00 0x02 ISO/IEC 8859-2 [24] East European
|
||||
0x10 0x00 0x03 ISO/IEC 8859-3 [25] South European
|
||||
0x10 0x00 0x04 ISO/IEC 8859-4 [26] North and North-East European
|
||||
0x10 0x00 0x05 ISO/IEC 8859-5 [27] Latin/Cyrillic A.2
|
||||
0x10 0x00 0x06 ISO/IEC 8859-6 [28] Latin/Arabic A.3
|
||||
0x10 0x00 0x07 ISO/IEC 8859-7 [29] Latin/Greek A.4
|
||||
0x10 0x00 0x08 ISO/IEC 8859-8 [30] Latin/Hebrew A.5
|
||||
0x10 0x00 0x09 ISO/IEC 8859-9 [31] West European & Turkish A.6
|
||||
0x10 0x00 0x0A ISO/IEC 8859-10 [32] North European A.7
|
||||
0x10 0x00 0x0B ISO/IEC 8859-11 [33] Thai A.8
|
||||
0x10 0x00 0x0C Reserved for future use
|
||||
0x10 0x00 0x0D ISO/IEC 8859-13 [34] Baltic A.9
|
||||
0x10 0x00 0x0E ISO/IEC 8859-14 [35] Celtic A.10
|
||||
0x10 0x00 0x0F ISO/IEC 8859-15 [36] West European A.11
|
||||
*/
|
||||
#endregion
|
||||
|
||||
public class DvbStringDecoder
|
||||
{
|
||||
static readonly string[] codePages1 =
|
||||
{
|
||||
null, "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-9", "iso-8859-10", "iso-8859-11",
|
||||
null, "iso-8859-13", "iso-8859-14", "iso-8859-15", null, null, null, null,
|
||||
null, // codePages2 prefix
|
||||
"utf-16", "x-cp20949", "x-cp20936", "utf-16", "utf-8", null, null, null,
|
||||
"utf-8", null, null, null, "utf-8"
|
||||
};
|
||||
|
||||
static readonly string[] codePages2 =
|
||||
{
|
||||
null, "iso-8859-1", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7",
|
||||
"iso-8859-8", "iso-8859-9", "iso-8859-10", "iso-8859-11", null, "iso-8859-13", "iso-8859-14", "iso-8859-15"
|
||||
};
|
||||
|
||||
private readonly Dictionary<string, Decoder> decoderCache = new Dictionary<string, Decoder>();
|
||||
|
||||
public DvbStringDecoder(Encoding defaultEncoding)
|
||||
{
|
||||
this.DefaultEncoding = defaultEncoding;
|
||||
}
|
||||
|
||||
public Encoding DefaultEncoding { get; set; }
|
||||
|
||||
#region GetChannelNames()
|
||||
public void GetChannelNames(byte[] name, int off, int len, out string longName, out string shortName)
|
||||
{
|
||||
longName = "";
|
||||
shortName = "";
|
||||
if (len == 0)
|
||||
return;
|
||||
byte b = name[off];
|
||||
if (b == 0)
|
||||
return;
|
||||
|
||||
Decoder decoder = this.DefaultEncoding.GetDecoder();
|
||||
bool singleByteChar = true;
|
||||
if (b < 0x20)
|
||||
{
|
||||
if (b == 0x10) // prefix for 2-byte code page
|
||||
{
|
||||
int cpIndex = name[off + 1] * 256 + name[off + 2];
|
||||
off += 2;
|
||||
len -= 2;
|
||||
SetDecoder(codePages2, cpIndex, ref decoder);
|
||||
}
|
||||
if (b <= 0x1F)
|
||||
SetDecoder(codePages1, b, ref decoder);
|
||||
singleByteChar = b < 0x10;
|
||||
++off;
|
||||
--len;
|
||||
}
|
||||
if (!singleByteChar)
|
||||
{
|
||||
char[] buffer = new char[100];
|
||||
int l= decoder.GetChars(name, off, len, buffer, 0, false);
|
||||
longName = new string(buffer, 0, l);
|
||||
return;
|
||||
}
|
||||
|
||||
StringBuilder sbLong = new StringBuilder();
|
||||
StringBuilder sbShort = new StringBuilder();
|
||||
bool inShortMode = false;
|
||||
for (int c = 0; c < len; c++)
|
||||
{
|
||||
int i = off + c;
|
||||
b = name[i];
|
||||
if (b == 0x00)
|
||||
break;
|
||||
|
||||
char ch = '\0';
|
||||
switch (b)
|
||||
{
|
||||
case 0x86: inShortMode = true; continue;
|
||||
case 0x87: inShortMode = false; continue;
|
||||
case 0x8a: ch = '\n'; break;
|
||||
default:
|
||||
if (b >= 0x80 && b <= 0x9f) // DVB-S control characters
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
if (ch == '\0')
|
||||
{
|
||||
// read as many bytes as necessary to get a character
|
||||
char[] charArray = new char[1];
|
||||
for (int byteCnt = 1; decoder.GetChars(name, i, byteCnt, charArray, 0) == 0; byteCnt++)
|
||||
++i;
|
||||
ch = charArray[0];
|
||||
}
|
||||
if (ch == '\0')
|
||||
continue;
|
||||
|
||||
sbLong.Append(ch);
|
||||
if (inShortMode)
|
||||
sbShort.Append(ch);
|
||||
}
|
||||
longName = sbLong.ToString();
|
||||
shortName = sbShort.ToString();
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region SetDecoder()
|
||||
private void SetDecoder(string[] codePages, int cpIndex, ref Decoder defaultDecoder)
|
||||
{
|
||||
if (cpIndex >= codePages.Length)
|
||||
return;
|
||||
Decoder decoder;
|
||||
|
||||
string cp = codePages[cpIndex];
|
||||
if (cp == null)
|
||||
return;
|
||||
|
||||
if (this.decoderCache.TryGetValue(cp, out decoder))
|
||||
{
|
||||
defaultDecoder = decoder;
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var encoding = Encoding.GetEncoding(cp);
|
||||
defaultDecoder = encoding.GetDecoder();
|
||||
}
|
||||
catch (ArgumentException)
|
||||
{
|
||||
}
|
||||
decoderCache[cp] = defaultDecoder;
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region GetCodepageBytes()
|
||||
public static byte[] GetCodepageBytes(Encoding encoding)
|
||||
{
|
||||
var encName = encoding.WebName;
|
||||
for (int i = 0; i < codePages1.Length; i++)
|
||||
{
|
||||
if (codePages1[i] == encName)
|
||||
return new [] {(byte)i};
|
||||
}
|
||||
|
||||
for (int i = 0; i < codePages2.Length; i++)
|
||||
{
|
||||
if (codePages2[i] == encName)
|
||||
return new[] { (byte)0x10, (byte)i };
|
||||
}
|
||||
|
||||
return new byte[0];
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region GetEncoding()
|
||||
/// <summary>
|
||||
/// Pass in either a value <=0x1F excluding 0x10 or 0x10xxyy
|
||||
/// </summary>
|
||||
public static Encoding GetEncoding(int encodingMarker)
|
||||
{
|
||||
string enc = null;
|
||||
if (encodingMarker < 0x20)
|
||||
enc = codePages1[encodingMarker];
|
||||
else
|
||||
{
|
||||
encodingMarker &= 0xFFFF;
|
||||
if (encodingMarker < codePages2.Length)
|
||||
enc = codePages2[encodingMarker];
|
||||
}
|
||||
return enc == null ? null : Encoding.GetEncoding(enc);
|
||||
}
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user