C# Detect Latin1 encoding

Date: 2024-01-31
/// <summary>
/// Detects and repairs "mojibake": text whose UTF-8 bytes were mistakenly decoded
/// as ISO-8859-1 (Latin-1), so that a single accented character shows up as two
/// characters.
/// </summary>
/// <example>
/// <c>EncodingHelper.ToUTF8("AcciÃ³n")</c> returns <c>"Acción"</c>, while
/// <c>EncodingHelper.ToUTF8("Acción")</c> returns its argument unchanged.
/// </example>
public class EncodingHelper
{
    private static readonly Encoding Latin1 = Encoding.GetEncoding("ISO-8859-1");

    // Strict decoder: throws on malformed UTF-8 instead of silently emitting U+FFFD,
    // which lets ToUTF8 refuse a repair that would destroy characters.
    private static readonly Encoding StrictUtf8 =
        new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    // U+0080..U+00FF encode in UTF-8 as a lead byte 0xC2 or 0xC3 followed by a
    // continuation byte 0x80..0xBF. Misread as Latin-1, that is one of these two
    // lead characters followed by a character in the trail range below.
    private const char FirstLeadChar = '\u00C2';
    private const char SecondLeadChar = '\u00C3';
    private const char MinTrailChar = '\u0080';
    private const char MaxTrailChar = '\u00BF';

    // Highest code point that ISO-8859-1 can represent in a single byte.
    private const char MaxLatin1Char = '\u00FF';

    /// <summary>
    /// Returns the text produced when the Latin-1 character for <paramref name="b"/>
    /// is encoded as UTF-8 and those bytes are then misread as Latin-1.
    /// </summary>
    /// <param name="b">A byte interpreted as an ISO-8859-1 character.</param>
    /// <returns>
    /// A one-character string for values below 128 (UTF-8 leaves ASCII unchanged),
    /// otherwise the two-character mojibake sequence.
    /// </returns>
    public static string GetConvertedByte(byte b)
    {
        string original = Latin1.GetString(new byte[] { b });
        byte[] utf8Bytes = Encoding.UTF8.GetBytes(original);
        return Latin1.GetString(utf8Bytes);
    }

    /// <summary>
    /// Heuristically checks whether <paramref name="text"/> contains at least one
    /// two-character sequence that is the Latin-1 misreading of a UTF-8 encoded
    /// character in the range U+0080..U+00FF.
    /// </summary>
    /// <param name="text">The text to inspect; may be null or empty.</param>
    /// <returns>
    /// <c>true</c> if such a sequence is present; <c>false</c> otherwise, including
    /// for null or empty input.
    /// </returns>
    public static bool IsLikelyConvertedLatin1(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return false;
        }

        // Single ordinal pass; matches exactly the 128 pairs that GetConvertedByte
        // yields for the bytes 128..255.
        for (int i = 0; i < text.Length - 1; i++)
        {
            char lead = text[i];
            if (lead != FirstLeadChar && lead != SecondLeadChar)
            {
                continue;
            }

            char trail = text[i + 1];
            if (trail >= MinTrailChar && trail <= MaxTrailChar)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Repairs <paramref name="text"/> when it looks like UTF-8 that was decoded as
    /// Latin-1 and the repair can be done without losing any character.
    /// </summary>
    /// <param name="text">The text to repair; may be null.</param>
    /// <returns>
    /// The repaired text, or <paramref name="text"/> itself when no mojibake is
    /// detected or when the text cannot be round-tripped losslessly (it contains
    /// characters above U+00FF, or its Latin-1 bytes are not valid UTF-8).
    /// </returns>
    public static string ToUTF8(string text)
    {
        if (!IsLikelyConvertedLatin1(text))
        {
            return text;
        }

        string repaired = TryRepair(text);
        return repaired != null ? repaired : text;
    }

    /// <summary>
    /// Unconditionally re-interprets the Latin-1 bytes of <paramref name="s"/> as
    /// UTF-8. Performs no validation: characters that Latin-1 cannot represent and
    /// byte sequences that are not valid UTF-8 are substituted by the encodings'
    /// replacement fallbacks. Prefer <see cref="ToUTF8"/> for untrusted input.
    /// </summary>
    /// <param name="s">The text whose Latin-1 bytes are decoded as UTF-8.</param>
    /// <returns>The decoded text.</returns>
    public static string Latin1ToUtf8(string s) => Encoding.UTF8.GetString(Latin1.GetBytes(s));

    // Returns the losslessly repaired text, or null when the repair would drop or
    // replace characters.
    private static string TryRepair(string text)
    {
        foreach (char c in text)
        {
            // Anything above U+00FF has no Latin-1 byte and would be replaced.
            if (c > MaxLatin1Char)
            {
                return null;
            }
        }

        try
        {
            return StrictUtf8.GetString(Latin1.GetBytes(text));
        }
        catch (DecoderFallbackException)
        {
            // The bytes are not well-formed UTF-8 (for example genuine Latin-1
            // characters mixed with mojibake), so leave the text alone.
            return null;
        }
    }
}
C# Detect Latin1 encoding