C# Levenshtein Distance

Date: 2022-11-21

Source: https://gist.github.com/Davidblkx/e12ab0bb2aff7fd8072632b396538560

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;

namespace Plugin.Domain.Helpers
{
    /// <summary>
    /// Fuzzy text helpers: Levenshtein edit distance, diacritic removal and a best-effort
    /// lookup from a (possibly misspelled) country name to its two-letter ISO region code.
    /// </summary>
    public static class LevenshteinDistance
    {
        /// <summary>
        /// Calculate the difference between 2 strings using the Levenshtein distance algorithm.
        /// </summary>
        /// <param name="source1">First string to compare.</param>
        /// <param name="source2">Second string to compare.</param>
        /// <returns>
        /// The minimum number of single-character insertions, deletions and substitutions
        /// needed to turn <paramref name="source1"/> into <paramref name="source2"/>.
        /// The comparison is ordinal and case-sensitive.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is <c>null</c>.</exception>
        public static int Calculate(string source1, string source2) //O(n*m) time, O(m) memory
        {
            if (source1 == null)
            {
                throw new ArgumentNullException(nameof(source1));
            }

            if (source2 == null)
            {
                throw new ArgumentNullException(nameof(source2));
            }

            // If one entry is empty the distance is the full length of the other one.
            // Checked before allocating anything.
            if (source1.Length == 0)
            {
                return source2.Length;
            }

            if (source2.Length == 0)
            {
                return source1.Length;
            }

            // Only the previous and the current row of the distance matrix are ever read,
            // so two rolling rows replace the full (n+1) x (m+1) matrix.
            var previousRow = new int[source2.Length + 1];
            var currentRow = new int[source2.Length + 1];

            // Row 0: turning the empty prefix of source1 into the first j chars of source2 costs j.
            for (var j = 0; j <= source2.Length; j++)
            {
                previousRow[j] = j;
            }

            for (var i = 1; i <= source1.Length; i++)
            {
                // Column 0: turning the first i chars of source1 into the empty prefix costs i.
                currentRow[0] = i;

                for (var j = 1; j <= source2.Length; j++)
                {
                    var cost = (source2[j - 1] == source1[i - 1]) ? 0 : 1;

                    currentRow[j] = Math.Min(
                        Math.Min(previousRow[j] + 1, currentRow[j - 1] + 1), // deletion, insertion
                        previousRow[j - 1] + cost);                          // substitution or match
                }

                // The row just filled becomes the "previous" row of the next iteration.
                var swap = previousRow;
                previousRow = currentRow;
                currentRow = swap;
            }

            // After the final swap the last computed row lives in previousRow.
            return previousRow[source2.Length];
        }

        /// <summary>
        /// Removes diacritics (accents) from a string, e.g. turns an accented letter into its base letter.
        /// </summary>
        /// <param name="text">Text to strip.</param>
        /// <returns>
        /// <paramref name="text"/> decomposed (form D), with every non-spacing mark dropped,
        /// then recomposed (form C).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is <c>null</c>.</exception>
        public static string RemoveDiacritics(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            // Form D splits each accented character into its base character followed by
            // combining marks, so the marks can be filtered out individually.
            var normalizedString = text.Normalize(NormalizationForm.FormD);
            var stringBuilder = new StringBuilder(normalizedString.Length);
            foreach (var c in normalizedString)
            {
                if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
                {
                    stringBuilder.Append(c);
                }
            }
            return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
        }

        /// <summary>
        /// Diacritic-free region names (display, English and native) mapped to the two-letter
        /// ISO region code, built once from <see cref="GetCountryMapping"/> when the type is initialized.
        /// </summary>
        public static Dictionary<string, string> CountryMapping = GetCountryMapping();

        /// <summary>
        /// Builds a lookup from diacritic-free region names to two-letter ISO region codes,
        /// using every specific culture known to the runtime.
        /// </summary>
        /// <returns>
        /// A new dictionary keyed by the display, English and native name of each region.
        /// It is empty when the runtime exposes no specific cultures.
        /// </returns>
        public static Dictionary<string, string> GetCountryMapping()
        {
            var countryMapping = new Dictionary<string, string>();

            foreach (var culture in CultureInfo.GetCultures(CultureTypes.AllCultures))
            {
                // A region cannot be created from the invariant culture or from neutral cultures.
                if (culture.Equals(CultureInfo.InvariantCulture) || culture.IsNeutralCulture)
                {
                    continue;
                }

                RegionInfo region;
                try
                {
                    // Construct from the culture name, not the LCID: cultures without a dedicated
                    // LCID all report the same placeholder value, which RegionInfo(int) rejects.
                    region = new RegionInfo(culture.Name);
                }
                catch (ArgumentException)
                {
                    // The runtime has no region data for this culture. Skipping it is deliberate:
                    // one unsupported culture must not break the initialization of the whole type.
                    continue;
                }

                // Later entries overwrite earlier ones that share the same stripped name.
                foreach (var regionName in new[] { region.DisplayName, region.EnglishName, region.NativeName })
                {
                    countryMapping[RemoveDiacritics(regionName)] = region.TwoLetterISORegionName;
                }
            }

            return countryMapping;
        }

        /// <summary>
        /// Finds the region whose name is closest to <paramref name="countryName"/> by Levenshtein distance.
        /// </summary>
        /// <param name="countryName">Country name to resolve; diacritics are ignored.</param>
        /// <returns>
        /// The two-letter ISO region code of the closest entry in <see cref="CountryMapping"/>
        /// (the first one enumerated when several are equally close), or <c>null</c> when the mapping is empty.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="countryName"/> is <c>null</c>.</exception>
        public static string ParseCountryWithLevenshtein(string countryName)
        {
            if (countryName == null)
            {
                throw new ArgumentNullException(nameof(countryName));
            }

            var name = RemoveDiacritics(countryName);

            string bestCode = null;
            var bestScore = int.MaxValue;

            // Single pass for the minimum instead of sorting every candidate.
            foreach (var entry in CountryMapping)
            {
                var score = Calculate(name, entry.Key);

                // Strict comparison keeps the first entry among equally close ones.
                if (score < bestScore)
                {
                    bestScore = score;
                    bestCode = entry.Value;

                    if (bestScore == 0)
                    {
                        break; // Exact match, nothing can beat it.
                    }
                }
            }

            return bestCode;
        }
    }
}
C# Levenshtein Distance