一种选择是简单地编写自己的方法来确定两个字符串之间的相似性。
/// <summary>
/// Returns the percentage (0-100) of characters in <paramref name="first"/> that
/// occupy the same index in <paramref name="second"/>.
/// </summary>
/// <param name="first">Reference string; its length is the denominator of the percentage.</param>
/// <param name="second">String compared against <paramref name="first"/>.</param>
/// <returns>
/// 100 when both strings are null or both are empty, 0 when exactly one of them is
/// null or empty, otherwise the truncated percentage of matching positions.
/// </returns>
private static int GetSimilarity1(string first, string second)
{
    if (first == null) return second == null ? 100 : 0;
    if (second == null) return 0;

    // An empty reference string has no positions to compare; avoid dividing by zero.
    if (first.Length == 0) return second.Length == 0 ? 100 : 0;

    // Compare index by index. Looking characters up with IndexOf only ever finds
    // the first occurrence, which miscounts strings containing repeated characters.
    var overlap = Math.Min(first.Length, second.Length);
    var matches = 0;
    for (var i = 0; i < overlap; i++)
    {
        if (first[i] == second[i]) matches++;
    }

    // Positions of first beyond the end of second count as mismatches.
    return (int)(matches / (decimal)first.Length * 100);
}
第二种方法是:对第一个字符串中的每个字符,计算它在第二个字符串中的索引与它在第一个字符串中的索引之间的距离,然后除以该位置可能出现的最大距离(最坏情况)。这样得到的比值在精确匹配时最低(0),距离越远则越高。接着用 1 减去该比值(把“坏”的比例(精确匹配为 0)转换为“好”的比例(精确匹配为 1)),再乘以 100 转换为百分比,并把这个数值累加到运行总数中。
最后,将总数除以字符数,得到最终的“相似性”百分比。
/// <summary>
/// Returns a 0-100 similarity score based on how far each character of
/// <paramref name="first"/> is displaced in <paramref name="second"/>.
/// </summary>
/// <remarks>
/// For every index i of <paramref name="first"/>, the distance between i and the first
/// occurrence of that character in <paramref name="second"/> is divided by the largest
/// distance reachable from i inside <paramref name="first"/>. The complement of that ratio
/// is truncated to a whole percentage, and the percentages are averaged with integer division.
/// </remarks>
/// <param name="first">Reference string whose characters are looked up.</param>
/// <param name="second">String searched for each character of <paramref name="first"/>.</param>
/// <returns>
/// 100 when both strings are null or both are empty, 0 when exactly one of them is
/// null or empty, otherwise the averaged per-character score.
/// </returns>
private static int GetSimilarity2(string first, string second)
{
    if (first == null) return second == null ? 100 : 0;
    if (second == null) return 0;

    // Nothing to average for an empty reference string; avoid dividing by zero.
    if (first.Length == 0) return second.Length == 0 ? 100 : 0;

    int total = 0;
    for (int i = 0; i < first.Length; i++)
    {
        var foundAt = second.IndexOf(first[i]);
        if (foundAt == i)
        {
            // Exact position match. Handled separately because a one-character
            // string has a worst-case distance of 0, which cannot be divided by.
            total += 100;
            continue;
        }

        var worstDist = Math.Max(first.Length - i - 1, i);
        var thisDist = Math.Abs(foundAt - i);

        // A missing character (IndexOf == -1) or one displaced at least as far as
        // the worst case contributes 0 instead of a bogus or negative percentage.
        if (foundAt >= 0 && thisDist < worstDist)
        {
            total += (int)((1 - thisDist / (decimal)worstDist) * 100);
        }
    }

    return total / first.Length;
}
为了测试这些方法,我使用了以下代码:
/// <summary>
/// Prints the scores of both similarity methods for three sample rotor strings
/// compared against the "Rotor III" string, then waits for a key press.
/// </summary>
private static void Main()
{
    const string reference = "BDFHJLCPRTXVZNYEIWGAKMUSQO";

    // Candidate strings, in the order they are reported (A, B, C).
    string[] candidates =
    {
        "ZGFHJLCPRXTVDNYEIWBAMUKOQS",
        "VZBRGITYUPSDNHLXAWMJQOFECK",
        "NYEIWGAKMUSQOBDFHJLCPRTXVZ"
    };

    string[] methodOneFormats =
    {
        "Method 1: Rotor III -> Random Rotor A: {0}",
        "Method 1: Rotor III -> Random Rotor B: {0}",
        "Method 1: Rotor III -> Random Rotor C: {0}"
    };

    string[] methodTwoFormats =
    {
        "Method 2: Rotor III -> Random Rotor A: {0}",
        "Method 2: Rotor III -> Random Rotor B: {0}",
        "Method 2: Rotor III -> Random Rotor C: {0}"
    };

    for (int i = 0; i < candidates.Length; i++)
    {
        Console.WriteLine(methodOneFormats[i], GetSimilarity1(reference, candidates[i]));
    }

    Console.WriteLine("-----------------------------------------");

    for (int i = 0; i < candidates.Length; i++)
    {
        Console.WriteLine(methodTwoFormats[i], GetSimilarity2(reference, candidates[i]));
    }

    // Keep the console window open until the user presses a key.
    Console.WriteLine("\nDone!\nPress any key to exit...");
    Console.ReadKey();
}
输出