代码之家  ›  专栏  ›  技术社区  ›  Amy

有人能发现我的Damerau Levenshtein远程实现中的错误吗?

c#
  •  2
  • Amy  · 技术社区  · 14 年前

    我试过摆弄数组边界,但那只会导致 IndexOutOfRangeExceptions

    public static class FuzzyStringMatcher
    {
        private const int DELETION = 0;
        private const int INSERTION = 1;
        private const int SUBSTITUTION = 2;
        private const int TRANSPOSITION = 3;
    
        private const int COST_OF_DELETION = 1;
        private const int COST_OF_INSERTION = 1;
        private const int COST_OF_TRANSPOSITION = 1;
        private const int COST_OF_SUBSTITUTION = 1;
    
        public static int Compute_DamerauLevenshtein_Distance(string a, string b)
        {
            int[,] rows = new int[a.Length + 1, b.Length + 1];
            int cost_ratio;
            int[] calculations = new int[4];
    
            //
            // Init the array
            //
            for (int i = 0; i < rows.GetUpperBound(0); i++)
                rows[i, 0] = i;
    
            for (int i = 0; i < rows.GetUpperBound(1); i++)
                rows[0, i] = i;
    
    
            for (int aidx = 1; aidx < rows.GetUpperBound(0); aidx++)
            {
                for (int bidx = 1; bidx < rows.GetUpperBound(1); bidx++)
                {
                    if (a[aidx - 1] == b[bidx - 1])
                        cost_ratio = 0;
                    else
                        cost_ratio = 1;
    
                    calculations[DELETION] = rows[aidx - 1, bidx] + COST_OF_DELETION;
                    calculations[INSERTION] = rows[aidx, bidx - 1] + COST_OF_INSERTION;
                    calculations[SUBSTITUTION] = rows[aidx - 1, bidx - 1] + cost_ratio * COST_OF_SUBSTITUTION;
                    calculations[TRANSPOSITION] = int.MaxValue;
    
                    if (aidx > 1 && bidx > 1 && a[aidx] == b[bidx - 1] && a[aidx - 1] == b[bidx])
                        calculations[TRANSPOSITION] = rows[aidx - 2, bidx - 2] + cost_ratio * COST_OF_TRANSPOSITION;
    
                    rows[aidx, bidx] = calculations.Min();
                }
            }
    
            int score = rows[rows.GetUpperBound(0) - 1, rows.GetUpperBound(1) - 1];
            if (a.Contains(b) || b.Contains(a))
                score = score / 2;
            return score;
        }
    }
    

    我的实现基于上的Wikipedia页面中给出的算法 Damerau-Levenshtein-Distance

    2 回复  |  直到 14 年前
        1
  •  2
  •   Peter Mortensen kismert    9 年前

    维基百科文章中没有:

    if (a.Contains(b) || b.Contains(a))
            score = score / 2;
    

    因为你的例子是这样的——整数除法是1/2==0,那么可能就是这样。

        2
  •  3
  •   max    14 年前

    +1给卢·佛朗哥。但除此之外,似乎还有很多索引问题(请注意,所有4个 for

        public static int Compute_DamerauLevenshtein_Distance2(string a, string b)
        {
            int[,] rows = new int[a.Length + 1, b.Length + 1];
            int cost_ratio;
            int[] calculations = new int[4];
    
            for(int i = 0; i <= rows.GetUpperBound(0); i++)
                rows[i, 0] = i;
    
            for(int i = 1; i <= rows.GetUpperBound(1); i++)
                rows[0, i] = i;
    
    
            for(int aidx = 1; aidx <= rows.GetUpperBound(0); aidx++)
            {
                for(int bidx = 1; bidx <= rows.GetUpperBound(1); bidx++)
                {
                    if(a[aidx - 1] == b[bidx - 1])
                        cost_ratio = 0;
                    else
                        cost_ratio = 1;
    
                    calculations[DELETION] = rows[aidx - 1, bidx] + COST_OF_DELETION;
                    calculations[INSERTION] = rows[aidx, bidx - 1] + COST_OF_INSERTION;
                    calculations[SUBSTITUTION] = rows[aidx - 1, bidx - 1] + cost_ratio * COST_OF_SUBSTITUTION;
                    calculations[TRANSPOSITION] = int.MaxValue;
    
                    if(aidx > 1 && bidx > 1 && a[aidx - 1] == b[bidx - 2] && a[aidx - 2] == b[bidx - 1])
                        calculations[TRANSPOSITION] = rows[aidx - 2, bidx - 2] + cost_ratio * COST_OF_TRANSPOSITION;
    
                    rows[aidx, bidx] = calculations.Min();
                }
            }
    
            int score = rows[rows.GetUpperBound(0), rows.GetUpperBound(1)];
            return score;
        }