代码之家  ›  专栏  ›  技术社区  ›  Peter Radocchia

SQL比较集,第二部分:如何连接集的集合

  •  3
  • Peter Radocchia  · 技术社区  · 14 年前

    这个问题(question)让我想到了几个与整集比较相关的问题。给定:

    1. 一个由若干集合组成的集合(collection),以及
    2. 一个探测集(probe)

    三个问题:

    1. 如何找出 collection 中所有与 probe 逐元素完全匹配的集合?
    2. 如何找出 collection 中所有与一组 probe(多个探测集)中某一个完全匹配的集合?也就是说,如何连接“集合的集合”?
    3. 这属于关系除法(relational division)吗?如果不是,那它是什么?

    对于问题1,我有一个还不错的解决方案(见下文)。

    对于问题2,我没有一个像样的关系解决方案。有人要吗?

    -- Start from a clean slate so the script can be re-run in the same session.
    IF OBJECT_ID('tempdb..#elements') IS NOT NULL
        DROP TABLE #elements;
    IF OBJECT_ID('tempdb..#sets') IS NOT NULL
        DROP TABLE #sets;

    -- #sets: one row per set number.
    CREATE TABLE #sets (
        set_no INT NOT NULL,
        PRIMARY KEY (set_no)
    );

    -- #elements: one row per (set, member); the key rules out duplicate members.
    CREATE TABLE #elements (
        set_no INT     NOT NULL,
        elem   CHAR(1) NOT NULL,
        PRIMARY KEY (set_no, elem)
    );

    -- Set 1 = {A, B, C, D, E, F}
    INSERT INTO #elements (set_no, elem) VALUES (1, 'A');
    INSERT INTO #elements (set_no, elem) VALUES (1, 'B');
    INSERT INTO #elements (set_no, elem) VALUES (1, 'C');
    INSERT INTO #elements (set_no, elem) VALUES (1, 'D');
    INSERT INTO #elements (set_no, elem) VALUES (1, 'E');
    INSERT INTO #elements (set_no, elem) VALUES (1, 'F');
    -- Set 2 = {A, B, C}
    INSERT INTO #elements (set_no, elem) VALUES (2, 'A');
    INSERT INTO #elements (set_no, elem) VALUES (2, 'B');
    INSERT INTO #elements (set_no, elem) VALUES (2, 'C');
    -- Set 3 = {D, E, F}
    INSERT INTO #elements (set_no, elem) VALUES (3, 'D');
    INSERT INTO #elements (set_no, elem) VALUES (3, 'E');
    INSERT INTO #elements (set_no, elem) VALUES (3, 'F');
    -- Set 4 = {B, C, F}
    INSERT INTO #elements (set_no, elem) VALUES (4, 'B');
    INSERT INTO #elements (set_no, elem) VALUES (4, 'C');
    INSERT INTO #elements (set_no, elem) VALUES (4, 'F');
    -- Set 5 = {F}
    INSERT INTO #elements (set_no, elem) VALUES (5, 'F');

    -- Derive the list of sets from the element rows just loaded.
    INSERT INTO #sets (set_no)
    SELECT DISTINCT e.set_no
    FROM #elements AS e;
    

    问题1的设置和解决方案(单个探测集的集合查找):

    -- Single probe set for question 1: {B, C, F}.
    IF OBJECT_ID('tempdb..#probe') IS NOT NULL
        DROP TABLE #probe;

    CREATE TABLE #probe (
        elem CHAR(1) NOT NULL,
        PRIMARY KEY (elem)
    );

    INSERT INTO #probe (elem) VALUES ('B');
    INSERT INTO #probe (elem) VALUES ('C');
    INSERT INTO #probe (elem) VALUES ('F');
    
    -- Question 1: list every set whose members are exactly the members of #probe.
    -- Equality is tested as mutual containment, using two "no counter-example" checks.
    -- (The author offered this as believed-correct and invited counter-examples.)
    SELECT
        st.set_no
    FROM #sets AS st
    WHERE
        -- probe is contained in the set: no probe element is missing from the set
        NOT EXISTS (
            SELECT 1
            FROM #probe AS pr
            WHERE NOT EXISTS (
                SELECT 1
                FROM #elements AS el
                WHERE el.set_no = st.set_no
                  AND el.elem   = pr.elem
            )
        )
        -- set is contained in the probe: no set element falls outside the probe
        AND NOT EXISTS (
            SELECT 1
            FROM #elements AS el
            WHERE el.set_no = st.set_no
              AND NOT EXISTS (
                    SELECT 1
                    FROM #probe AS pr
                    WHERE pr.elem = el.elem
              )
        );
    

    问题2的设置,无解决方案:

    -- Several probe sets at once for question 2, one row per (probe, member).
    IF OBJECT_ID('tempdb..#multi_probe') IS NOT NULL
        DROP TABLE #multi_probe;

    CREATE TABLE #multi_probe (
        probe_no INT     NOT NULL,
        elem     CHAR(1) NOT NULL,
        PRIMARY KEY (probe_no, elem)
    );

    -- Probe 1 = {B, C, F}
    INSERT INTO #multi_probe (probe_no, elem) VALUES (1, 'B');
    INSERT INTO #multi_probe (probe_no, elem) VALUES (1, 'C');
    INSERT INTO #multi_probe (probe_no, elem) VALUES (1, 'F');
    -- Probe 2 = {C, F}
    INSERT INTO #multi_probe (probe_no, elem) VALUES (2, 'C');
    INSERT INTO #multi_probe (probe_no, elem) VALUES (2, 'F');
    -- Probe 3 = {A, B, C}
    INSERT INTO #multi_probe (probe_no, elem) VALUES (3, 'A');
    INSERT INTO #multi_probe (probe_no, elem) VALUES (3, 'B');
    INSERT INTO #multi_probe (probe_no, elem) VALUES (3, 'C');

    -- Placeholder: the query matching probes to sets goes here (none supplied yet).

    -- Expected result set:
    -- probe_no | set_no
    -- ---------|--------
    -- 1        | 4
    -- 3        | 2
    
    3 回复  |  直到 7 年前
        1
  •  2
  •   CyberDude    14 年前

    好的,让我们一步一步地解决问题2:

    (1) 将测试集与探测集按各自的元素做内连接。这样就能看出测试集和探测集之间的关系(哪个测试集与哪个探测集有哪些共同元素):

    -- Step (1): pair each test-set row with each probe row that holds the same element.
    -- NOTE(review): @elements / @multi_probe are not declared in this snippet; presumably
    -- table variables shaped like #elements / #multi_probe, with the probe id exposed as
    -- set_no. Confirm before running.
    SELECT
        ts.set_no AS [test set],
        pr.set_no AS [probe set],
        ts.elem   AS [common element]
    FROM @elements AS ts
    INNER JOIN @multi_probe AS pr
        ON pr.elem = ts.elem
    

    结果:

    test set    probe set   common element
    ----------- ----------- --------------
    1           3           A
    1           1           B
    1           3           B
    1           1           C
    1           2           C
    1           3           C
    1           1           F
    1           2           F
    2           3           A
    2           1           B
    2           3           B
    2           1           C
    2           2           C
    2           3           C
    3           1           F
    3           2           F
    4           1           B
    4           3           B
    4           1           C
    4           2           C
    4           3           C
    4           1           F
    4           2           F
    5           1           F
    5           2           F
    

    -- Step (2): for each (test set, probe set) pair, count the elements they share.
    -- Pairs with nothing in common never appear because the join is an inner join.
    -- NOTE(review): @elements / @multi_probe are undeclared here; presumably table
    -- variables mirroring the question's temp tables. Confirm.
    SELECT
        ts.set_no AS [test set],
        pr.set_no AS [probe set],
        COUNT(*)  AS [common element count]
    FROM @elements AS ts
    INNER JOIN @multi_probe AS pr
        ON pr.elem = ts.elem
    GROUP BY
        ts.set_no,
        pr.set_no
    ORDER BY
        ts.set_no,
        pr.set_no
    

    结果:

     test set    probe set   common element count
    ----------- ----------- --------------------
    1           1           3
    1           2           2
    1           3           3
    2           1           2
    2           2           1
    2           3           3
    3           1           1
    3           2           1
    4           1           3
    4           2           2
    4           3           2
    5           1           1
    5           2           1
    

    (3) 将测试集和探测集的计数带到每一行(子查询可能不是最优雅的)

    -- Step (3): alongside the shared-element count, show the full size of the test set
    -- and of the probe set for every pair, using one correlated scalar subquery each.
    -- NOTE(review): @elements / @multi_probe are undeclared here; presumably table
    -- variables mirroring the question's temp tables. Confirm.
    SELECT
        ts.set_no AS [test set],
        pr.set_no AS [probe set],
        COUNT(*)  AS [common element count],
        (
            SELECT COUNT(*)
            FROM @elements AS ts_all
            WHERE ts_all.set_no = ts.set_no
        ) AS [test set count],
        (
            SELECT COUNT(*)
            FROM @multi_probe AS pr_all
            WHERE pr_all.set_no = pr.set_no
        ) AS [probe set count]
    FROM @elements AS ts
    INNER JOIN @multi_probe AS pr
        ON pr.elem = ts.elem
    GROUP BY
        ts.set_no,
        pr.set_no
    ORDER BY
        ts.set_no,
        pr.set_no
    

    test set    probe set   common element count test set count probe set count
    ----------- ----------- -------------------- -------------- ---------------
    1           1           3                    6              3
    1           2           2                    6              2
    1           3           3                    6              3
    2           1           2                    3              3
    2           2           1                    3              2
    2           3           3                    3              3
    3           1           1                    3              3
    3           2           1                    3              2
    4           1           3                    3              3
    4           2           2                    3              2
    4           3           2                    3              3
    5           1           1                    1              3
    5           2           1                    1              2
    

    (4) 找到解决方法:只保留那些元素数相同的测试集和探测集,这个数也是公共元素数,即测试集和探测集是相同的

    -- Step (4): keep only the pairs where the shared-element count equals both the
    -- test-set size and the probe-set size, i.e. the two sets are identical.
    -- NOTE(review): @elements / @multi_probe are undeclared here; presumably table
    -- variables mirroring the question's temp tables. Confirm.
    SELECT
        ts.set_no AS [test set],
        pr.set_no AS [probe set]
    FROM @elements AS ts
    INNER JOIN @multi_probe AS pr
        ON pr.elem = ts.elem
    GROUP BY
        ts.set_no,
        pr.set_no
    HAVING
        -- every member of the test set was matched ...
        COUNT(*) = (
            SELECT COUNT(*)
            FROM @elements AS ts_all
            WHERE ts_all.set_no = ts.set_no
        )
        -- ... and so was every member of the probe set
        AND COUNT(*) = (
            SELECT COUNT(*)
            FROM @multi_probe AS pr_all
            WHERE pr_all.set_no = pr.set_no
        )
    ORDER BY
        ts.set_no,
        pr.set_no
    

    test set    probe set
    ----------- -----------
    2           3
    4           1
    

    请原谅我在这里使用了 @(表变量)而不是 #(临时表)。

        2
  •  2
  •   CyberDude    14 年前

    我可以用SQL Server语法为问题(1)提交一个更具“数学倾向”的解决方案吗

    -- Question 1, counting approach: return each set in #sets whose members are exactly
    -- the members of #probe.
    -- Reads the temp table #elements, consistent with #sets and #probe in the same
    -- statement, instead of an undeclared table variable.
    SELECT
        s.set_no
    FROM #sets AS s
    INNER JOIN #elements AS e
        ON e.set_no = s.set_no
    -- LEFT JOIN keeps set members that are absent from the probe (p.elem is NULL there).
    LEFT JOIN #probe AS p
        ON p.elem = e.elem
    GROUP BY
        s.set_no
    HAVING
        -- COUNT(*) is the size of the set; COUNT(DISTINCT p.elem) ignores NULLs, so it is
        -- the number of set members found in the probe. Equal means set is within probe.
        COUNT(DISTINCT p.elem) = COUNT(*)
        -- Same cardinality as the probe, which together with the above means equality.
        AND COUNT(*) = (SELECT COUNT(*) FROM #probe)
    
    • COUNT(*) 统计测试集中有多少个元素(LEFT JOIN 保证测试集的所有元素都被保留)
    • COUNT(DISTINCT p.elem) 统计其中有多少个元素在探测集中找到了匹配(NULL 将不被计算在内),即探测集中有多少个元素同时出现在测试集中

    翻译成数学术语: COUNT(DISTINCT p.elem) = COUNT(*) 表示测试集是探测集的子集( test ⊆ probe ),而 COUNT(*) = (SELECT COUNT(*) FROM #probe) 表示两个集合的基数相等( |test| = |probe| )。由这两个条件可以得出 test = probe 。

        3
  •  0
  •   Peter Radocchia    14 年前

    首先,解决方案。EXCEPT语法可以优雅地处理多列和空值,因此这更接近于一般解决方案:

    -- Question 2 via set difference: a test set and a probe set match when neither
    -- "probe minus test" nor "test minus probe" leaves any row behind.
    SELECT
        ts.set_no AS test_set_no,
        ps.set_no AS probe_set_no
    FROM #test_sets AS ts
    CROSS JOIN #probe_sets AS ps
    WHERE
        -- nothing in the probe set that the test set lacks
        NOT EXISTS (
            SELECT pe.elem
            FROM #probe_elements AS pe
            WHERE pe.set_no = ps.set_no
            EXCEPT
            SELECT te.elem
            FROM #test_elements AS te
            WHERE te.set_no = ts.set_no
        )
        -- nothing in the test set that the probe set lacks
        AND NOT EXISTS (
            SELECT te.elem
            FROM #test_elements AS te
            WHERE te.set_no = ts.set_no
            EXCEPT
            SELECT pe.elem
            FROM #probe_elements AS pe
            WHERE pe.set_no = ps.set_no
        )
    ORDER BY
        test_set_no,
        probe_set_no
    

    接下来,修改后的数据集:

    -- Test-side sample data: drop any leftovers, then rebuild.
    IF OBJECT_ID('tempdb..#test_elements') IS NOT NULL
        DROP TABLE #test_elements;
    IF OBJECT_ID('tempdb..#test_sets') IS NOT NULL
        DROP TABLE #test_sets;

    -- #test_sets: one row per test set number.
    CREATE TABLE #test_sets (
        set_no INT NOT NULL,
        PRIMARY KEY (set_no)
    );

    -- #test_elements: one row per (test set, member).
    CREATE TABLE #test_elements (
        set_no INT     NOT NULL,
        elem   CHAR(1) NOT NULL,
        PRIMARY KEY (set_no, elem)
    );

    -- Set 1 = {A, B, C, D, E, F}
    INSERT INTO #test_elements (set_no, elem) VALUES (1, 'A');
    INSERT INTO #test_elements (set_no, elem) VALUES (1, 'B');
    INSERT INTO #test_elements (set_no, elem) VALUES (1, 'C');
    INSERT INTO #test_elements (set_no, elem) VALUES (1, 'D');
    INSERT INTO #test_elements (set_no, elem) VALUES (1, 'E');
    INSERT INTO #test_elements (set_no, elem) VALUES (1, 'F');
    -- Set 2 = {A, B, C}
    INSERT INTO #test_elements (set_no, elem) VALUES (2, 'A');
    INSERT INTO #test_elements (set_no, elem) VALUES (2, 'B');
    INSERT INTO #test_elements (set_no, elem) VALUES (2, 'C');
    -- Set 3 = {D, E, F}
    INSERT INTO #test_elements (set_no, elem) VALUES (3, 'D');
    INSERT INTO #test_elements (set_no, elem) VALUES (3, 'E');
    INSERT INTO #test_elements (set_no, elem) VALUES (3, 'F');
    -- Set 4 = {B, C, F}
    INSERT INTO #test_elements (set_no, elem) VALUES (4, 'B');
    INSERT INTO #test_elements (set_no, elem) VALUES (4, 'C');
    INSERT INTO #test_elements (set_no, elem) VALUES (4, 'F');
    -- Set 5 = {F}
    INSERT INTO #test_elements (set_no, elem) VALUES (5, 'F');

    -- Derive the set list from the element rows just loaded.
    INSERT INTO #test_sets (set_no)
    SELECT DISTINCT te.set_no
    FROM #test_elements AS te;
    
    -- Probe-side sample data: drop any leftovers, then rebuild.
    IF OBJECT_ID('tempdb..#probe_elements') IS NOT NULL
        DROP TABLE #probe_elements;
    IF OBJECT_ID('tempdb..#probe_sets') IS NOT NULL
        DROP TABLE #probe_sets;

    -- #probe_sets: one row per probe set number.
    CREATE TABLE #probe_sets (
        set_no INT NOT NULL,
        PRIMARY KEY (set_no)
    );

    -- #probe_elements: one row per (probe set, member).
    CREATE TABLE #probe_elements (
        set_no INT     NOT NULL,
        elem   CHAR(1) NOT NULL,
        PRIMARY KEY (set_no, elem)
    );

    -- Probe 1 = {B, C, F}
    INSERT INTO #probe_elements (set_no, elem) VALUES (1, 'B');
    INSERT INTO #probe_elements (set_no, elem) VALUES (1, 'C');
    INSERT INTO #probe_elements (set_no, elem) VALUES (1, 'F');
    -- Probe 2 = {C, F}
    INSERT INTO #probe_elements (set_no, elem) VALUES (2, 'C');
    INSERT INTO #probe_elements (set_no, elem) VALUES (2, 'F');
    -- Probe 3 = {A, B, C}
    INSERT INTO #probe_elements (set_no, elem) VALUES (3, 'A');
    INSERT INTO #probe_elements (set_no, elem) VALUES (3, 'B');
    INSERT INTO #probe_elements (set_no, elem) VALUES (3, 'C');

    -- Derive the probe list from the element rows just loaded.
    INSERT INTO #probe_sets (set_no)
    SELECT DISTINCT pe.set_no
    FROM #probe_elements AS pe;
    

    相比之下,根据CyberDude,使用聚合:

    -- Question 2 via aggregation: a test set and a probe set match when the number of
    -- elements they share equals the size of each of them.
    SELECT
        te.set_no AS [test set],
        pe.set_no AS [probe set]
    FROM #test_elements AS te
    INNER JOIN #probe_elements AS pe
        ON pe.elem = te.elem
    GROUP BY
        te.set_no,
        pe.set_no
    HAVING
        -- shared count equals the full size of the test set ...
        COUNT(*) = (
            SELECT COUNT(*)
            FROM #test_elements AS te_all
            WHERE te_all.set_no = te.set_no
        )
        -- ... and the full size of the probe set
        AND COUNT(*) = (
            SELECT COUNT(*)
            FROM #probe_elements AS pe_all
            WHERE pe_all.set_no = pe.set_no
        )
    ORDER BY
        te.set_no,
        pe.set_no