代码之家  ›  专栏  ›  技术社区  ›  Michael Buen

为什么最自然的查询(即使用内部连接(而不是左连接))非常慢

  •  7
  • Michael Buen  · 技术社区  · 14 年前

    此查询花费的时间太长。

    explain analyze
    
    select 
    
    c.company_rec_id, 
    c.the_company_code ,
    c.company
    
    from 
    tlist t
    
    -- it is questionable why this query become fast when using left join, the most natural query is inner join...
    join mlist m using(mlist_rec_id)
    join parcel_application ord_app using(parcel_application_rec_id)
    join parcel ord using(parcel_rec_id)
    join company c on c.company_rec_id = ord.client_rec_id
    -- ...questionable
    
    where 
    (
        'cadmium' = ''
    
        or
    
        exists
        (
        select * 
            from mlist_detail md
            where 
                md.mlist_rec_id = m.mlist_rec_id        
                and exists
                (
                    select *
                    from mlist_detail_parameter mdp            
                    join parameter p using(parameter_rec_id)
                    where mdp.mlist_detail_rec_id = md.mlist_detail_rec_id            
                        and to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words('cadmium'))       
                )
    
        )
    )
    group by c.company_rec_id, 
        c.the_company_code, c.company
    

    "HashAggregate  (cost=3791153.43..3791156.38 rows=295 width=71) (actual time=222284.340..222284.357 rows=84 loops=1)"
    "  ->  Nested Loop  (cost=0.00..3791139.81 rows=1816 width=71) (actual time=421.412..222282.253 rows=715 loops=1)"
    "        ->  Merge Join  (cost=0.00..3790598.03 rows=1816 width=37) (actual time=421.395..222273.876 rows=715 loops=1)"
    "              Merge Cond: ((ord_app.parcel_rec_id)::text = (ord.parcel_rec_id)::text)"
    "              ->  Nested Loop  (cost=0.00..3790229.12 rows=1816 width=37) (actual time=421.371..222264.556 rows=715 loops=1)"
    "                    ->  Nested Loop  (cost=0.00..3789062.60 rows=1816 width=74) (actual time=421.346..222250.614 rows=715 loops=1)"
    "                          ->  Index Scan using fki_parcel_application__parcel on parcel_application ord_app  (cost=0.00..508.50 rows=3218 width=74) (actual time=0.019..6.952 rows=3218 loops=1)"
    "                          ->  Index Scan using fki_mlist__parcel_application on mlist m  (cost=0.00..1177.29 rows=1 width=74) (actual time=66.900..69.060 rows=0 loops=3218)"
    "                                Index Cond: ((m.parcel_application_rec_id)::text = (ord_app.parcel_application_rec_id)::text)"
    "                                Filter: (alternatives: SubPlan 1 or hashed SubPlan 2)"
    "                                SubPlan 1"
    "                                  ->  Nested Loop Semi Join  (cost=3.49..5870.46 rows=5 width=0) (actual time=61.189..61.189 rows=0 loops=3631)"
    "                                        Join Filter: ((md.mlist_detail_rec_id)::text = (mdp.mlist_detail_rec_id)::text)"
    "                                        ->  Index Scan using fki_mlist_detail__mlist on mlist_detail md  (cost=0.00..24.31 rows=5 width=37) (actual time=0.014..0.023 rows=4 loops=3631)"
    "                                              Index Cond: ((mlist_rec_id)::text = ($0)::text)"
    "                                        ->  Hash Join  (cost=3.49..1163.57 rows=676 width=37) (actual time=0.004..16.045 rows=1865 loops=13473)"
    "                                              Hash Cond: ((mdp.parameter_rec_id)::text = (p.parameter_rec_id)::text)"
    "                                              ->  Seq Scan on mlist_detail_parameter mdp  (cost=0.00..1013.87 rows=37187 width=74) (actual time=0.001..5.935 rows=36353 loops=13473)"
    "                                              ->  Hash  (cost=3.48..3.48 rows=1 width=37) (actual time=1.646..1.646 rows=1 loops=1)"
    "                                                    ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (actual time=1.397..1.642 rows=1 loops=1)"
    "                                                          Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                                SubPlan 2"
    "                                  ->  Hash Join  (cost=1180.47..3241.32 rows=15198 width=37) (never executed)"
    "                                        Hash Cond: ((md.mlist_detail_rec_id)::text = (mdp.mlist_detail_rec_id)::text)"
    "                                        ->  Seq Scan on mlist_detail md  (cost=0.00..1987.79 rows=17679 width=74) (never executed)"
    "                                        ->  Hash  (cost=1172.02..1172.02 rows=676 width=37) (never executed)"
    "                                              ->  HashAggregate  (cost=1165.26..1172.02 rows=676 width=37) (never executed)"
    "                                                    ->  Hash Join  (cost=3.49..1163.57 rows=676 width=37) (never executed)"
    "                                                          Hash Cond: ((mdp.parameter_rec_id)::text = (p.parameter_rec_id)::text)"
    "                                                          ->  Seq Scan on mlist_detail_parameter mdp  (cost=0.00..1013.87 rows=37187 width=74) (never executed)"
    "                                                          ->  Hash  (cost=3.48..3.48 rows=1 width=37) (never executed)"
    "                                                                ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (never executed)"
    "                                                                      Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                    ->  Index Scan using fki_tlist__mlist on tlist t  (cost=0.00..0.63 rows=1 width=37) (actual time=0.015..0.015 rows=1 loops=715)"
    "                          Index Cond: ((t.mlist_rec_id)::text = (m.mlist_rec_id)::text)"
    "              ->  Index Scan using pk_parcel on parcel ord  (cost=0.00..338.49 rows=3087 width=74) (actual time=0.008..3.312 rows=3146 loops=1)"
    "        ->  Index Scan using pk_company on company c  (cost=0.00..0.29 rows=1 width=71) (actual time=0.009..0.009 rows=1 loops=715)"
    "              Index Cond: ((c.company_rec_id)::text = (ord.client_rec_id)::text)"
    "Total runtime: 222284.584 ms"
    

    当我把它转换成左连接。

    explain analyze
    
    select 
    
    c.company_rec_id, 
    c.the_company_code ,
    c.company
    
    from 
    tlist t
    
    -- it is questionable why this query become fast when using left join, the most natural query is inner join...
    left join mlist m using(mlist_rec_id)
    left join parcel_application ord_app using(parcel_application_rec_id)
    left join parcel ord using(parcel_rec_id)
    left join company c on c.company_rec_id = ord.client_rec_id
    -- ...questionable
    
    where 
    (
        'cadmium' = ''
    
        or
    
        exists
        (
            select * 
            from mlist_detail md
            where 
                md.mlist_rec_id = m.mlist_rec_id        
                and exists
                (
                    select *
                    from mlist_detail_parameter mdp            
                    join parameter p using(parameter_rec_id)
                    where mdp.mlist_detail_rec_id = md.mlist_detail_rec_id            
                        and to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words('cadmium'))            
                )
    
        )
    )
    group by c.company_rec_id, 
        c.the_company_code, c.company
    

    …变得很快:

    "HashAggregate  (cost=4276494.40..4276497.35 rows=295 width=71) (actual time=71.299..71.309 rows=84 loops=1)"
    "  ->  Nested Loop Left Join  (cost=0.00..4276480.78 rows=1816 width=71) (actual time=35.271..70.776 rows=715 loops=1)"
    "        ->  Nested Loop Left Join  (cost=0.00..4275939.00 rows=1816 width=37) (actual time=35.260..66.157 rows=715 loops=1)"
    "              ->  Nested Loop Left Join  (cost=0.00..4275263.92 rows=1816 width=37) (actual time=35.243..60.185 rows=715 loops=1)"
    "                    ->  Merge Left Join  (cost=0.00..4274523.56 rows=1816 width=37) (actual time=35.226..54.834 rows=715 loops=1)"
    "                          Merge Cond: ((t.mlist_rec_id)::text = (m.mlist_rec_id)::text)"
    "                          Filter: (alternatives: SubPlan 1 or hashed SubPlan 2)"
    "                          ->  Index Scan using fki_tlist__mlist on tlist t  (cost=0.00..710.72 rows=3631 width=37) (actual time=0.011..1.947 rows=3631 loops=1)"
    "                          ->  Index Scan using pk_mlist on mlist m  (cost=0.00..482.71 rows=3631 width=74) (actual time=0.008..1.829 rows=3631 loops=1)"
    "                          SubPlan 1"
    "                            ->  Nested Loop Semi Join  (cost=3.49..5870.46 rows=5 width=0) (never executed)"
    "                                  Join Filter: ((md.mlist_detail_rec_id)::text = (mdp.mlist_detail_rec_id)::text)"
    "                                  ->  Index Scan using fki_mlist_detail__mlist on mlist_detail md  (cost=0.00..24.31 rows=5 width=37) (never executed)"
    "                                        Index Cond: ((mlist_rec_id)::text = ($0)::text)"
    "                                  ->  Hash Join  (cost=3.49..1163.57 rows=676 width=37) (never executed)"
    "                                        Hash Cond: ((mdp.parameter_rec_id)::text = (p.parameter_rec_id)::text)"
    "                                        ->  Seq Scan on mlist_detail_parameter mdp  (cost=0.00..1013.87 rows=37187 width=74) (never executed)"
    "                                        ->  Hash  (cost=3.48..3.48 rows=1 width=37) (never executed)"
    "                                              ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (never executed)"
    "                                                    Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                          SubPlan 2"
    "                            ->  Hash Join  (cost=1180.47..3241.32 rows=15198 width=37) (actual time=16.478..34.359 rows=1918 loops=1)"
    "                                  Hash Cond: ((md.mlist_detail_rec_id)::text = (mdp.mlist_detail_rec_id)::text)"
    "                                  ->  Seq Scan on mlist_detail md  (cost=0.00..1987.79 rows=17679 width=74) (actual time=0.004..3.013 rows=17679 loops=1)"
    "                                  ->  Hash  (cost=1172.02..1172.02 rows=676 width=37) (actual time=16.461..16.461 rows=1918 loops=1)"
    "                                        ->  HashAggregate  (cost=1165.26..1172.02 rows=676 width=37) (actual time=15.790..16.037 rows=1918 loops=1)"
    "                                              ->  Hash Join  (cost=3.49..1163.57 rows=676 width=37) (actual time=1.613..15.037 rows=1918 loops=1)"
    "                                                    Hash Cond: ((mdp.parameter_rec_id)::text = (p.parameter_rec_id)::text)"
    "                                                    ->  Seq Scan on mlist_detail_parameter mdp  (cost=0.00..1013.87 rows=37187 width=74) (actual time=0.002..4.624 rows=37187 loops=1)"
    "                                                    ->  Hash  (cost=3.48..3.48 rows=1 width=37) (actual time=1.593..1.593 rows=1 loops=1)"
    "                                                          ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (actual time=1.311..1.588 rows=1 loops=1)"
    "                                                                Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                    ->  Index Scan using pk_parcel_application on parcel_application ord_app  (cost=0.00..0.40 rows=1 width=74) (actual time=0.007..0.007 rows=1 loops=715)"
    "                          Index Cond: ((m.parcel_application_rec_id)::text = (ord_app.parcel_application_rec_id)::text)"
    "              ->  Index Scan using pk_parcel on parcel ord  (cost=0.00..0.36 rows=1 width=74) (actual time=0.007..0.008 rows=1 loops=715)"
    "                    Index Cond: ((ord_app.parcel_rec_id)::text = (ord.parcel_rec_id)::text)"
    "        ->  Index Scan using pk_company on company c  (cost=0.00..0.29 rows=1 width=71) (actual time=0.006..0.006 rows=1 loops=715)"
    "              Index Cond: ((c.company_rec_id)::text = (ord.client_rec_id)::text)"
    "Total runtime: 71.504 ms"
    

    explain analyze
    select 
    
    c.company_rec_id, 
    c.the_company_code ,
    c.company
    
    from 
    tlist t
    
    join mlist m using(mlist_rec_id)
    join parcel_application ord_app using(parcel_application_rec_id)
    join parcel ord using(parcel_rec_id)
    join company c on c.company_rec_id = ord.client_rec_id
    
    where 
    (
        'cadmium' = ''
    
        or
    
        exists
        (
            select * 
            from mlist_detail md
            where 
                md.mlist_rec_id = m.mlist_rec_id        
                and exists
                (
                    select * from mlist_detail_parameter mdp
                    where mdp.mlist_detail_rec_id = md.mlist_detail_rec_id
                        -- Notice that there is no joining of parameter TO mlist_detail_parameter; We use EXISTS instead, which looks very superfluous
                        and exists
                        (
                            select * from parameter p
                            where parameter_rec_id = mdp.parameter_rec_id
                            and to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words('cadmium'))
                        )
    
                )
    
        )
    )
    
    group by c.company_rec_id, 
        c.the_company_code, c.company
    

    …,它的工作原理是:

    "HashAggregate  (cost=76424.17..76427.12 rows=295 width=71) (actual time=398.906..398.932 rows=84 loops=1)"
    "  ->  Nested Loop  (cost=0.00..76410.55 rows=1816 width=71) (actual time=52.529..398.171 rows=715 loops=1)"
    "        ->  Merge Join  (cost=0.00..75868.77 rows=1816 width=37) (actual time=45.106..385.128 rows=715 loops=1)"
    "              Merge Cond: ((ord_app.parcel_rec_id)::text = (ord.parcel_rec_id)::text)"
    "              ->  Nested Loop  (cost=0.00..75499.86 rows=1816 width=37) (actual time=41.745..374.833 rows=715 loops=1)"
    "                    ->  Nested Loop  (cost=0.00..74333.34 rows=1816 width=74) (actual time=31.483..302.607 rows=715 loops=1)"
    "                          ->  Index Scan using fki_parcel_application__parcel on parcel_application ord_app  (cost=0.00..508.50 rows=3218 width=74) (actual time=2.459..5.797 rows=3218 loops=1)"
    "                          ->  Index Scan using fki_mlist__parcel_application on mlist m  (cost=0.00..22.93 rows=1 width=74) (actual time=0.089..0.092 rows=0 loops=3218)"
    "                                Index Cond: ((m.parcel_application_rec_id)::text = (ord_app.parcel_application_rec_id)::text)"
    "                                Filter: (alternatives: SubPlan 3 or hashed SubPlan 6)"
    "                                SubPlan 3"
    "                                  ->  Nested Loop Semi Join  (cost=0.00..112.64 rows=5 width=0) (actual time=0.071..0.071 rows=0 loops=3631)"
    "                                        ->  Index Scan using fki_mlist_detail__mlist on mlist_detail md  (cost=0.00..24.31 rows=5 width=37) (actual time=0.019..0.023 rows=4 loops=3631)"
    "                                              Index Cond: ((mlist_rec_id)::text = ($0)::text)"
    "                                        ->  Index Scan using fki_mlist_detail_parameter__mlist_detail on mlist_detail_parameter mdp  (cost=0.00..19.63 rows=1 width=37) (actual time=0.012..0.012 rows=0 loops=13473)"
    "                                              Index Cond: ((mdp.mlist_detail_rec_id)::text = (md.mlist_detail_rec_id)::text)"
    "                                              Filter: (alternatives: SubPlan 1 or hashed SubPlan 2)"
    "                                              SubPlan 1"
    "                                                ->  Seq Scan on parameter p  (cost=0.00..3.67 rows=1 width=0) (never executed)"
    "                                                      Filter: (((parameter_rec_id)::text = ($1)::text) AND (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text)))"
    "                                              SubPlan 2"
    "                                                ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (actual time=1.475..1.774 rows=1 loops=1)"
    "                                                      Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                                SubPlan 6"
    "                                  ->  Nested Loop Semi Join  (cost=0.00..128665.02 rows=15198 width=37) (never executed)"
    "                                        ->  Seq Scan on mlist_detail md  (cost=0.00..1987.79 rows=17679 width=74) (never executed)"
    "                                        ->  Index Scan using fki_mlist_detail_parameter__mlist_detail on mlist_detail_parameter mdp  (cost=0.00..7.83 rows=1 width=37) (never executed)"
    "                                              Index Cond: ((mdp.mlist_detail_rec_id)::text = (md.mlist_detail_rec_id)::text)"
    "                                              Filter: (alternatives: SubPlan 4 or hashed SubPlan 5)"
    "                                              SubPlan 4"
    "                                                ->  Seq Scan on parameter p  (cost=0.00..3.67 rows=1 width=0) (never executed)"
    "                                                      Filter: (((parameter_rec_id)::text = ($1)::text) AND (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text)))"
    "                                              SubPlan 5"
    "                                                ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (never executed)"
    "                                                      Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                    ->  Index Scan using fki_tlist__mlist on tlist t  (cost=0.00..0.63 rows=1 width=37) (actual time=0.100..0.100 rows=1 loops=715)"
    "                          Index Cond: ((t.mlist_rec_id)::text = (m.mlist_rec_id)::text)"
    "              ->  Index Scan using pk_parcel on parcel ord  (cost=0.00..338.49 rows=3087 width=74) (actual time=3.277..5.920 rows=3146 loops=1)"
    "        ->  Index Scan using pk_company on company c  (cost=0.00..0.29 rows=1 width=71) (actual time=0.017..0.017 rows=1 loops=715)"
    "              Index Cond: ((c.company_rec_id)::text = (ord.client_rec_id)::text)"
    "Total runtime: 399.649 ms"
    

    即使这是快,我认为这是不自然的使用多余的存在。

    and exists
    (
        select * from mlist_detail_parameter mdp
        where mdp.mlist_detail_rec_id = md.mlist_detail_rec_id
    
        -- Notice that there is no joining of parameter to mlist_detail_parameter, we use EXISTS instead
        and exists -- SUPERFLUOUS
        (
            select * from parameter p
            where parameter_rec_id = mdp.parameter_rec_id
            and to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words('cadmium'))
        )
    
    )
    

    …,何时加入:

    and exists
    (
       select *
        from mlist_detail_parameter mdp            
        join parameter p using(parameter_rec_id)
        where mdp.mlist_detail_rec_id = md.mlist_detail_rec_id          
        and to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words('cadmium'))
    )
    

    我的问题是为什么最自然的查询(即使用内部连接(而不是左连接))非常慢。我可以选择使用不带左联接的查询,但必须在mlist\u detail\u参数和参数表上使用多余的EXISTS,而不是使用直接联接。

    我发现的最美妙的性能点是只使用左连接,但我认为应用左连接 盲目地 能帮我找到货运邪教程序

    [编辑:2010-07-06]

    此查询。

    explain analyze
    
    select 
    
    c.company_rec_id, 
    c.the_company_code ,
    c.company
    
    from 
    tlist t
    
    -- it is questionable why this query become fast when using left join, the most natural query is inner join...
    join mlist m using(mlist_rec_id)
    join parcel_application ord_app using(parcel_application_rec_id)
    join parcel ord using(parcel_rec_id)
    join company c on c.company_rec_id = ord.client_rec_id
    -- ...questionable
    
    where 
    (
        'cadmium' = ''
    
        or
    
        mlist_rec_id in 
        (
            select mlist_rec_id
            from mlist_detail md
            where 
                mlist_detail_rec_id in             
                (
                    select mlist_detail_rec_id
                    from mlist_detail_parameter mdp            
                    join parameter p using(parameter_rec_id)
                    where to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words('cadmium'))       
                )
    
        )
    )
    group by c.company_rec_id, 
        c.the_company_code, c.company
    

    …有这个执行计划(0.037秒):

    "HashAggregate  (cost=4045.22..4048.17 rows=295 width=71) (actual time=37.117..37.128 rows=84 loops=1)"
    "  ->  Hash Join  (cost=3734.44..4031.60 rows=1816 width=71) (actual time=33.610..36.650 rows=715 loops=1)"
    "        Hash Cond: ((ord.client_rec_id)::text = (c.company_rec_id)::text)"
    "        ->  Hash Join  (cost=3718.80..3990.99 rows=1816 width=37) (actual time=33.448..36.150 rows=715 loops=1)"
    "              Hash Cond: ((ord_app.parcel_rec_id)::text = (ord.parcel_rec_id)::text)"
    "              ->  Hash Join  (cost=3601.34..3844.02 rows=1816 width=37) (actual time=32.096..34.360 rows=715 loops=1)"
    "                    Hash Cond: ((m.parcel_application_rec_id)::text = (ord_app.parcel_application_rec_id)::text)"
    "                    ->  Hash Join  (cost=3438.94..3652.11 rows=1816 width=37) (actual time=30.578..32.370 rows=715 loops=1)"
    "                          Hash Cond: ((t.mlist_rec_id)::text = (m.mlist_rec_id)::text)"
    "                          ->  Seq Scan on tlist t  (cost=3278.24..3459.63 rows=1816 width=37) (actual time=28.930..30.219 rows=715 loops=1)"
    "                                Filter: (hashed SubPlan 1)"
    "                                SubPlan 1"
    "                                  ->  Hash Join  (cost=1179.63..3240.24 rows=15201 width=37) (actual time=17.259..27.282 rows=1918 loops=1)"
    "                                        Hash Cond: ((md.mlist_detail_rec_id)::text = (mdp.mlist_detail_rec_id)::text)"
    "                                        ->  Seq Scan on mlist_detail md  (cost=0.00..1987.79 rows=17679 width=74) (actual time=0.002..4.160 rows=17679 loops=1)"
    "                                        ->  Hash  (cost=1171.48..1171.48 rows=652 width=37) (actual time=17.236..17.236 rows=1918 loops=1)"
    "                                              ->  HashAggregate  (cost=1164.96..1171.48 rows=652 width=37) (actual time=16.290..16.692 rows=1918 loops=1)"
    "                                                    ->  Hash Join  (cost=3.49..1163.33 rows=652 width=37) (actual time=0.868..15.420 rows=1918 loops=1)"
    "                                                          Hash Cond: ((mdp.parameter_rec_id)::text = (p.parameter_rec_id)::text)"
    "                                                          ->  Seq Scan on mlist_detail_parameter mdp  (cost=0.00..1013.87 rows=37187 width=74) (actual time=0.002..5.397 rows=37187 loops=1)"
    "                                                          ->  Hash  (cost=3.48..3.48 rows=1 width=37) (actual time=0.850..0.850 rows=1 loops=1)"
    "                                                                ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (actual time=0.719..0.848 rows=1 loops=1)"
    "                                                                      Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                          ->  Hash  (cost=115.31..115.31 rows=3631 width=74) (actual time=1.632..1.632 rows=3631 loops=1)"
    "                                ->  Seq Scan on mlist m  (cost=0.00..115.31 rows=3631 width=74) (actual time=0.002..0.569 rows=3631 loops=1)"
    "                    ->  Hash  (cost=122.18..122.18 rows=3218 width=74) (actual time=1.507..1.507 rows=3218 loops=1)"
    "                          ->  Seq Scan on parcel_application ord_app  (cost=0.00..122.18 rows=3218 width=74) (actual time=0.002..0.531 rows=3218 loops=1)"
    "              ->  Hash  (cost=78.87..78.87 rows=3087 width=74) (actual time=1.345..1.345 rows=3087 loops=1)"
    "                    ->  Seq Scan on parcel ord  (cost=0.00..78.87 rows=3087 width=74) (actual time=0.002..0.469 rows=3087 loops=1)"
    "        ->  Hash  (cost=11.95..11.95 rows=295 width=71) (actual time=0.155..0.155 rows=295 loops=1)"
    "              ->  Seq Scan on company c  (cost=0.00..11.95 rows=295 width=71) (actual time=0.003..0.063 rows=295 loops=1)"
    "Total runtime: 37.240 ms"
    

    一些想法。我认为,最重要的是,Postgresql只是在EXISTS子句上做了一个糟糕的优化,我不认为EXISTS子句天生就比IN慢。事实上,我记得我以前优化过一个非共振查询(我认为是8.3或8.2),将In子句转换为EXISTS子句

    2 回复  |  直到 12 年前
        1
  •  4
  •   Matthew Wood    14 年前

    将EXISTS表达式转换为IN表达式。

    在本例中效果更好,因为现在将从包含最限制因素的查询(全文搜索查找)开始,从“由内而外”对查询进行有效的评估。该查询将返回一小组行,这些行可以直接针对外部查询的主键(其中x在(选择x…)中)进行查找,而不是针对外部查询的每个值调用一次“内部”查询(或者对于原始情况下的所有值,如果我读得正确的话)。EXISTS方法在这里导致嵌套循环(一个查询对另一个查询中的每个值进行一次求值)而in方法使用散列联接(在许多情况下,如果不是大多数情况下,这是一种更有效的执行方法)。

    Hash Join更像一个map,其中两个数组同时遍历,并对这两个数组执行一个操作。这大致是线性的(O(n))。把它们当作加法嵌套,这样它就变成O(n)到O(2n)到O(3n),等等。

    是的,是的,我知道这不完全是一回事,但关键是有多个嵌套循环通常表明查询计划很慢,比较两个big-O样式更容易识别,我相信。

    嵌套循环和EXISTS本身并不是邪恶的,但是在大多数情况下,基本过滤条件最终会影响所有内容(例如,问题中的全文搜索)、in表达式(或in) 在某些情况下,适当的连接(JOIN)会产生一个更有效的计划。

        2
  •  4
  •   Michael Buen    14 年前

    您的查询基本上是以下内容(很难作为评论提出):

    select 
    
    c.company_rec_id, 
    c.the_company_code,
    c.company
    
    from 
    
    company c 
    
    where
    
    exists (
        select *
        from parameter p
        join mlist_detail_parameter mdp on mdp.parameter_rec_id = p.parameter_rec_id
        join mlist_detail md            on md.mlist_detail_rec_id = mdp.mlist_detail_rec_id
        join mlist m                    on m.mlist_rec_id = md.mlist_rec_id
    
        join parcel_application ord_app on ord_app.parcel_application_rec_id = m.parcel_application_rec_id
        join parcel ord                 on ord.parcel_rec_id = ord_app.parcel_rec_id
    
        join tlist t                    on t.mlist_rec_id = m.mlist_rec_id
    
        where
            ord.client_rec_id = c.company_rec_id
        and to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words('cadmium'))
    )
    

    [编辑:2010-07-06,由Michael Buen添加]

    "Hash Join  (cost=2152.94..2172.52 rows=232 width=71) (actual time=71.106..71.207 rows=84 loops=1)"
    "  Hash Cond: ((c.company_rec_id)::text = (ord.client_rec_id)::text)"
    "  ->  Seq Scan on company c  (cost=0.00..11.95 rows=295 width=71) (actual time=0.004..0.030 rows=295 loops=1)"
    "  ->  Hash  (cost=2150.04..2150.04 rows=232 width=37) (actual time=71.077..71.077 rows=84 loops=1)"
    "        ->  HashAggregate  (cost=2147.72..2150.04 rows=232 width=37) (actual time=71.033..71.040 rows=84 loops=1)"
    "              ->  Nested Loop  (cost=1783.22..2146.09 rows=652 width=37) (actual time=51.029..70.187 rows=1918 loops=1)"
    "                    ->  Hash Join  (cost=1783.22..1938.61 rows=652 width=111) (actual time=51.014..55.913 rows=1918 loops=1)"
    "                          Hash Cond: ((ord_app.parcel_rec_id)::text = (ord.parcel_rec_id)::text)"
    "                          ->  Hash Join  (cost=1665.76..1810.55 rows=652 width=111) (actual time=48.360..52.004 rows=1918 loops=1)"
    "                                Hash Cond: ((ord_app.parcel_application_rec_id)::text = (m.parcel_application_rec_id)::text)"
    "                                ->  Seq Scan on parcel_application ord_app  (cost=0.00..122.18 rows=3218 width=74) (actual time=0.003..1.485 rows=3218 loops=1)"
    "                                ->  Hash  (cost=1657.61..1657.61 rows=652 width=111) (actual time=48.331..48.331 rows=1918 loops=1)"
    "                                      ->  Hash Join  (cost=164.19..1657.61 rows=652 width=111) (actual time=4.755..46.122 rows=1918 loops=1)"
    "                                            Hash Cond: ((md.mlist_rec_id)::text = (m.mlist_rec_id)::text)"
    "                                            ->  Nested Loop  (cost=3.49..1485.51 rows=652 width=37) (actual time=1.638..40.974 rows=1918 loops=1)"
    "                                                  ->  Hash Join  (cost=3.49..1163.33 rows=652 width=37) (actual time=1.590..18.090 rows=1918 loops=1)"
    "                                                        Hash Cond: ((mdp.parameter_rec_id)::text = (p.parameter_rec_id)::text)"
    "                                                        ->  Seq Scan on mlist_detail_parameter mdp  (cost=0.00..1013.87 rows=37187 width=74) (actual time=0.003..5.499 rows=37187 loops=1)"
    "                                                        ->  Hash  (cost=3.48..3.48 rows=1 width=37) (actual time=1.568..1.568 rows=1 loops=1)"
    "                                                              ->  Seq Scan on parameter p  (cost=0.00..3.48 rows=1 width=37) (actual time=1.324..1.564 rows=1 loops=1)"
    "                                                                    Filter: (to_tsvector(regexp_replace((parameter)::text, '[\\(\\)\\!\\.\\/,\\-\\?]+'::text, ' '::text, 'g'::text)) @@ plainto_tsquery('cadmium'::text))"
    "                                                  ->  Index Scan using pk_mlist_detail on mlist_detail md  (cost=0.00..0.48 rows=1 width=74) (actual time=0.011..0.011 rows=1 loops=1918)"
    "                                                        Index Cond: ((md.mlist_detail_rec_id)::text = (mdp.mlist_detail_rec_id)::text)"
    "                                            ->  Hash  (cost=115.31..115.31 rows=3631 width=74) (actual time=3.096..3.096 rows=3631 loops=1)"
    "                                                  ->  Seq Scan on mlist m  (cost=0.00..115.31 rows=3631 width=74) (actual time=0.003..0.994 rows=3631 loops=1)"
    "                          ->  Hash  (cost=78.87..78.87 rows=3087 width=74) (actual time=2.640..2.640 rows=3087 loops=1)"
    "                                ->  Seq Scan on parcel ord  (cost=0.00..78.87 rows=3087 width=74) (actual time=0.004..0.876 rows=3087 loops=1)"
    "                    ->  Index Scan using fki_tlist__mlist on tlist t  (cost=0.00..0.31 rows=1 width=37) (actual time=0.006..0.006 rows=1 loops=1918)"
    "                          Index Cond: ((t.mlist_rec_id)::text = (m.mlist_rec_id)::text)"
    "Total runtime: 71.373 ms"