代码之家  ›  专栏  ›  技术社区  ›  Ravi

marklogic中的near查询问题

  •  0
  • Ravi  · 技术社区  · 6 年前

    我对near query有个奇怪的问题..

    let $xml :=
      <titles count="6">
        <title type="source">ASIA-PACIFIC JOURNAL OF CLINICAL ONCOLOGY</title>
        <title type="source_abbrev">ASIA-PAC J CLIN ONCO</title>
        <title type="abbrev_iso">Asia-Pac. J. Clin. Oncol.</title>
        <title type="abbrev_11">ASIA-PAC J</title>
        <title type="abbrev_29">ASIA-PAC J CLIN ONCOL</title>
        <title type="item">Phase II study of cetuximab with irinotecan for KRAS wild-type colorectal cancer in Japanese patients</title>
       </titles>
    

    最初我运行这个查询

    let $q1 := 
          cts:element-query((xs:QName("title")),
              cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
              ("case-insensitive", "wildcarded"))
            )
    return
      cts:highlight($xml,$q1, <b>{$cts:text}</b>)
    

    我得到了正确的结果 enter image description here

    现在我运行这个,得到了以下正确的结果

    let $q2 := 
          cts:element-query((xs:QName("title")),
              cts:word-query(("trial*", "study", "studies*"),
              ("case-insensitive", "wildcarded"))
            )
    
    return
      cts:highlight($xml,$q2, <b>{$cts:text}</b>)
    

    enter image description here 然后我用near/0运行以下查询,却没有得到任何结果

    let $q3 :=
        cts:near-query((
                  cts:element-query((xs:QName("title")),
                    cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
                      ("case-insensitive", "wildcarded")))
              ,
                 cts:element-query((xs:QName("title")),
                    cts:word-query(("trial*", "study", "studies*"),
                      ("case-insensitive", "wildcarded")))
             ),
             0,
             ('ordered'))
    
    return
      cts:highlight($xml,$q3, <b>{$cts:text}</b>)
    

    enter image description here

    但是当我用near/1运行查询时,却得到了结果。这是为什么?短语一后面紧接着就是短语二,所以near距离应该是0,对吧?

    let $q3 :=
        cts:near-query((
                  cts:element-query((xs:QName("title")),
                    cts:word-query(("phase 0","phase 1","phase 2","phase 3","phase 4","phase I","phase ii","phase iii","phase iv"),
                      ("case-insensitive", "wildcarded")))
              ,
                 cts:element-query((xs:QName("title")),
                    cts:word-query(("trial*", "study", "studies*"),
                      ("case-insensitive", "wildcarded")))
             ),
             1,
             ('ordered'))
    
    return
      cts:highlight($xml,$q3, <b>{$cts:text}</b>)
    

    enter image description here

    1 回复  |  直到 6 年前
        1
  •  2
  •   Elijah Bernstein-Cooper    6 年前

    我认为MarkLogic是这样计算词距的:锚定词位于位置0,紧随其后的词(标记)位于位置1,依此类推。因此,要匹配相邻的词,需要使用距离为1的near查询。你示例中的查询执行结果是正确的。

    借用marklogic cts:near-query 文档:

    xquery version "1.0-ml";
    let $x := <p>Now is the winter of our discontent</p>
    return
    cts:contains($x, cts:near-query(
                        ("now", "the"),
                        2, "ordered"));
    
    (: => returns true, "the" is 2 words from "now" :)
    
    let $x := <p>Now is the winter of our discontent</p>
    return
    cts:contains($x, cts:near-query(
                        ("now", "is"),
                        1, "ordered"));
    (: => returns true, "is" is 1 word from "now" :)
    
    let $x := <p>Now is the winter of our discontent</p>
    return
    cts:contains($x, cts:near-query(
                        ("now", "is"),
                        0, "ordered"));
    
    (: => returns false, "is" is 1 word from "now" :)