下面的函数假定单词仅由字母和数字字符组成;任何连续的其他字符(包括空格和标点)都被视为单词之间的分隔符。
-- word_ngrams(str, n): return every run of n consecutive words found in str,
-- one row per n-gram, in order of appearance, with the words joined by a
-- single space.
--
-- A "word" is a maximal run of alphanumeric characters; any run of other
-- characters (spaces, punctuation, ...) acts as a separator.
--
-- Parameters:
--   str  text to tokenise.
--   n    number of words per n-gram.
-- Returns:
--   setof text; no rows when str or n is NULL (the function is STRICT),
--   when n < 1, or when str contains fewer than n words.
create or replace function word_ngrams(str text, n int)
returns setof text
language plpgsql
immutable strict
as $$
declare
    words text[];
begin
    -- An n-gram needs at least one word; without this guard the slice below
    -- would be empty and the loop would emit empty strings.
    if n < 1 then
        return;
    end if;

    -- regexp_split_to_array yields an empty-string element when str starts or
    -- ends with a separator (or is empty); drop those so they never show up
    -- as "words" inside an n-gram.
    words := array_remove(regexp_split_to_array(str, '[^[:alnum:]]+'), '');

    -- Slide a window of n words across the array. When there are fewer than
    -- n words the upper bound is below 1 and the loop body never runs.
    for i in 1 .. cardinality(words) - n + 1 loop
        return next array_to_string(words[i : i + n - 1], ' ');
    end loop;
end $$;
查找所有由三个单词组成的短语:
-- List every three-word phrase of the sample sentence, one per row.
select word_ngrams
from word_ngrams(
    'ed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium',
    3
);
word_ngrams
----------------------------
ed ut perspiciatis
ut perspiciatis unde
perspiciatis unde omnis
unde omnis iste
omnis iste natus
iste natus error
natus error sit
error sit voluptatem
sit voluptatem accusantium
(9 rows)
查找所有由六个单词组成的短语:
-- List every six-word phrase of the sample sentence, one per row.
select word_ngrams
from word_ngrams(
    'ed ut perspiciatis, unde omnis iste natus error sit voluptatem accusantium',
    6
);
word_ngrams
---------------------------------------------
ed ut perspiciatis unde omnis iste
ut perspiciatis unde omnis iste natus
perspiciatis unde omnis iste natus error
unde omnis iste natus error sit
omnis iste natus error sit voluptatem
iste natus error sit voluptatem accusantium
(6 rows)