我有一个包含值的 varchar 列,我想针对关键财务提取值并忽略 test,abc 等词。
Other:transaction1|finance:insurance|finance:insurance|Other:Place|finance:testing|finance:abc
产出:保险
with temp as (
select unnest(string_to_array(t.str,'|')) as str
from (values('Other:transaction1|finance:insurance|finance:insurance|Other:Place|finance:testing|finance:abc')) as t(str)
)
select distinct substring(str from position(':' in str)+1 for length(str))
from temp
where str ~'finance:' and --matching finance
str !~'testing' and -- ignore words testing
str !~'abc' -- ignore words abc
一种方法是使用regexp_matches
,像这样:
with the_table(id, col) as(
select 1, 'Other:transaction1|finance:testing|finance:testing|finance:Place|finance:testing|finance:insurance'::text union all
select 2, 'finance:insurance'::text union all
select 3, 'ffinance:insurance|finance:testing'::text union all
select 4, 'Other:transaction1|finance:insurance|finance:insurance|Other:Place|finance:testing|finance:abc'::text
)
select distinct id, arr[1] as word from (
select id, regexp_matches('|'||col, '|finance:([^|]+)', 'g') as arr
from the_table
) t
where
arr[1] not in('testing', 'abc') -- there is words you don't want to see in result
order by id
这会将除"黑名单"以外的每个单词都作为不同的行。我使用独特的列id
只是为了显示,哪些单词对应于哪一行。