将函数动态应用于Postgres表中的所有列

使用Postgres13.1，我想将前向填充函数应用于表的所有列。前向填充功能在我前面的问题中有解释：

如何将前向填充作为PL/PGSQL函数

但是，在这种情况下，会指定列和表。我想将该代码应用于任意表，即指定一个表，并将正向填充应用于每一列。

使用此表作为示例：

CREATE TABLE example(row_num int, id int, str text, val integer);
INSERT INTO example VALUES
(1, 1, '1a', NULL)
, (2, 1, NULL,    1)
, (3, 2, '2a',    2)
, (4, 2, NULL, NULL)
, (5, 3, NULL, NULL)
, (6, 3, '3a',   31)
, (7, 3, NULL, NULL)
, (8, 3, NULL,   32)
, (9, 3, '3b', NULL)
, (10,3, NULL, NULL)
;

我从以下函数的工作基础开始。我称之为传递一些变量名。请注意，第一个是表名称，而不是列名。该函数获取表名，创建一个包含所有列名的数组，然后输出这些名称。

create or replace function col_collect(tbl text, id text, row_num text)
returns text[]
language plpgsql as
$func$
declare
tmp text[];
col text;
begin
select array (
select column_name
from information_schema."columns" c
where table_name = tbl
) into tmp;
foreach col in array tmp
loop
raise notice 'col: %', col;
end loop;
return tmp;
end
$func$;

我想应用"；正向填充"；函数，我从前面的问题中得到，用于表的每一列。UPDATE似乎是正确的方法。所以这是前面的函数，我用execute的更新替换raise notice，这样我就可以传入表名：

create or replace function col_collect(tbl text, id text, row_num text)
returns void
language plpgsql as
$func$
declare
tmp text[];
col text;
begin
select array (
select column_name
from information_schema."columns" c
where table_name = tbl
) into tmp;
foreach col in array tmp
loop
execute 'update '||tbl||' 
set '||col||' = gapfill('||col||') OVER w AS '||col||' 
where '||tbl||'.row_num = '||col||'.row_num
window w as (PARTITION BY '||id||' ORDER BY '||row_num||') 
returning *;';
end loop;
end
$func$;
-- call the function
select col_collect('example','id','row_num')

前面的错误带有语法错误。我试过很多不同的方法，但都失败了。关于SO的有用答案就在这里。我试图应用的聚合函数(作为窗口函数(是：

CREATE OR REPLACE FUNCTION gap_fill_internal(s anyelement, v anyelement)
RETURNS anyelement
LANGUAGE plpgsql AS
$func$
BEGIN
RETURN COALESCE(v, s);  -- that's all!
END
$func$;
CREATE AGGREGATE gap_fill(anyelement) ( 
SFUNC = gap_fill_internal, 
STYPE = anyelement 
);

我的问题是：

这是一个好方法吗？如果是，我做错了什么；或
有更好的方法吗

您所要求的不是一项琐碎的任务。您应该熟悉PL/pgSQL。我不建议初学者使用这种动态SQL查询，因为它太强大了。

话虽如此，让我们潜入水中。系好安全带！

CREATE OR REPLACE FUNCTION f_gap_fill_update(_tbl regclass, _id text, _row_num text, OUT nullable_columns int, OUT updated_rows int)
LANGUAGE plpgsql AS
$func$
DECLARE
_pk  text  := quote_ident(_row_num);
_sql text;
BEGIN   
SELECT INTO _sql, nullable_columns
concat_ws(E'n'
, 'UPDATE ' || _tbl || ' t'
, 'SET   (' || string_agg(        quote_ident(a.attname), ', ') || ')'
, '    = (' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
, 'FROM  (' 
, '   SELECT ' || _pk
, '        , ' || string_agg(format('gap_fill(%1$I) OVER w AS %1$I', a.attname), ', ')
, '   FROM   ' || _tbl
, format('   WINDOW w AS (PARTITION BY %I ORDER BY %s)', _id, _pk)
, '   ) u'
, format('WHERE t.%1$s = u.%1$s', _pk)
, 'AND  (' || string_agg('t.' || quote_ident(a.attname), ', ') || ') IS DISTINCT FROM'
, '     (' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
)
, count(*) -- AS _col_ct
FROM  (
SELECT a.attname
FROM   pg_attribute a
WHERE  a.attrelid = _tbl
AND    a.attnum > 0
AND    NOT a.attisdropped
AND    NOT a.attnotnull
ORDER  BY a.attnum
) a;
IF nullable_columns = 0 THEN
RAISE EXCEPTION 'No nullable columns found in table >>%<<', _tbl;
ELSIF _sql IS NULL THEN
RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
END IF;

-- RAISE NOTICE '%', _sql;       -- debug
EXECUTE _sql;              -- execute
GET DIAGNOSTICS updated_rows = ROW_COUNT; 
END
$func$;

示例调用：

SELECT * FROM f_gap_fill_update('example', 'id', 'row_num');

db<gt；小提琴这里

功能是最先进的。生成并执行以下形式的查询：

UPDATE tbl t
SET   (str, val, col1)
= (u.str, u.val, u.col1)
FROM  (
SELECT row_num
, gap_fill(str) OVER w AS str, gap_fill(val) OVER w AS val
, gap_fill(col1) OVER w AS col1
FROM   tbl
WINDOW w AS (PARTITION BY id ORDER BY row_num)
) u
WHERE t.row_num = u.row_num
AND  (t.str, t.val, t.col1) IS DISTINCT FROM
(u.str, u.val, u.col1)

使用pg_catalog.pg_attribute而不是信息模式。参见：

注意最后的WHERE子句，以防止(可能昂贵的(空更新。只有实际更改的行才会被写入。参见：

如何(或可以(在多列上选择DISTINCT

此外，甚至只考虑可为null的列(未定义的NOT NULL(，以避免不必要的工作。

在UPDATE中使用ROW语法以保持代码简单。参见：

SQL从另一个表的字段更新一个表中的字段

函数返回两个整数值：nullable_columns和updated_rows，报告名称的含义。

该函数正确地防止SQL注入。参见：

表名作为PostgreSQL函数参数
Postgres函数中的SQL注入与准备好的查询

关于GET DIAGNOSTICS:

计算PostgreSQL中受批量查询影响的行数

上面的函数会更新，但不会返回行。以下是如何返回不同类型的行的基本演示：

CREATE OR REPLACE FUNCTION f_gap_fill_select(_tbl_type anyelement, _id text, _row_num text)
RETURNS SETOF anyelement
LANGUAGE plpgsql AS
$func$
DECLARE
_tbl regclass := pg_typeof(_tbl_type)::text::regclass;
_sql text;
BEGIN   
SELECT INTO _sql
'SELECT ' || string_agg(CASE WHEN a.attnotnull
THEN format('%I', a.attname)
ELSE format('gap_fill(%1$I) OVER w AS %1$I', a.attname) END
, ', ' ORDER BY a.attnum)
|| E'nFROM ' || _tbl
|| format(E'nWINDOW w AS (PARTITION BY %I ORDER BY %I)', _id, _row_num)
FROM   pg_attribute a
WHERE  a.attrelid = _tbl
AND    a.attnum > 0
AND    NOT a.attisdropped;

IF _sql IS NULL THEN
RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
END IF;
RETURN QUERY EXECUTE _sql;
-- RAISE NOTICE '%', _sql;       -- debug
END
$func$;

调用(注意特殊语法！(：

SELECT * FROM f_gap_fill_select(NULL::example, 'id', 'row_num');

db<gt；小提琴这里

关于返回多态行类型：

重构PL/pgSQL函数以返回各种SELECT查询的输出

相关内容

最新更新

热门标签：