我有一个包含几百万条记录的数据库,看起来像这样:
{
"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20",
"circulationNotes": [
{
"note": "REFERENCE BOOK",
"noteType": "Check in"
},
{
"note": "REFERENCE BOOK",
"noteType": "Check out"
}
],
"holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917",
"permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"
}
我需要为每个缺少它们的circulationNotes (~ 10K条记录)生成并添加一个uid,因此期望的输出是
{
"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20",
"circulationNotes": [
{
"id": "3794824a-b814-4b94-b7c4-d8be53088a51",
"note": "REFERENCE BOOK",
"noteType": "Check in"
},
{
"id": "175989f9-16f4-4cfe-a1aa-d665ec1263e7",
"note": "REFERENCE BOOK",
"noteType": "Check out"
}
],
"holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917",
"permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"
}
我可以用查询选择我想要更新的记录:
SELECT id
FROM mod_inventory_storage.item
WHERE EXISTS
( SELECT *
FROM jsonb_array_elements(jsonb->'circulationNotes')
WHERE value->'note' IS NOT NULL
AND value->'id' IS NULL )
(数据是这样的,如果一个音符有问题,两个音符都有问题)
但是我如何在circulationNotes中的每个条目中使用uid更新jsonb对象?
我复制了你的案例
create table test (id serial, text jsonb);
insert into test (text) values ('{
"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20",
"circulationNotes": [
{
"note": "REFERENCE BOOK",
"noteType": "Check in"
},
{
"note": "REFERENCE BOOK",
"noteType": "Check out"
}
],
"holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917",
"permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"
}');
insert into test (text) values ('{
"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20",
"circulationNotes": [
{
"note": "REFERENCE BOOK",
"noteType": "Check in"
},
{
"note": "REFERENCE BOOK",
"noteType": "Check out"
}
],
"holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917",
"permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"
}');
insert into test (text) values ('{
"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20",
"circulationNotes": [
{
"note": "REFERENCE BOOK",
"noteType": "Check in"
},
{
"note": "REFERENCE BOOK",
"noteType": "Check out"
}
],
"holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917",
"permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"
}');
insert into test (text) values ('{
"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20",
"circulationNotes": [
{
"id": "3794824a-b814-4b94-b7c4-d8be53088a51",
"note": "REFERENCE BOOK",
"noteType": "Check in"
},
{
"id": "175989f9-16f4-4cfe-a1aa-d665ec1263e7",
"note": "REFERENCE BOOK",
"noteType": "Check out"
}
],
"holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917",
"permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"
}');
然后,为了解决这个问题,我将问题分为两个步骤:分解和重组。步骤1:在circulationNotes
上创建一个表,并将新的id
列放在下面的
with fixing_id as (
select id,
text->'id' json_id,
text->'holdingsRecordId' holdingsRecordId,
text->'permanentLocationId' as permanentLocationId,
('{"id":"'||uuid_generate_v1()||'"}')::jsonb || jsonb_array_elements((text->'circulationNotes')::jsonb) as circulationNotes
from test
where exists
(select *
FROM jsonb_array_elements(text->'circulationNotes')
where value->'note' is not null
and value->'id' is null))
select * from fixing_id
结果如下
id | json_id | holdingsrecordid | permanentlocationid | circulationnotes
----+----------------------------------------+----------------------------------------+----------------------------------------+---------------------------------------------------------------------------------------------------
1 | "aa1e24cd-2825-490f-8ccb-60bd4914dc20" | "fd79910f-4d11-41b4-9e53-198fff089917" | "cee4d952-da5a-4d34-bb0a-a5d4d4581f39" | {"id": "4ad151ea-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check in"}
1 | "aa1e24cd-2825-490f-8ccb-60bd4914dc20" | "fd79910f-4d11-41b4-9e53-198fff089917" | "cee4d952-da5a-4d34-bb0a-a5d4d4581f39" | {"id": "4ad155fa-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check out"}
2 | "aa1e24cd-2825-490f-8ccb-60bd4914dc20" | "fd79910f-4d11-41b4-9e53-198fff089917" | "cee4d952-da5a-4d34-bb0a-a5d4d4581f39" | {"id": "4ad15708-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check in"}
2 | "aa1e24cd-2825-490f-8ccb-60bd4914dc20" | "fd79910f-4d11-41b4-9e53-198fff089917" | "cee4d952-da5a-4d34-bb0a-a5d4d4581f39" | {"id": "4ad157a8-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check out"}
3 | "aa1e24cd-2825-490f-8ccb-60bd4914dc20" | "fd79910f-4d11-41b4-9e53-198fff089917" | "cee4d952-da5a-4d34-bb0a-a5d4d4581f39" | {"id": "4ad15870-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check in"}
3 | "aa1e24cd-2825-490f-8ccb-60bd4914dc20" | "fd79910f-4d11-41b4-9e53-198fff089917" | "cee4d952-da5a-4d34-bb0a-a5d4d4581f39" | {"id": "4ad15906-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check out"}
(6 rows)
检查我正在使用uuid_generate_v1()
函数来生成uuid(您需要uuid-ossp
扩展). And that the
jsonb_array_elementscreates a row for each
text->'circulationNotes' '项)。
步骤2:用jsonb_agg
聚合fixing_id
,用jsonb_build_object
重建json对象
select id,
jsonb_build_object('id',json_id,'circulationNotes',jsonb_agg(circulationNotes) ,'holdingsRecordId',holdingsRecordId,'permanentLocationId',permanentLocationId)
from fixing_id
group by id,
json_id,
holdingsRecordId,
permanentLocationId
结果
id | jsonb_build_object
----+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | {"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20", "circulationNotes": [{"id": "b71fc232-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check in"}, {"id": "b71fc39a-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check out"}], "holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917", "permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"}
3 | {"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20", "circulationNotes": [{"id": "b71fc624-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check in"}, {"id": "b71fc6c4-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check out"}], "holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917", "permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"}
2 | {"id": "aa1e24cd-2825-490f-8ccb-60bd4914dc20", "circulationNotes": [{"id": "b71fc4b2-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check in"}, {"id": "b71fc55c-f5c9-11eb-a458-42010af00961", "note": "REFERENCE BOOK", "noteType": "Check out"}], "holdingsRecordId": "fd79910f-4d11-41b4-9e53-198fff089917", "permanentLocationId": "cee4d952-da5a-4d34-bb0a-a5d4d4581f39"}
(3 rows)
整个查询是
with fixing_id as (
select id,
text->'id' json_id,
text->'holdingsRecordId' holdingsRecordId,
text->'permanentLocationId' as permanentLocationId,
('{"id":"'||uuid_generate_v1()||'"}')::jsonb || jsonb_array_elements((text->'circulationNotes')::jsonb) as circulationNotes
from test
where exists
(select *
FROM jsonb_array_elements(text->'circulationNotes')
where value->'note' is not null
and value->'id' is null))
select id,
jsonb_build_object('id',json_id,'circulationNotes',jsonb_agg(circulationNotes) ,'holdingsRecordId',holdingsRecordId,'permanentLocationId',permanentLocationId)
from fixing_id
group by id,
json_id,
holdingsRecordId,
permanentLocationId