设置end_date是否插入新的相同行



我在找到最有效的语句集来处理历史数据时遇到了麻烦。假设我有两张桌子。暂存表和存档表。每次将数据导入临时表时,我想使用以下逻辑将其插入到存档表中。

if circuitId = circuitId and checksum = checksum(*)
then do nothing
if circuitId = circuiId and checksum <> checksum(*)
then insert new row and set end_date of old row.
if rows exists in archive but not in staging
then change delete_flag to 1

到目前为止,我已经在下面创建了处理delete_flag和如果checksum = checksum(*),但是我似乎无法弄清楚如何集成checksum <> checksum(*)并将end_date设置在旧行上。

UPDATE ARCHIVE.F1_CIRCUITS n
SET
n.DELETE_FLAG = 1
FROM
ARCHIVE.F1_CIRCUITS n
LEFT JOIN EXTRACT.F1_CIRCUITS o
ON o."circuitId" = n.CIRCUITID
WHERE o."circuitId" IS NULL;
INSERT INTO ARCHIVE.F1_CIRCUITS
SELECT ("circuitId","circuitRef","name","location","country","lat","lng","alt","url", CURRENT_TIMESTAMP, NULL, HASH_MD5("circuitId","circuitRef","name","location","country","lat","lng","alt","url"), 'C', 0) FROM EXTRACT.F1_CIRCUITS o
WHERE NOT EXISTS (SELECT 1
FROM ARCHIVE.F1_CIRCUITS n
WHERE o."circuitId" = n.CIRCUITID AND
HASH_MD5(o."circuitId",o."circuitRef",o."name",o."location",o."country",o."lat",o."lng",o."alt",o."url") = n.cheetax_checksum
);

暂存架构表

CREATE TABLE EXTRACT.F1_CIRCUITS (
"circuitId"  DECIMAL(11,0),
"circuitRef" VARCHAR(255) UTF8,
"name"       VARCHAR(255) UTF8,
"location"   VARCHAR(255) UTF8,
"country"    VARCHAR(255) UTF8,
"lat"        DOUBLE PRECISION,
"lng"        DOUBLE PRECISION,
"alt"        DECIMAL(11,0),
"url"        VARCHAR(255) UTF8
);

存档架构表

CREATE TABLE F1_CIRCUITS (
ID                      DECIMAL(18,0) IDENTITY,
CIRCUITID               DECIMAL(11,0),
CIRCUITREF              VARCHAR(255) UTF8,
NAME                    VARCHAR(255) UTF8,
LOCATION                VARCHAR(255) UTF8,
COUNTRY                 VARCHAR(255) UTF8,
LAT                     DOUBLE PRECISION,
LNG                     DOUBLE PRECISION,
ALT                     DECIMAL(11,0),
URL                     VARCHAR(255) UTF8,
START_TIMESTAMP TIMESTAMP,
END_TIMESTAMP   TIMESTAMP,
CHECKSUM        CHAR(32) UTF8,
STATUS          CHAR(1) UTF8,
DELETE_FLAG     DECIMAL(1,0)
);

对于您的方案,我建议使用触发器并自动执行操作。 当您在 EXTRACT 表中插入新行(条件 1/2(或从那里删除(条件 3(时,下面的触发器将触发,并将检查:

插入到数据提取时:

if circuitId = circuitId and checksum = checksum(*)
then do nothing
if circuitId = circuiId and checksum <> checksum(*)
then insert new row and set end_date of old row.

从数据提取中删除时:

if rows exists in archive but not in staging
then change delete_flag to 1

触发您的条件:

CREATE OR REPLACE TRIGGER archiver
AFTER INSERT OR DELETE
ON EXTRACT.F1_Circuits
REFERENCES OLD AS OLD NEW AS NEW
FOR EACH ROW

DECLARE
v_extr_new_row VARCHAR2 (500); --- for storing new inserting row data
v_extr_row_checksum RAW(500);  --- for storing new row as RAW
v_encrypted_raw RAW(2048);     --- encrypted to md5 data
v_arch_cir_id NUMBER (11) := 0;   --- circuitId from ARCHIVE table
v_arch_checksum VARCHAR2 (32) := '0'; --- checksum from ARCHIVE table
v_arch_last_chsum NUMBER (3);    --- for finding the latest data by checksum in ARCH table
v_arch_start_date TIMESTAMP;   --- for finding the latest data by start_date in ARCH table
BEGIN
IF INSERTING THEN
--- taking the inserting data into the variable for further converting to md5
v_extr_new_row := :NEW."circuitId" || ',' || :NEW."circuitRef" || ',' ||  :NEW."name" || ',' ||
:NEW."location" || ',' ||  :NEW."country" || ',' ||  :NEW."lat" || ',' ||
:NEW."lng" || ',' ||  :NEW."alt" || ',' || :NEW."url";
--- converting the inserting data to hash_md5, checksum
v_extr_row_checksum := utl_raw.cast_to_raw(v_extr_new_row);
v_encrypted_raw := dbms_crypto.hash(v_extr_row_checksum, 2);
--- taking the latest row from the ARCH table for changing the Updating the END_DATE;
SELECT START_TIMESTAMP, r_last_chsum, CHECKSUM, circuitId
INTO v_arch_start_date, v_arch_last_chsum, v_arch_checksum, v_arch_cir_id
FROM
(SELECT START_TIMESTAMP, ROW_NUMBER()
OVER (PARTITION BY arch_in.circuitId ORDER BY arch_in.START_TIMESTAMP DESC) r_last_chsum,
CIRCUITID, CHECKSUM
FROM ARCHIVE.F1_CIRCUITS arch_in
WHERE arch_in.circuitId = :NEW."circuitId")
WHERE r_last_chsum = 1;

---- checking whether the data already exist in the ARCH table
IF (v_arch_cir_id = :NEW."circuitId" AND v_arch_checksum <> v_encrypted_raw)
THEN
---- Update end_date for old row
UPDATE ARCHIVE.F1_CIRCUITS arch
SET arch.END_TIMESTAMP = CURRENT_TIMESTAMP
WHERE arch.circuitId = :NEW."circuitId"
AND arch.CHECKSUM = v_arch_checksum
AND arch.START_TIMESTAMP = v_arch_start_date;
--- inserting the new data into ARCH table
INSERT INTO ARCHIVE.F1_CIRCUITS
(CIRCUITID, CIRCUITREF, NAME, LOCATION, COUNTRY, LAT, LNG, ALT, URL, START_TIMESTAMP, END_TIMESTAMP, CHECKSUM,
STATUS, DELETE_FLAG)
VALUES (:NEW."circuitId", :NEW."circuitRef", :NEW."name", :NEW."location", :NEW."country",
:NEW."lat", :NEW."lng", :NEW."alt", :NEW."url", CURRENT_TIMESTAMP, NULL, v_encrypted_raw, 'C', to_number (0));
---- else if they are EQUAL then DO NOTHING, but if you need you can do;)
ELSIF (v_arch_cir_id = :NEW."circuitId" AND v_arch_checksum = v_encrypted_raw)
THEN
dbms_output.put_line ('Do Nothing!');
/*                 INSERT INTO ikrom.f1_circuits_arch   ---- ARCHIVE.F1_CIRCUITS
(CIRCUITID, CIRCUITREF, NAME, LOCATION, COUNTRY, LAT, LNG, ALT, URL, START_TIMESTAMP, END_TIMESTAMP, CHECKSUM,
STATUS, DELETE_FLAG)
VALUES (:NEW."circuitId", :NEW."circuitRef", :NEW."name", :NEW."location", :NEW."country",
:NEW."lat", :NEW."lng", :NEW."alt", :NEW."url", CURRENT_TIMESTAMP, NULL, v_encrypted_raw, 'C', to_number (0));*/
END IF;
END IF;
IF DELETING THEN
--- taking the inserting data into the variable for further converting to md5
v_extr_new_row := :OLD."circuitId" || ',' || :OLD."circuitRef" || ',' ||  :OLD."name" || ',' ||
:OLD."location" || ',' ||  :OLD."country" || ',' ||  :OLD."lat" || ',' ||
:OLD."lng" || ',' ||  :OLD."alt" || ',' || :OLD."url";
--- converting the inserting data to hash_md5, checksum
v_extr_row_checksum := utl_raw.cast_to_raw(v_extr_new_row);
v_encrypted_raw := dbms_crypto.hash(v_extr_row_checksum, 2);
--- taking the latest row from the ARCH table for changing the Updating the END_DATE;
SELECT START_TIMESTAMP, r_last_chsum, CHECKSUM, circuitId
INTO v_arch_start_date, v_arch_last_chsum, v_arch_checksum, v_arch_cir_id
FROM
(SELECT START_TIMESTAMP, ROW_NUMBER()
OVER (PARTITION BY arch_in.circuitId ORDER BY arch_in.START_TIMESTAMP DESC) r_last_chsum,
CIRCUITID, CHECKSUM
FROM ARCHIVE.F1_CIRCUITS arch_in
WHERE arch_in.circuitId = :OLD."circuitId")
WHERE r_last_chsum = 1;

---- checking whether the data already exist in the ARCH table
IF (v_arch_cir_id = :OLD."circuitId")
THEN
---- Update end_date for old row
UPDATE ARCHIVE.F1_CIRCUITS arch
SET arch.DELETE_FLAG = 1
WHERE arch.circuitId = :OLD."circuitId"
AND arch.DELETE_FLAG = 0;
END IF;                   
END IF;  
EXCEPTION
WHEN NO_DATA_FOUND
THEN
---- if no data found in ARCH table by SELECTING in INSERTING process then just INSERT the new row
INSERT INTO ARCHIVE.F1_CIRCUITS
(CIRCUITID, CIRCUITREF, NAME, LOCATION, COUNTRY, LAT, LNG, ALT, URL, START_TIMESTAMP, END_TIMESTAMP, CHECKSUM,
STATUS, DELETE_FLAG)
VALUES (:NEW."circuitId", :NEW."circuitRef", :NEW."name", :NEW."location", :NEW."country",
:NEW."lat", :NEW."lng", :NEW."alt", :NEW."url", CURRENT_TIMESTAMP, NULL, v_encrypted_raw, 'C', to_number (0));
END;

重要说:当从提取表中删除时,存档中具有相同circuitId的所有行都将更新; 还为触发器运行用户授予dbms_crypto执行;

似乎对于每个电路id,您希望end_date/end_timestamp对于最近插入的行始终为空。为什么不直接这样做,并为不是最近插入的所有行设置结束时间戳?

update ARCHIVE.F1_CIRCUITS set end_timestamp = current_timestamp
where end_timestamp is null
and exists (select 1 from ARCHIVE.F1_CIRCUITS n
where n.circuitid = f1_circuits.circuitid
and n.start_timestamp > f1_circuits.start_timestamp);

另一方面,当我听到"每次这个表发生某些事情时,我想对另一个表做点什么"时,我的第一个想法是使用触发器。如果您最终在触发器(或其他 PL/SQL 块(中执行此操作,如果您将校验和分配给:new_checksum这样的变量,这也很容易;你可以做:

update ARCHIVE.F1_CIRCUITS set end_timestamp = current_timestamp
where end_timestamp is null and circuitid = :new.circuitid and checksum <> :new_checksum;

最新更新