当有不同的reservation_id
时,我试图获得重复的ip_number
记录。
DECLARE @Reservations TABLE (ip_number INT, reservation_id VARCHAR(16), name VARCHAR(16),quote_date DATETIME, arrival_date DATETIME, deposit_amount DECIMAL(16,2)
INSERT INTO @Reservations (ip_number, reservation_id, name, quote_date, arrival_date, deposit_amount)
VALUES
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132518,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','64.20'),
(50053177,21132518,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','64.20'),
(50053177,21132714,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','32.10'),
(50053161,21131464,'Amy','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','52.31'),
(50053151,21131445,'Chung','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','119.04'),
(50053151,21131445,'Chung','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','119.04'),
(50053151,21131445,'Chung','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','119.04'),
(50039951,21125684,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125683,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125683,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125682,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125682,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88');
我正在尝试使用ROW_NUMBER()
:
SELECT ROW_NUMBER() OVER (ORDER BY rth.ip_number DESC) AS row,
reservation_id,
name,
ip_number,
quote_date,
arrival_date,
deposit_amount
FROM r_order_reservation
WHERE quote_date > '2022-12-01';
返回以下内容:
row reservation_id name quote_date arrival_date deposit_amount
1801 21132003 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 288.90
1802 21132003 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 288.90
1803 21132003 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 288.90
1804 21132003 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 288.90
1805 21132003 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 288.90
1806 21132003 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 288.90
1807 21132518 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 64.20
1808 21132518 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 64.20
1809 21132714 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 32.10
1810 21131464 Amy 2022-12-27 00:00:00.000 2022-12-28 07:00:00.000 52.31
1811 21131445 Chung 2022-12-27 00:00:00.000 2022-12-28 07:00:00.000 119.04
1812 21131445 Chung 2022-12-27 00:00:00.000 2022-12-28 07:00:00.000 119.04
1813 21131445 Chung 2022-12-27 00:00:00.000 2022-12-28 07:00:00.000 119.04
1814 21125684 Jennifer 2022-12-27 00:00:00.000 2022-12-31 07:00:00.000 103.88
1815 21125683 Jennifer 2022-12-27 00:00:00.000 2022-12-30 07:00:00.000 103.88
1816 21125683 Jennifer 2022-12-27 00:00:00.000 2022-12-30 07:00:00.000 103.88
1817 21125682 Jennifer 2022-12-27 00:00:00.000 2022-12-29 07:00:00.000 103.88
1818 21125682 Jennifer 2022-12-27 00:00:00.000 2022-12-29 07:00:00.000 103.88
Christine有3个reservation_id
21132003
,21132518
和21132714
,但它显示了reservation_id
21132003
和21132518
的重复记录。
Amy只有1个reservation_id
21131464
.
Chung有多个相同的reservation_id
21131445
,但不包括任何其他reservation_id
。
Jennifer有3个不同的reservation_id
、21125684
、21125683
和21125682
,但有一个重复的reservation_id
和21125682
。
我试图拉入具有不同reservation_id
但具有相同name
,quote_date
,arrival_date
和deposit_amount
的记录,当同一ip_number
在给定的quote_date
上有超过1个reservation_id
时。
预期结果:
row reservation_id name quote_date arrival_date deposit_amount
1801 21132003 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 288.90
1802 21132518 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 64.20
1803 21132714 Christine 2022-12-27 00:00:00.000 2022-12-29 00:00:00.000 32.10
1804 21125684 Jennifer 2022-12-27 00:00:00.000 2022-12-31 07:00:00.000 103.88
1815 21125683 Jennifer 2022-12-27 00:00:00.000 2022-12-30 07:00:00.000 103.88
1817 21125682 Jennifer 2022-12-27 00:00:00.000 2022-12-29 07:00:00.000 103.88
我不确定row_number如何帮助您在这里进行重复数据删除,或者如果此数据是您的实际表或(可能有缺陷?)查询的结果-但也许这就是您所追求的?
首先识别具有多个reservation_id
的组,然后将其与数据集连接并选择不同的行:
with d as (
select name,quote_date,arrival_date, Count(distinct reservation_id) dupe
from Reservations
group by name,quote_date,arrival_date
)
select distinct *
from Reservations r
where exists (
select *
from d
where d.name = r.name
and d.quote_date = r.quote_date
and d.arrival_date = r.arrival_date
and d.dupe > 1
);
演示小提琴
这是你期望的样子吗?
DECLARE @Reservations TABLE (ip_number INT, reservation_id VARCHAR(16), name VARCHAR(16),quote_date DATETIME, arrival_date DATETIME, deposit_amount DECIMAL(16,2))
INSERT INTO @Reservations (ip_number, reservation_id, name, quote_date, arrival_date, deposit_amount)
VALUES
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132003,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','288.90'),
(50053177,21132518,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','64.20'),
(50053177,21132518,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','64.20'),
(50053177,21132714,'Christine','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','32.10'),
(50053161,21131464,'Amy','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','52.31'),
(50053151,21131445,'Chung','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','119.04'),
(50053151,21131445,'Chung','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','119.04'),
(50053151,21131445,'Chung','2022-12-27 00:00:00.000','2022-12-28 07:00:00.000','119.04'),
(50039951,21125684,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125683,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125683,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125682,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88'),
(50039951,21125682,'Jennifer','2022-12-27 00:00:00.000','2022-12-29 07:00:00.000','103.88');
WITH TMP AS (
SELECT ROW_NUMBER() OVER (ORDER BY ip_number DESC) AS row,
reservation_id,
name,
ip_number,
quote_date,
arrival_date,
deposit_amount
FROM @Reservations
WHERE quote_date > '2022-12-01'
GROUP BY reservation_id, name, ip_number, quote_date, arrival_date, deposit_amount
)
SELECT *
FROM TMP T1
WHERE EXISTS (
SELECT 1
FROM TMP T2
WHERE T1.name = T2.name
AND T1.ip_number = T2.ip_number
AND T1.quote_date = T2.quote_date
AND T1.arrival_date = T2.arrival_date
GROUP BY T2.name, T2.ip_number, T2.quote_date, T2.arrival_date
HAVING COUNT(*) > 1
)