如何在SQL中连接并只保留最新的行(Bigquery)



我正在使用Bigquery。我的数据集如下所示。基本上,我从一个事件表开始,其中只有在更新数据时才会添加一行。我通过交叉连接到my_date>=事件日期。我现在遇到的问题是,当每个my_date(每个tenancy_id(有两个event_date时,我只想最后得到最新的一行。(在本例中,这意味着我要删除第4行和第12行(。实现这一目标的最佳方式是什么?

您可以使用row_number((窗口函数来获取每个my_date和tenancy_id的最新行。

create table yourtable (Row int, my_date date, event_date date, tenancy_ld int, rent_period varchar(100), rent_amount int, tenancy_start_date date, tenancy_end_date date);
insert into yourtable values(1,    '6/29/2021',    '6/29/2021', 1109,  'FORTN IGHTLY', 67794   ,'03/15/21','8/28/2021');
insert into yourtable values(2,    '6/29/2021',    '6/29/2021', 1110,  'MONTHLY',      224364  ,'02/14/21','3/14/2022');
insert into yourtable values(3,    '6/28/2021',    '6/28/2021', 1111,  'FORTN IGHTLY', 70000   ,'02/27/21','4/20/2022');
insert into yourtable values(4,    '6/29/2021',    '6/28/2021', 1111,  'FORTN IGHTLY', 70000   ,'02/27/21','4/20/2022');
insert into yourtable values(5,    '6/29/2021',    '6/29/2021', 1111,  'FORTN IGHTLY', 77056   ,'02/27/21','4/20/2022');
insert into yourtable values(6,    '6/29/2021',    '6/29/2021', 1112,  'WEEKLY',       32830   ,'10/17/20','12/22/2021');
insert into yourtable values(7,    '6/29/2021',    '6/29/2021', 1113,  'WEEKLY',       73620   ,'07/17/20','2/3/2022');
insert into yourtable values(8,    '6/25/2021',    '6/25/2021', 1114,  'WEEKLY',       136     ,'09/19/20','4/26/2022');
insert into yourtable values(9,    '6/26/2021',    '6/25/2021', 1114,  'WEEKLY',       136     ,'09/19/20','4/26/2022');
insert into yourtable values(10,'6/27/2021',   '6/25/2021', 1114,  'WEEKLY',       136     ,'09/19/20','4/26/2022');
insert into yourtable values(11,'6/28/2021',   '6/25/2021', 1114,  'WEEKLY',       136     ,'09/19/20','4/26/2022');
insert into yourtable values(12,'6/29/2021',   '6/25/2021', 1114,  'WEEKLY',       136     ,'09/19/20','4/26/2022');
insert into yourtable values(13,'6/29/2021',   '6/29/2021', 1114,  'FORTN IGHTLY', 136552  ,'09/19/20','4/26/2022');

查询:

with cte as
(
select my_date, event_date, tenancy_ld, rent_period, rent_amount, 
tenancy_start_date, tenancy_end_date, row_number()over(partition by tenancy_ld, 
my_date order by event_date desc) rnk
from yourtable
)
select my_date, event_date, tenancy_ld, rent_period, rent_amount, 
tenancy_start_date, tenancy_end_date from cte where rnk=1

输出:

my_date2021-06-29
2021-06-292021-06-292021-06-292021-06-29

我建议使用qualify:

select t.*
from t
where 1=1
qualify row_number() over (partition by tenancy_id, event_date
order by my_date desc
) = 1;

最新更新