我正在使用Bigquery。我的数据集如下所示。基本上,我从一个事件表开始,其中只有在更新数据时才会添加一行。我通过交叉连接到my_date>=事件日期。我现在遇到的问题是,当每个my_date(每个tenancy_id(有两个event_date时,我只想最后得到最新的一行。(在本例中,这意味着我要删除第4行和第12行(。实现这一目标的最佳方式是什么?
您可以使用row_number((窗口函数来获取每个my_date和tenancy_id的最新行。
create table yourtable (Row int, my_date date, event_date date, tenancy_ld int, rent_period varchar(100), rent_amount int, tenancy_start_date date, tenancy_end_date date);
insert into yourtable values(1, '6/29/2021', '6/29/2021', 1109, 'FORTN IGHTLY', 67794 ,'03/15/21','8/28/2021');
insert into yourtable values(2, '6/29/2021', '6/29/2021', 1110, 'MONTHLY', 224364 ,'02/14/21','3/14/2022');
insert into yourtable values(3, '6/28/2021', '6/28/2021', 1111, 'FORTN IGHTLY', 70000 ,'02/27/21','4/20/2022');
insert into yourtable values(4, '6/29/2021', '6/28/2021', 1111, 'FORTN IGHTLY', 70000 ,'02/27/21','4/20/2022');
insert into yourtable values(5, '6/29/2021', '6/29/2021', 1111, 'FORTN IGHTLY', 77056 ,'02/27/21','4/20/2022');
insert into yourtable values(6, '6/29/2021', '6/29/2021', 1112, 'WEEKLY', 32830 ,'10/17/20','12/22/2021');
insert into yourtable values(7, '6/29/2021', '6/29/2021', 1113, 'WEEKLY', 73620 ,'07/17/20','2/3/2022');
insert into yourtable values(8, '6/25/2021', '6/25/2021', 1114, 'WEEKLY', 136 ,'09/19/20','4/26/2022');
insert into yourtable values(9, '6/26/2021', '6/25/2021', 1114, 'WEEKLY', 136 ,'09/19/20','4/26/2022');
insert into yourtable values(10,'6/27/2021', '6/25/2021', 1114, 'WEEKLY', 136 ,'09/19/20','4/26/2022');
insert into yourtable values(11,'6/28/2021', '6/25/2021', 1114, 'WEEKLY', 136 ,'09/19/20','4/26/2022');
insert into yourtable values(12,'6/29/2021', '6/25/2021', 1114, 'WEEKLY', 136 ,'09/19/20','4/26/2022');
insert into yourtable values(13,'6/29/2021', '6/29/2021', 1114, 'FORTN IGHTLY', 136552 ,'09/19/20','4/26/2022');
查询:
with cte as
(
select my_date, event_date, tenancy_ld, rent_period, rent_amount,
tenancy_start_date, tenancy_end_date, row_number()over(partition by tenancy_ld,
my_date order by event_date desc) rnk
from yourtable
)
select my_date, event_date, tenancy_ld, rent_period, rent_amount,
tenancy_start_date, tenancy_end_date from cte where rnk=1
输出:
my_date | 2021-06-29 | ||||||
---|---|---|---|---|---|---|---|
2021-06-29 | 2021-06-29 | 2021-06-29 | 2021-06-29 |
我建议使用qualify
:
select t.*
from t
where 1=1
qualify row_number() over (partition by tenancy_id, event_date
order by my_date desc
) = 1;